This pull request contains the following changes for UML:

 - Removal of dead code (TT mode leftovers, etc.)
 - Fixes for the network vector driver
 - Fixes for time-travel mode

Merge tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Richard Weinberger:

 - Removal of dead code (TT mode leftovers, etc)

 - Fixes for the network vector driver

 - Fixes for time-travel mode

* tag 'uml-for-linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux:
  um: fix time-travel syscall scheduling hack
  um: Remove outdated asm/sysrq.h header
  um: Remove the declaration of user_thread function
  um: Remove the call to SUBARCH_EXECVE1 macro
  um: Remove unused mm_fd field from mm_id
  um: Remove unused fields from thread_struct
  um: Remove the redundant newpage check in update_pte_range
  um: Remove unused kpte_clear_flush macro
  um: Remove obsoleted declaration for execute_syscall_skas
  user_mode_linux_howto_v2: add VDE vector support in doc
  vector_user: add VDE support
  um: remove ARCH_NO_PREEMPT_DYNAMIC
  um: vector: Fix NAPI budget handling
  um: vector: Replace locks guarding queue depth with atomics
  um: remove variable stack array in os_rcv_fd_msg()
Linus Torvalds 2024-09-27 12:48:48 -07:00
commit 12cc5240f4
24 changed files with 285 additions and 188 deletions

View File

@ -217,6 +217,8 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| fd | vector | dependent on fd type | varies |
+-----------+--------+------------------------------------+------------+
| vde | vector | dep. on VDE VPN: Virt.Net Locator | varies |
+-----------+--------+------------------------------------+------------+
| tuntap | legacy | none | ~ 500Mbit |
+-----------+--------+------------------------------------+------------+
| daemon | legacy | none | ~ 450Mbit |
@ -573,6 +575,41 @@ https://github.com/NetSys/bess/wiki/Built-In-Modules-and-Ports
BESS transport does not require any special privileges.
VDE vector transport
--------------------
Virtual Distributed Ethernet (VDE) is a project whose main goal is to provide
highly flexible support for virtual networking.
http://wiki.virtualsquare.org/#/tutorials/vdebasics
Common usages of VDE include fast prototyping and teaching.
Examples:
``vecX:transport=vde,vnl=tap://tap0``
use tap0
``vecX:transport=vde,vnl=slirp://``
use slirp
``vec0:transport=vde,vnl=vde:///tmp/switch``
connect to a vde switch
``vecX:transport=\"vde,vnl=cmd://ssh remote.host //tmp/sshlirp\"``
connect to a remote slirp (instant VPN: converts ssh into a VPN using sshlirp)
https://github.com/virtualsquare/sshlirp
``vec0:transport=vde,vnl=vxvde://234.0.0.1``
connect to a local area cloud (all the UML nodes using the same
multicast address and running on hosts in the same multicast domain (LAN)
will be automagically connected together into a virtual LAN).
Configuring Legacy transports
=============================

View File

@ -11,7 +11,6 @@ config UML
select ARCH_HAS_KCOV
select ARCH_HAS_STRNCPY_FROM_USER
select ARCH_HAS_STRNLEN_USER
select ARCH_NO_PREEMPT_DYNAMIC
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN

View File

@ -22,6 +22,7 @@
#include <linux/interrupt.h>
#include <linux/firmware.h>
#include <linux/fs.h>
#include <asm/atomic.h>
#include <uapi/linux/filter.h>
#include <init.h>
#include <irq_kern.h>
@ -102,18 +103,33 @@ static const struct {
static void vector_reset_stats(struct vector_private *vp)
{
/* We reuse the existing queue locks for stats */
/* RX stats are modified with RX head_lock held
* in vector_poll.
*/
spin_lock(&vp->rx_queue->head_lock);
vp->estats.rx_queue_max = 0;
vp->estats.rx_queue_running_average = 0;
vp->estats.tx_queue_max = 0;
vp->estats.tx_queue_running_average = 0;
vp->estats.rx_encaps_errors = 0;
vp->estats.sg_ok = 0;
vp->estats.sg_linearized = 0;
spin_unlock(&vp->rx_queue->head_lock);
/* TX stats are modified with TX head_lock held
* in vector_send.
*/
spin_lock(&vp->tx_queue->head_lock);
vp->estats.tx_timeout_count = 0;
vp->estats.tx_restart_queue = 0;
vp->estats.tx_kicks = 0;
vp->estats.tx_flow_control_xon = 0;
vp->estats.tx_flow_control_xoff = 0;
vp->estats.sg_ok = 0;
vp->estats.sg_linearized = 0;
vp->estats.tx_queue_max = 0;
vp->estats.tx_queue_running_average = 0;
spin_unlock(&vp->tx_queue->head_lock);
}
static int get_mtu(struct arglist *def)
@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def)
static char *drop_buffer;
/* Array backed queues optimized for bulk enqueue/dequeue and
* 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
* For more details and full design rationale see
* http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
*/
/*
* Advance the mmsg queue head by n = advance. Resets the queue to
@ -247,27 +257,13 @@ static char *drop_buffer;
static int vector_advancehead(struct vector_queue *qi, int advance)
{
int queue_depth;
qi->head =
(qi->head + advance)
% qi->max_depth;
spin_lock(&qi->tail_lock);
qi->queue_depth -= advance;
/* we are at 0, use this to
* reset head and tail so we can use max size vectors
*/
if (qi->queue_depth == 0) {
qi->head = 0;
qi->tail = 0;
}
queue_depth = qi->queue_depth;
spin_unlock(&qi->tail_lock);
return queue_depth;
atomic_sub(advance, &qi->queue_depth);
return atomic_read(&qi->queue_depth);
}
/* Advance the queue tail by n = advance.
@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance)
static int vector_advancetail(struct vector_queue *qi, int advance)
{
int queue_depth;
qi->tail =
(qi->tail + advance)
% qi->max_depth;
spin_lock(&qi->head_lock);
qi->queue_depth += advance;
queue_depth = qi->queue_depth;
spin_unlock(&qi->head_lock);
return queue_depth;
atomic_add(advance, &qi->queue_depth);
return atomic_read(&qi->queue_depth);
}
static int prep_msg(struct vector_private *vp,
@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
int iov_count;
spin_lock(&qi->tail_lock);
spin_lock(&qi->head_lock);
queue_depth = qi->queue_depth;
spin_unlock(&qi->head_lock);
queue_depth = atomic_read(&qi->queue_depth);
if (skb)
packet_len = skb->len;
@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
mmsg_vector->msg_hdr.msg_iovlen = iov_count;
mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
wmb(); /* Make the packet visible to the NAPI poll thread */
queue_depth = vector_advancetail(qi, 1);
} else
goto drop;
@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count)
}
/*
* Generic vector deque via sendmmsg with support for forming headers
* Generic vector dequeue via sendmmsg with support for forming headers
* using transport specific callback. Allows GRE, L2TPv3, RAW and
* other transports to use a common dequeue procedure in vector mode
*/
@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi)
{
struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *send_from;
int result = 0, send_len, queue_depth = qi->max_depth;
int result = 0, send_len;
if (spin_trylock(&qi->head_lock)) {
if (spin_trylock(&qi->tail_lock)) {
/* update queue_depth to current value */
queue_depth = qi->queue_depth;
spin_unlock(&qi->tail_lock);
while (queue_depth > 0) {
/* Calculate the start of the vector */
send_len = queue_depth;
send_from = qi->mmsg_vector;
send_from += qi->head;
/* Adjust vector size if wraparound */
if (send_len + qi->head > qi->max_depth)
send_len = qi->max_depth - qi->head;
/* Try to TX as many packets as possible */
if (send_len > 0) {
result = uml_vector_sendmmsg(
vp->fds->tx_fd,
send_from,
send_len,
0
);
vp->in_write_poll =
(result != send_len);
}
/* For some of the sendmmsg error scenarios
* we may end being unsure in the TX success
* for all packets. It is safer to declare
* them all TX-ed and blame the network.
/* update queue_depth to current value */
while (atomic_read(&qi->queue_depth) > 0) {
/* Calculate the start of the vector */
send_len = atomic_read(&qi->queue_depth);
send_from = qi->mmsg_vector;
send_from += qi->head;
/* Adjust vector size if wraparound */
if (send_len + qi->head > qi->max_depth)
send_len = qi->max_depth - qi->head;
/* Try to TX as many packets as possible */
if (send_len > 0) {
result = uml_vector_sendmmsg(
vp->fds->tx_fd,
send_from,
send_len,
0
);
vp->in_write_poll =
(result != send_len);
}
/* For some of the sendmmsg error scenarios
* we may end being unsure in the TX success
* for all packets. It is safer to declare
* them all TX-ed and blame the network.
*/
if (result < 0) {
if (net_ratelimit())
netdev_err(vp->dev, "sendmmsg err=%i\n",
result);
vp->in_error = true;
result = send_len;
}
if (result > 0) {
consume_vector_skbs(qi, result);
/* This is equivalent to an TX IRQ.
* Restart the upper layers to feed us
* more packets.
*/
if (result < 0) {
if (net_ratelimit())
netdev_err(vp->dev, "sendmmsg err=%i\n",
result);
vp->in_error = true;
result = send_len;
}
if (result > 0) {
queue_depth =
consume_vector_skbs(qi, result);
/* This is equivalent to an TX IRQ.
* Restart the upper layers to feed us
* more packets.
*/
if (result > vp->estats.tx_queue_max)
vp->estats.tx_queue_max = result;
vp->estats.tx_queue_running_average =
(vp->estats.tx_queue_running_average + result) >> 1;
}
netif_wake_queue(qi->dev);
/* if TX is busy, break out of the send loop,
* poll write IRQ will reschedule xmit for us
*/
if (result != send_len) {
vp->estats.tx_restart_queue++;
break;
}
if (result > vp->estats.tx_queue_max)
vp->estats.tx_queue_max = result;
vp->estats.tx_queue_running_average =
(vp->estats.tx_queue_running_average + result) >> 1;
}
netif_wake_queue(qi->dev);
/* if TX is busy, break out of the send loop,
* poll write IRQ will reschedule xmit for us.
*/
if (result != send_len) {
vp->estats.tx_restart_queue++;
break;
}
}
spin_unlock(&qi->head_lock);
}
return queue_depth;
return atomic_read(&qi->queue_depth);
}
/* Queue destructor. Deliberately stateless so we can use
@ -589,7 +574,7 @@ static struct vector_queue *create_queue(
}
spin_lock_init(&result->head_lock);
spin_lock_init(&result->tail_lock);
result->queue_depth = 0;
atomic_set(&result->queue_depth, 0);
result->head = 0;
result->tail = 0;
return result;
@ -668,18 +653,27 @@ done:
}
/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */
static void prep_queue_for_rx(struct vector_queue *qi)
{
struct vector_private *vp = netdev_priv(qi->dev);
struct mmsghdr *mmsg_vector = qi->mmsg_vector;
void **skbuff_vector = qi->skbuff_vector;
int i;
int i, queue_depth;
if (qi->queue_depth == 0)
queue_depth = atomic_read(&qi->queue_depth);
if (queue_depth == 0)
return;
for (i = 0; i < qi->queue_depth; i++) {
/* RX is always emptied 100% during each cycle, so we do not
* have to do the tail wraparound math for it.
*/
qi->head = qi->tail = 0;
for (i = 0; i < queue_depth; i++) {
/* it is OK if allocation fails - recvmmsg with NULL data in
* iov argument still performs an RX, just drops the packet
* This allows us stop faffing around with a "drop buffer"
@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
skbuff_vector++;
mmsg_vector++;
}
qi->queue_depth = 0;
atomic_set(&qi->queue_depth, 0);
}
static struct vector_device *find_device(int n)
@ -972,7 +966,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
budget = qi->max_depth;
packet_count = uml_vector_recvmmsg(
vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
vp->fds->rx_fd, qi->mmsg_vector, budget, 0);
if (packet_count < 0)
vp->in_error = true;
@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
* many do we need to prep the next time prep_queue_for_rx() is called.
*/
qi->queue_depth = packet_count;
atomic_add(packet_count, &qi->queue_depth);
for (i = 0; i < packet_count; i++) {
skb = (*skbuff_vector);
@ -1172,6 +1166,7 @@ static int vector_poll(struct napi_struct *napi, int budget)
if ((vp->options & VECTOR_TX) != 0)
tx_enqueued = (vector_send(vp->tx_queue) > 0);
spin_lock(&vp->rx_queue->head_lock);
if ((vp->options & VECTOR_RX) > 0)
err = vector_mmsg_rx(vp, budget);
else {
@ -1179,12 +1174,13 @@ static int vector_poll(struct napi_struct *napi, int budget)
if (err > 0)
err = 1;
}
spin_unlock(&vp->rx_queue->head_lock);
if (err > 0)
work_done += err;
if (tx_enqueued || err > 0)
napi_schedule(napi);
if (work_done < budget)
if (work_done <= budget)
napi_complete_done(napi, work_done);
return work_done;
}
@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev)
vp->rx_header_size,
MAX_IOV_SIZE
);
vp->rx_queue->queue_depth = get_depth(vp->parsed);
atomic_set(&vp->rx_queue->queue_depth, get_depth(vp->parsed));
} else {
vp->header_rxbuffer = kmalloc(
vp->rx_header_size,
@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev,
{
struct vector_private *vp = netdev_priv(dev);
/* Stats are modified in the dequeue portions of
* rx/tx which are protected by the head locks
* grabbing these locks here ensures they are up
* to date.
*/
spin_lock(&vp->tx_queue->head_lock);
spin_lock(&vp->rx_queue->head_lock);
memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
spin_unlock(&vp->rx_queue->head_lock);
spin_unlock(&vp->tx_queue->head_lock);
}
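
The core of this file's change is that the shared queue depth no longer needs cross-side locking: it becomes an atomic_t, each side keeps only its own head/tail spinlock, and the extended statistics are serialized by the head_lock of the queue they belong to (hence the lock pairs in vector_reset_stats() and vector_get_ethtool_stats()). A minimal sketch of that pattern, with illustrative names rather than the driver's real structures:

#include <linux/atomic.h>
#include <linux/spinlock.h>

/* Illustrative ring bookkeeping (not the driver's real struct): only the
 * depth is shared between the producer (tail side) and the consumer
 * (head side), so only the depth needs atomic accessors; head and tail
 * are still protected by their own locks in the enqueue/dequeue paths.
 */
struct demo_queue {
	spinlock_t head_lock;	/* consumer side: dequeue and stats */
	spinlock_t tail_lock;	/* producer side: enqueue */
	int head, tail, max_depth;
	atomic_t depth;
};

/* Producer path (called with tail_lock held): publish n new entries. */
static int demo_advance_tail(struct demo_queue *q, int n)
{
	q->tail = (q->tail + n) % q->max_depth;
	smp_wmb();		/* entries visible before the depth rises */
	atomic_add(n, &q->depth);
	return atomic_read(&q->depth);
}

/* Consumer path (called with head_lock held): retire n entries. */
static int demo_advance_head(struct demo_queue *q, int n)
{
	q->head = (q->head + n) % q->max_depth;
	atomic_sub(n, &q->depth);
	return atomic_read(&q->depth);
}

This mirrors what vector_advancetail()/vector_advancehead() now do, and why vector_send() and vector_poll() only take the head_lock of the queue whose statistics they touch.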
static int vector_get_coalesce(struct net_device *netdev,

View File

@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <asm/atomic.h>
#include "vector_user.h"
@ -44,7 +45,8 @@ struct vector_queue {
struct net_device *dev;
spinlock_t head_lock;
spinlock_t tail_lock;
int queue_depth, head, tail, max_depth, max_iov_frags;
atomic_t queue_depth;
int head, tail, max_depth, max_iov_frags;
short options;
};

View File

@ -46,6 +46,9 @@
#define TRANS_FD "fd"
#define TRANS_FD_LEN strlen(TRANS_FD)
#define TRANS_VDE "vde"
#define TRANS_VDE_LEN strlen(TRANS_VDE)
#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
@ -434,6 +437,84 @@ fd_cleanup:
return NULL;
}
/* enough char to store an int type */
#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2)
#define ENOUGH_OCTAL(type) ((CHAR_BIT * sizeof(type) + 2) / 3)
/* vde_plug --descr xx --port2 xx --mod2 xx --group2 xx seqpacket://NN vnl (NULL) */
#define VDE_MAX_ARGC 12
#define VDE_SEQPACKET_HEAD "seqpacket://"
#define VDE_SEQPACKET_HEAD_LEN (sizeof(VDE_SEQPACKET_HEAD) - 1)
#define VDE_DEFAULT_DESCRIPTION "UML"
static struct vector_fds *user_init_vde_fds(struct arglist *ifspec)
{
char seqpacketvnl[VDE_SEQPACKET_HEAD_LEN + ENOUGH(int) + 1];
char *argv[VDE_MAX_ARGC] = {"vde_plug"};
int argc = 1;
int rv;
int sv[2];
struct vector_fds *result = NULL;
char *vnl = uml_vector_fetch_arg(ifspec,"vnl");
char *descr = uml_vector_fetch_arg(ifspec,"descr");
char *port = uml_vector_fetch_arg(ifspec,"port");
char *mode = uml_vector_fetch_arg(ifspec,"mode");
char *group = uml_vector_fetch_arg(ifspec,"group");
if (descr == NULL) descr = VDE_DEFAULT_DESCRIPTION;
argv[argc++] = "--descr";
argv[argc++] = descr;
if (port != NULL) {
argv[argc++] = "--port2";
argv[argc++] = port;
}
if (mode != NULL) {
argv[argc++] = "--mod2";
argv[argc++] = mode;
}
if (group != NULL) {
argv[argc++] = "--group2";
argv[argc++] = group;
}
argv[argc++] = seqpacketvnl;
argv[argc++] = vnl;
argv[argc++] = NULL;
rv = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
if (rv < 0) {
printk(UM_KERN_ERR "vde: seqpacket socketpair err %d", -errno);
return NULL;
}
rv = os_set_exec_close(sv[0]);
if (rv < 0) {
printk(UM_KERN_ERR "vde: seqpacket socketpair cloexec err %d", -errno);
goto vde_cleanup_sv;
}
snprintf(seqpacketvnl, sizeof(seqpacketvnl), VDE_SEQPACKET_HEAD "%d", sv[1]);
run_helper(NULL, NULL, argv);
close(sv[1]);
result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
if (result == NULL) {
printk(UM_KERN_ERR "fd open: allocation failed");
goto vde_cleanup;
}
result->rx_fd = sv[0];
result->tx_fd = sv[0];
result->remote_addr_size = 0;
result->remote_addr = NULL;
return result;
vde_cleanup_sv:
close(sv[1]);
vde_cleanup:
close(sv[0]);
return NULL;
}
static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
{
int rxfd = -1, txfd = -1;
@ -673,6 +754,8 @@ struct vector_fds *uml_vector_user_open(
return user_init_unix_fds(parsed, ID_BESS);
if (strncmp(transport, TRANS_FD, TRANS_FD_LEN) == 0)
return user_init_fd_fds(parsed);
if (strncmp(transport, TRANS_VDE, TRANS_VDE_LEN) == 0)
return user_init_vde_fds(parsed);
return NULL;
}
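
One aside on the ENOUGH() macro introduced above: (CHAR_BIT * sizeof(type) - 1) / 3 + 2 is a compile-time upper bound on the decimal rendering of an integer type, roughly one digit per three bits plus two bytes of slack, so for a 32-bit int it evaluates to 12, enough for "-2147483648" and a terminating NUL. A standalone check of that arithmetic (plain userspace C, illustrative only, not part of the driver):

#include <limits.h>
#include <stdio.h>

/* Same formula as above: worst-case decimal length of an integer type,
 * derived from its bit width.
 */
#define ENOUGH(type) ((CHAR_BIT * sizeof(type) - 1) / 3 + 2)

int main(void)
{
	char buf[ENOUGH(int)];	/* 12 bytes when int is 32 bits wide */
	int n = snprintf(buf, sizeof(buf), "%d", INT_MIN);

	/* "-2147483648" is 11 characters, so it fits along with its NUL. */
	printf("formatted %d chars into a %zu-byte buffer: %s\n",
	       n, sizeof(buf), buf);
	return 0;
}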

View File

@ -359,11 +359,4 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
pte_clear(&init_mm, (vaddr), (ptep)); \
__flush_tlb_one((vaddr)); \
} while (0)
#endif

View File

@ -28,20 +28,10 @@ struct thread_struct {
struct arch_thread arch;
jmp_buf switch_buf;
struct {
int op;
union {
struct {
int pid;
} fork, exec;
struct {
int (*proc)(void *);
void *arg;
} thread;
struct {
void (*proc)(void *);
void *arg;
} cb;
} u;
struct {
int (*proc)(void *);
void *arg;
} thread;
} request;
};
@ -51,7 +41,7 @@ struct thread_struct {
.fault_addr = NULL, \
.prev_sched = NULL, \
.arch = INIT_ARCH_THREAD, \
.request = { 0 } \
.request = { } \
}
/*

View File

@ -1,8 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_SYSRQ_H
#define __UM_SYSRQ_H
struct task_struct;
extern void show_trace(struct task_struct* task, unsigned long *stack);
#endif

View File

@ -7,10 +7,7 @@
#define __MM_ID_H
struct mm_id {
union {
int mm_fd;
int pid;
} u;
int pid;
unsigned long stack;
int syscall_data_len;
};

View File

@ -10,10 +10,8 @@
extern int userspace_pid[];
extern int user_thread(unsigned long stack, int flags);
extern void new_thread_handler(void);
extern void handle_syscall(struct uml_pt_regs *regs);
extern long execute_syscall_skas(void *r);
extern unsigned long current_stub_stack(void);
extern struct mm_id *current_mm_id(void);
extern void current_mm_sync(void);

View File

@ -35,8 +35,5 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp;
clear_thread_flag(TIF_SINGLESTEP);
#ifdef SUBARCH_EXECVE1
SUBARCH_EXECVE1(regs->regs);
#endif
}
EXPORT_SYMBOL(start_thread);

View File

@ -109,8 +109,8 @@ void new_thread_handler(void)
schedule_tail(current->thread.prev_sched);
current->thread.prev_sched = NULL;
fn = current->thread.request.u.thread.proc;
arg = current->thread.request.u.thread.arg;
fn = current->thread.request.thread.proc;
arg = current->thread.request.thread.arg;
/*
* callback returns only if the kernel thread execs a process
@ -158,8 +158,8 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
arch_copy_thread(&current->thread.arch, &p->thread.arch);
} else {
get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
p->thread.request.u.thread.proc = args->fn;
p->thread.request.u.thread.arg = args->fn_arg;
p->thread.request.thread.proc = args->fn;
p->thread.request.thread.arg = args->fn_arg;
handler = new_thread_handler;
}

View File

@ -29,7 +29,7 @@ static void kill_off_processes(void)
t = find_lock_task_mm(p);
if (!t)
continue;
pid = t->mm->context.id.u.pid;
pid = t->mm->context.id.pid;
task_unlock(t);
os_kill_ptraced_process(pid, 1);
}

View File

@ -32,11 +32,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
new_id->stack = stack;
block_signals_trace();
new_id->u.pid = start_userspace(stack);
new_id->pid = start_userspace(stack);
unblock_signals_trace();
if (new_id->u.pid < 0) {
ret = new_id->u.pid;
if (new_id->pid < 0) {
ret = new_id->pid;
goto out_free;
}
@ -83,12 +83,12 @@ void destroy_context(struct mm_struct *mm)
* whole UML suddenly dying. Also, cover negative and
* 1 cases, since they shouldn't happen either.
*/
if (mmu->id.u.pid < 2) {
if (mmu->id.pid < 2) {
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
mmu->id.u.pid);
mmu->id.pid);
return;
}
os_kill_ptraced_process(mmu->id.u.pid, 1);
os_kill_ptraced_process(mmu->id.pid, 1);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
}

View File

@ -39,8 +39,8 @@ int __init start_uml(void)
init_new_thread_signals();
init_task.thread.request.u.thread.proc = start_kernel_proc;
init_task.thread.request.u.thread.arg = NULL;
init_task.thread.request.thread.proc = start_kernel_proc;
init_task.thread.request.thread.arg = NULL;
return start_idle_thread(task_stack_page(&init_task),
&init_task.thread.switch_buf);
}

View File

@ -12,23 +12,13 @@
#include <sysdep/syscalls.h>
#include <linux/time-internal.h>
#include <asm/unistd.h>
#include <asm/delay.h>
void handle_syscall(struct uml_pt_regs *r)
{
struct pt_regs *regs = container_of(r, struct pt_regs, regs);
int syscall;
/*
* If we have infinite CPU resources, then make every syscall also a
* preemption point, since we don't have any other preemption in this
* case, and kernel threads would basically never run until userspace
* went to sleep, even if said userspace interacts with the kernel in
* various ways.
*/
if (time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL)
schedule();
/* Initialize the syscall number and default return value. */
UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
@ -41,9 +31,25 @@ void handle_syscall(struct uml_pt_regs *r)
goto out;
syscall = UPT_SYSCALL_NR(r);
if (syscall >= 0 && syscall < __NR_syscalls)
PT_REGS_SET_SYSCALL_RETURN(regs,
EXECUTE_SYSCALL(syscall, regs));
if (syscall >= 0 && syscall < __NR_syscalls) {
unsigned long ret = EXECUTE_SYSCALL(syscall, regs);
PT_REGS_SET_SYSCALL_RETURN(regs, ret);
/*
* An error value here can be some form of -ERESTARTSYS
* and then we'd just loop. Make any error syscalls take
* some time, so that it won't just loop if something is
* not ready, and hopefully other things will make some
* progress.
*/
if (IS_ERR_VALUE(ret) &&
(time_travel_mode == TT_MODE_INFCPU ||
time_travel_mode == TT_MODE_EXTERNAL)) {
um_udelay(1);
schedule();
}
}
out:
syscall_trace_leave(regs);
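
The rework above replaces the old "schedule on every syscall" hack: only syscalls that return an error value, and only in the infinite-CPU and external time-travel modes, now pay a small virtual delay (um_udelay(1)) before rescheduling, so a task busy-retrying a not-ready resource cannot spin without virtual time ever advancing. A toy model of that failure mode (ordinary userspace C, purely illustrative, not the UML code):

#include <stdbool.h>
#include <stdio.h>

/* Under infinite-CPU time travel, virtual time only moves when someone
 * asks for a delay.  Model a syscall that stays "not ready" until the
 * virtual clock reaches 1 ms: if each failed attempt were retried for
 * free, the loop below would never terminate; charging 1 us per attempt
 * (the analogue of um_udelay(1)) lets time reach the event.
 */
static unsigned long long virt_time_ns;
static const unsigned long long event_ready_at_ns = 1000000;	/* 1 ms */

static bool toy_syscall(void)
{
	return virt_time_ns >= event_ready_at_ns;	/* "-EAGAIN" until then */
}

int main(void)
{
	unsigned long attempts = 0;

	while (!toy_syscall()) {
		attempts++;
		virt_time_ns += 1000;	/* without this, we spin forever */
	}
	printf("ready after %llu virtual ns and %lu retries\n",
	       virt_time_ns, attempts);
	return 0;
}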

View File

@ -11,7 +11,6 @@
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <asm/sysrq.h>
#include <asm/stacktrace.h>
#include <os.h>

View File

@ -839,7 +839,7 @@ static irqreturn_t um_timer(int irq, void *dev)
if (get_current()->mm != NULL)
{
/* userspace - relay signal, results in correct userspace timers */
os_alarm_process(get_current()->mm->context.id.u.pid);
os_alarm_process(get_current()->mm->context.id.pid);
}
(*timer_clockevent.event_handler)(&timer_clockevent);

View File

@ -82,16 +82,12 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
(x ? UM_PROT_EXEC : 0));
if (pte_newpage(*pte)) {
if (pte_present(*pte)) {
if (pte_newpage(*pte)) {
__u64 offset;
unsigned long phys =
pte_val(*pte) & PAGE_MASK;
int fd = phys_mapping(phys, &offset);
__u64 offset;
unsigned long phys = pte_val(*pte) & PAGE_MASK;
int fd = phys_mapping(phys, &offset);
ret = ops->mmap(ops->mm_idp, addr,
PAGE_SIZE, prot, fd,
offset);
}
ret = ops->mmap(ops->mm_idp, addr, PAGE_SIZE,
prot, fd, offset);
} else
ret = ops->unmap(ops->mm_idp, addr, PAGE_SIZE);
} else if (pte_newprot(*pte))

View File

@ -528,7 +528,8 @@ int os_shutdown_socket(int fd, int r, int w)
ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
void *data, size_t data_len)
{
char buf[CMSG_SPACE(sizeof(*fds) * n_fds)];
#define MAX_RCV_FDS 2
char buf[CMSG_SPACE(sizeof(*fds) * MAX_RCV_FDS)];
struct cmsghdr *cmsg;
struct iovec iov = {
.iov_base = data,
@ -538,10 +539,13 @@ ssize_t os_rcv_fd_msg(int fd, int *fds, unsigned int n_fds,
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = buf,
.msg_controllen = sizeof(buf),
.msg_controllen = CMSG_SPACE(sizeof(*fds) * n_fds),
};
int n;
if (n_fds > MAX_RCV_FDS)
return -EINVAL;
n = recvmsg(fd, &msg, 0);
if (n < 0)
return -errno;
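
The os_rcv_fd_msg() change above removes the variable-length control buffer by sizing it for at most MAX_RCV_FDS descriptors and rejecting larger requests up front. A hedged userspace sketch of the same recvmsg()/SCM_RIGHTS pattern (illustrative helper, not the UML function itself):

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define DEMO_MAX_RCV_FDS 2	/* mirrors the MAX_RCV_FDS bound above */

/* Receive up to two file descriptors plus a data payload from a UNIX
 * socket, using a fixed-size ancillary buffer instead of a VLA.
 */
static ssize_t demo_rcv_fd_msg(int sock, int *fds, unsigned int n_fds,
			       void *data, size_t data_len)
{
	char buf[CMSG_SPACE(sizeof(int) * DEMO_MAX_RCV_FDS)];
	struct iovec iov = { .iov_base = data, .iov_len = data_len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = buf,
		.msg_controllen = CMSG_SPACE(sizeof(int) * n_fds),
	};
	struct cmsghdr *cmsg;
	ssize_t n;

	if (n_fds > DEMO_MAX_RCV_FDS)
		return -EINVAL;

	n = recvmsg(sock, &msg, 0);
	if (n < 0)
		return -errno;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_RIGHTS) {
		size_t bytes = cmsg->cmsg_len - CMSG_LEN(0);

		if (bytes > n_fds * sizeof(int))
			bytes = n_fds * sizeof(int);
		memcpy(fds, CMSG_DATA(cmsg), bytes);
	}

	return n;
}

A caller would pass an int fds[1] or fds[2] array and check the return value, much like the in-tree users of os_rcv_fd_msg() do.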

View File

@ -78,7 +78,7 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
{
struct stub_data *proc_data = (void *)mm_idp->stack;
int n, i;
int err, pid = mm_idp->u.pid;
int err, pid = mm_idp->pid;
n = ptrace_setregs(pid, syscall_regs);
if (n < 0) {

View File

@ -588,5 +588,5 @@ void reboot_skas(void)
void __switch_mm(struct mm_id *mm_idp)
{
userspace_pid[0] = mm_idp->u.pid;
userspace_pid[0] = mm_idp->pid;
}

View File

@ -9,7 +9,6 @@
#include <linux/sched/debug.h>
#include <linux/kallsyms.h>
#include <asm/ptrace.h>
#include <asm/sysrq.h>
/* This is declared by <linux/sched.h> */
void show_regs(struct pt_regs *regs)

View File

@ -12,7 +12,6 @@
#include <linux/utsname.h>
#include <asm/current.h>
#include <asm/ptrace.h>
#include <asm/sysrq.h>
void show_regs(struct pt_regs *regs)
{