mirror of
https://github.com/torvalds/linux.git
synced 2024-12-29 06:12:08 +00:00
net: rds: use maybe_get_net() when acquiring refcount on TCP sockets
Eric Dumazet is reporting addition on 0 problem at rds_tcp_tune(), for
delayed works queued in rds_wq might be invoked after a net namespace's
refcount already reached 0.
Since rds_tcp_exit_net() from cleanup_net() calls flush_workqueue(rds_wq),
it is guaranteed that we can instead use maybe_get_net() from delayed work
functions until rds_tcp_exit_net() returns.
Note that I'm not convinced that all works which might access a net
namespace are already queued in rds_wq by the moment rds_tcp_exit_net()
calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net()
will fail to wait for work functions, and kmem_cache_free() could be
called from net_free() before maybe_get_net() is called from
rds_tcp_tune().
Reported-by: Eric Dumazet <edumazet@google.com>
Fixes: 3a58f13a88 ("net: rds: acquire refcount on TCP sockets")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
68533eb1fb
commit
6997fbd7a3
@ -487,11 +487,11 @@ struct rds_tcp_net {
|
||||
/* All module specific customizations to the RDS-TCP socket should be done in
|
||||
* rds_tcp_tune() and applied after socket creation.
|
||||
*/
|
||||
void rds_tcp_tune(struct socket *sock)
|
||||
bool rds_tcp_tune(struct socket *sock)
|
||||
{
|
||||
struct sock *sk = sock->sk;
|
||||
struct net *net = sock_net(sk);
|
||||
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
|
||||
struct rds_tcp_net *rtn;
|
||||
|
||||
tcp_sock_set_nodelay(sock->sk);
|
||||
lock_sock(sk);
|
||||
@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock)
|
||||
* a process which created this net namespace terminated.
|
||||
*/
|
||||
if (!sk->sk_net_refcnt) {
|
||||
if (!maybe_get_net(net)) {
|
||||
release_sock(sk);
|
||||
return false;
|
||||
}
|
||||
sk->sk_net_refcnt = 1;
|
||||
get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
|
||||
netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
|
||||
sock_inuse_add(net, 1);
|
||||
}
|
||||
rtn = net_generic(net, rds_tcp_netid);
|
||||
if (rtn->sndbuf_size > 0) {
|
||||
sk->sk_sndbuf = rtn->sndbuf_size;
|
||||
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
|
||||
@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
|
||||
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
|
||||
}
|
||||
release_sock(sk);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void rds_tcp_accept_worker(struct work_struct *work)
|
||||
|
@ -49,7 +49,7 @@ struct rds_tcp_statistics {
|
||||
};
|
||||
|
||||
/* tcp.c */
|
||||
void rds_tcp_tune(struct socket *sock);
|
||||
bool rds_tcp_tune(struct socket *sock);
|
||||
void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
||||
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
|
||||
void rds_tcp_restore_callbacks(struct socket *sock,
|
||||
|
@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
rds_tcp_tune(sock);
|
||||
if (!rds_tcp_tune(sock)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (isv6) {
|
||||
sin6.sin6_family = AF_INET6;
|
||||
|
@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock)
|
||||
__module_get(new_sock->ops->owner);
|
||||
|
||||
rds_tcp_keepalive(new_sock);
|
||||
rds_tcp_tune(new_sock);
|
||||
if (!rds_tcp_tune(new_sock)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
inet = inet_sk(new_sock->sk);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user