mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
3f8ef65af9
Fixes the below NULL pointer dereference: [...] [ 14.471200] Call Trace: [ 14.471562] <TASK> [ 14.471882] lock_acquire+0x245/0x2e0 [ 14.472416] ? remove_wait_queue+0x12/0x50 [ 14.473014] ? _raw_spin_lock_irqsave+0x17/0x50 [ 14.473681] _raw_spin_lock_irqsave+0x3d/0x50 [ 14.474318] ? remove_wait_queue+0x12/0x50 [ 14.474907] remove_wait_queue+0x12/0x50 [ 14.475480] sk_stream_wait_memory+0x20d/0x340 [ 14.476127] ? do_wait_intr_irq+0x80/0x80 [ 14.476704] do_tcp_sendpages+0x287/0x600 [ 14.477283] tcp_bpf_push+0xab/0x260 [ 14.477817] tcp_bpf_sendmsg_redir+0x297/0x500 [ 14.478461] ? __local_bh_enable_ip+0x77/0xe0 [ 14.479096] tcp_bpf_send_verdict+0x105/0x470 [ 14.479729] tcp_bpf_sendmsg+0x318/0x4f0 [ 14.480311] sock_sendmsg+0x2d/0x40 [ 14.480822] ____sys_sendmsg+0x1b4/0x1c0 [ 14.481390] ? copy_msghdr_from_user+0x62/0x80 [ 14.482048] ___sys_sendmsg+0x78/0xb0 [ 14.482580] ? vmf_insert_pfn_prot+0x91/0x150 [ 14.483215] ? __do_fault+0x2a/0x1a0 [ 14.483738] ? do_fault+0x15e/0x5d0 [ 14.484246] ? __handle_mm_fault+0x56b/0x1040 [ 14.484874] ? lock_is_held_type+0xdf/0x130 [ 14.485474] ? find_held_lock+0x2d/0x90 [ 14.486046] ? __sys_sendmsg+0x41/0x70 [ 14.486587] __sys_sendmsg+0x41/0x70 [ 14.487105] ? intel_pmu_drain_pebs_core+0x350/0x350 [ 14.487822] do_syscall_64+0x34/0x80 [ 14.488345] entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] The test scenario has the following flow: thread1 thread2 ----------- --------------- tcp_bpf_sendmsg tcp_bpf_send_verdict tcp_bpf_sendmsg_redir sock_close tcp_bpf_push_locked __sock_release tcp_bpf_push //inet_release do_tcp_sendpages sock->ops->release sk_stream_wait_memory // tcp_close sk_wait_event sk->sk_prot->close release_sock(__sk); *** lock_sock(sk); __tcp_close sock_orphan(sk) sk->sk_wq = NULL release_sock **** lock_sock(__sk); remove_wait_queue(sk_sleep(sk), &wait); sk_sleep(sk) //NULL pointer dereference &rcu_dereference_raw(sk->sk_wq)->wait While waiting for memory in thread1, the socket is released with its wait queue because thread2 has closed it. This caused by tcp_bpf_send_verdict didn't increase the f_count of psock->sk_redir->sk_socket->file in thread1. We should check if SOCK_DEAD flag is set on wakeup in sk_stream_wait_memory before accessing the wait queue. Suggested-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Liu Jian <liujian56@huawei.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: John Fastabend <john.fastabend@gmail.com> Cc: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/bpf/20220823133755.314697-2-liujian56@huawei.com
214 lines
5.4 KiB
C
214 lines
5.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* SUCS NET3:
|
|
*
|
|
* Generic stream handling routines. These are generic for most
|
|
* protocols. Even IP. Tonight 8-).
|
|
* This is used because TCP, LLC (others too) layer all have mostly
|
|
* identical sendmsg() and recvmsg() code.
|
|
* So we (will) share it here.
|
|
*
|
|
* Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
|
|
* (from old tcp.c code)
|
|
* Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-))
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/sched/signal.h>
|
|
#include <linux/net.h>
|
|
#include <linux/signal.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/wait.h>
|
|
#include <net/sock.h>
|
|
|
|
/**
|
|
* sk_stream_write_space - stream socket write_space callback.
|
|
* @sk: socket
|
|
*
|
|
* FIXME: write proper description
|
|
*/
|
|
void sk_stream_write_space(struct sock *sk)
|
|
{
|
|
struct socket *sock = sk->sk_socket;
|
|
struct socket_wq *wq;
|
|
|
|
if (__sk_stream_is_writeable(sk, 1) && sock) {
|
|
clear_bit(SOCK_NOSPACE, &sock->flags);
|
|
|
|
rcu_read_lock();
|
|
wq = rcu_dereference(sk->sk_wq);
|
|
if (skwq_has_sleeper(wq))
|
|
wake_up_interruptible_poll(&wq->wait, EPOLLOUT |
|
|
EPOLLWRNORM | EPOLLWRBAND);
|
|
if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
|
|
sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
|
|
rcu_read_unlock();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* sk_stream_wait_connect - Wait for a socket to get into the connected state
|
|
* @sk: sock to wait on
|
|
* @timeo_p: for how long to wait
|
|
*
|
|
* Must be called with the socket locked.
|
|
*/
|
|
int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
|
|
{
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
struct task_struct *tsk = current;
|
|
int done;
|
|
|
|
do {
|
|
int err = sock_error(sk);
|
|
if (err)
|
|
return err;
|
|
if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
|
|
return -EPIPE;
|
|
if (!*timeo_p)
|
|
return -EAGAIN;
|
|
if (signal_pending(tsk))
|
|
return sock_intr_errno(*timeo_p);
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
sk->sk_write_pending++;
|
|
done = sk_wait_event(sk, timeo_p,
|
|
!sk->sk_err &&
|
|
!((1 << sk->sk_state) &
|
|
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
sk->sk_write_pending--;
|
|
} while (!done);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_connect);
|
|
|
|
/**
|
|
* sk_stream_closing - Return 1 if we still have things to send in our buffers.
|
|
* @sk: socket to verify
|
|
*/
|
|
static inline int sk_stream_closing(struct sock *sk)
|
|
{
|
|
return (1 << sk->sk_state) &
|
|
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
|
|
}
|
|
|
|
void sk_stream_wait_close(struct sock *sk, long timeout)
|
|
{
|
|
if (timeout) {
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
do {
|
|
if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait))
|
|
break;
|
|
} while (!signal_pending(current) && timeout);
|
|
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_close);
|
|
|
|
/**
|
|
* sk_stream_wait_memory - Wait for more memory for a socket
|
|
* @sk: socket to wait for memory
|
|
* @timeo_p: for how long
|
|
*/
|
|
int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
|
|
{
|
|
int err = 0;
|
|
long vm_wait = 0;
|
|
long current_timeo = *timeo_p;
|
|
DEFINE_WAIT_FUNC(wait, woken_wake_function);
|
|
|
|
if (sk_stream_memory_free(sk))
|
|
current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
|
|
|
|
add_wait_queue(sk_sleep(sk), &wait);
|
|
|
|
while (1) {
|
|
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
|
|
|
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
|
|
goto do_error;
|
|
if (!*timeo_p)
|
|
goto do_eagain;
|
|
if (signal_pending(current))
|
|
goto do_interrupted;
|
|
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
|
|
if (sk_stream_memory_free(sk) && !vm_wait)
|
|
break;
|
|
|
|
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
|
sk->sk_write_pending++;
|
|
sk_wait_event(sk, ¤t_timeo, sk->sk_err ||
|
|
(sk->sk_shutdown & SEND_SHUTDOWN) ||
|
|
(sk_stream_memory_free(sk) &&
|
|
!vm_wait), &wait);
|
|
sk->sk_write_pending--;
|
|
|
|
if (vm_wait) {
|
|
vm_wait -= current_timeo;
|
|
current_timeo = *timeo_p;
|
|
if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
|
|
(current_timeo -= vm_wait) < 0)
|
|
current_timeo = 0;
|
|
vm_wait = 0;
|
|
}
|
|
*timeo_p = current_timeo;
|
|
}
|
|
out:
|
|
if (!sock_flag(sk, SOCK_DEAD))
|
|
remove_wait_queue(sk_sleep(sk), &wait);
|
|
return err;
|
|
|
|
do_error:
|
|
err = -EPIPE;
|
|
goto out;
|
|
do_eagain:
|
|
/* Make sure that whenever EAGAIN is returned, EPOLLOUT event can
|
|
* be generated later.
|
|
* When TCP receives ACK packets that make room, tcp_check_space()
|
|
* only calls tcp_new_space() if SOCK_NOSPACE is set.
|
|
*/
|
|
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
|
|
err = -EAGAIN;
|
|
goto out;
|
|
do_interrupted:
|
|
err = sock_intr_errno(*timeo_p);
|
|
goto out;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_wait_memory);
|
|
|
|
int sk_stream_error(struct sock *sk, int flags, int err)
|
|
{
|
|
if (err == -EPIPE)
|
|
err = sock_error(sk) ? : -EPIPE;
|
|
if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
|
|
send_sig(SIGPIPE, current, 0);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_error);
|
|
|
|
void sk_stream_kill_queues(struct sock *sk)
|
|
{
|
|
/* First the read buffer. */
|
|
__skb_queue_purge(&sk->sk_receive_queue);
|
|
|
|
/* Next, the write queue. */
|
|
WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue));
|
|
|
|
/* Account for returned memory. */
|
|
sk_mem_reclaim_final(sk);
|
|
|
|
WARN_ON_ONCE(sk->sk_wmem_queued);
|
|
WARN_ON_ONCE(sk->sk_forward_alloc);
|
|
|
|
/* It is _impossible_ for the backlog to contain anything
|
|
* when we get here. All user references to this socket
|
|
* have gone away, only the net layer knows can touch it.
|
|
*/
|
|
}
|
|
EXPORT_SYMBOL(sk_stream_kill_queues);
|