nbd: Fix hang on disconnect request if socket is closed beforehand

When userspace closes the socket before sending a disconnect
request, subsequent I/O requests are blocked in
wait_for_reconnect() until the dead connection timeout
(dead_conn_timeout) expires. This causes the following
disconnect request to hang as well, in blk_mq_quiesce_queue().
That means there is no way to disconnect an nbd device while
I/O requests are waiting for a reconnect, until the dead
connection timeout expires. This is not the expected behavior.
So wake up the threads waiting for a reconnect directly when a
disconnect request is sent.

Reported-by: Xu Jianhai <zero.xu@bytedance.com>
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20220322080639.142-1-xieyongji@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Xie Yongji 2022-03-22 16:06:39 +08:00 committed by Jens Axboe
parent c23d47abee
commit 491bf8f236

View File

@ -946,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
struct nbd_config *config = nbd->config; struct nbd_config *config = nbd->config;
if (!config->dead_conn_timeout) if (!config->dead_conn_timeout)
return 0; return 0;
if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
if (!wait_event_timeout(config->conn_wait,
test_bit(NBD_RT_DISCONNECTED,
&config->runtime_flags) ||
atomic_read(&config->live_connections) > 0,
config->dead_conn_timeout))
return 0; return 0;
return wait_event_timeout(config->conn_wait,
atomic_read(&config->live_connections) > 0, return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
config->dead_conn_timeout) > 0;
} }
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@ -2076,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
mutex_lock(&nbd->config_lock); mutex_lock(&nbd->config_lock);
nbd_disconnect(nbd); nbd_disconnect(nbd);
sock_shutdown(nbd); sock_shutdown(nbd);
wake_up(&nbd->config->conn_wait);
/* /*
* Make sure recv thread has finished, we can safely call nbd_clear_que() * Make sure recv thread has finished, we can safely call nbd_clear_que()
* to cancel the inflight I/Os. * to cancel the inflight I/Os.