248f219cb8
Rewrite the data and ack handling code such that: (1) Parsing of received ACK and ABORT packets and the distribution and the filing of DATA packets happens entirely within the data_ready context called from the UDP socket. This allows us to process and discard ACK and ABORT packets much more quickly (they're no longer stashed on a queue for a background thread to process). (2) We avoid calling skb_clone(), pskb_pull() and pskb_trim(). We instead keep track of the offset and length of the content of each packet in the sk_buff metadata. This means we don't do any allocation in the receive path. (3) Jumbo DATA packet parsing is now done in data_ready context. Rather than cloning the packet once for each subpacket and pulling/trimming it, we file the packet multiple times with an annotation for each indicating which subpacket is there. From that we can directly calculate the offset and length. (4) A call's receive queue can be accessed without taking locks (memory barriers do have to be used, though). (5) Incoming calls are set up from preallocated resources and immediately made live. They can then have packets queued upon them and ACKs generated. If insufficient resources exist, DATA packet #1 is given a BUSY reply and other DATA packets are discarded. (6) sk_buffs no longer take a ref on their parent call. To make this work, the following changes are made: (1) Each call's receive buffer is now a circular buffer of sk_buff pointers (rxtx_buffer) rather than a number of sk_buff_heads spread between the call and the socket. This permits each sk_buff to be in the buffer multiple times. The receive buffer is reused for the transmit buffer. (2) A circular buffer of annotations (rxtx_annotations) is kept parallel to the data buffer. Transmission phase annotations indicate whether a buffered packet has been ACK'd or not and whether it needs retransmission. 
Receive phase annotations indicate whether a slot holds a whole packet or a jumbo subpacket and, if the latter, which subpacket. They also note whether the packet has been decrypted in place. (3) DATA packet window tracking is much simplified. Each phase has just two numbers representing the window (rx_hard_ack/rx_top and tx_hard_ack/tx_top). The hard_ack number is the sequence number before the base of the window, representing the last packet the other side says it has consumed. hard_ack starts from 0 and the first packet is sequence number 1. The top number is the sequence number of the highest-numbered packet residing in the buffer. Packets between hard_ack+1 and top are soft-ACK'd to indicate they've been received, but not yet consumed. Four macros, before(), before_eq(), after() and after_eq() are added to compare sequence numbers within the window. This allows for the top of the window to wrap when the hard-ack sequence number gets close to the limit. Two flags, RXRPC_CALL_RX_LAST and RXRPC_CALL_TX_LAST, are added also to indicate when rx_top and tx_top point at the packets with the LAST_PACKET bit set, indicating the end of the phase. (4) Calls are queued on the socket 'receive queue' rather than packets. This means that we don't have to invent dummy packets to queue to indicate abnormal/terminal states and we don't have to keep metadata packets (such as ABORTs) around. (5) The offset and length of a (sub)packet's content are now passed to the verify_packet security op. This is currently expected to decrypt the packet in place and validate it. However, there's now nowhere to store the revised offset and length of the actual data within the decrypted blob (there may be a header and padding to skip) because an sk_buff may represent multiple packets, so a locate_data security op is added to retrieve these details from the sk_buff content when needed. 
(6) recvmsg() now has to handle jumbo subpackets, where each subpacket is individually secured and needs to be individually decrypted. The code to do this is broken out into rxrpc_recvmsg_data() and shared with the kernel API. It now iterates over the call's receive buffer rather than walking the socket receive queue. Additional changes: (1) The timers are condensed to a single timer that is set for the soonest of three timeouts (delayed ACK generation, DATA retransmission and call lifespan). (2) Transmission of ACK and ABORT packets is effected immediately from process-context socket ops/kernel API calls that cause them instead of them being punted off to a background work item. The data_ready handler still has to defer to the background, though. (3) A shutdown op is added to the AF_RXRPC socket so that the AFS filesystem can shut down the socket and flush its own work items before closing the socket to deal with any in-progress service calls. Future additional changes that will need to be considered: (1) Make sure that a call doesn't hog the front of the queue by receiving data from the network as fast as userspace is consuming it to the exclusion of other calls. (2) Transmit delayed ACKs from within recvmsg() when we've consumed sufficiently more packets to avoid the background work item needing to run. Signed-off-by: David Howells <dhowells@redhat.com>
859 lines
20 KiB
C
859 lines
20 KiB
C
/* AF_RXRPC implementation
|
|
*
|
|
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/net.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/random.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/key-type.h>
|
|
#include <net/net_namespace.h>
|
|
#include <net/sock.h>
|
|
#include <net/af_rxrpc.h>
|
|
#define CREATE_TRACE_POINTS
|
|
#include "ar-internal.h"
|
|
|
|
MODULE_DESCRIPTION("RxRPC network protocol");
|
|
MODULE_AUTHOR("Red Hat, Inc.");
|
|
MODULE_LICENSE("GPL");
|
|
MODULE_ALIAS_NETPROTO(PF_RXRPC);
|
|
|
|
unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
|
|
module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
|
|
MODULE_PARM_DESC(debug, "RxRPC debugging mask");
|
|
|
|
static struct proto rxrpc_proto;
|
|
static const struct proto_ops rxrpc_rpc_ops;
|
|
|
|
/* local epoch for detecting local-end reset */
|
|
u32 rxrpc_epoch;
|
|
|
|
/* current debugging ID */
|
|
atomic_t rxrpc_debug_id;
|
|
|
|
/* count of skbs currently in use */
|
|
atomic_t rxrpc_n_skbs;
|
|
|
|
struct workqueue_struct *rxrpc_workqueue;
|
|
|
|
static void rxrpc_sock_destructor(struct sock *);
|
|
|
|
/*
|
|
* see if an RxRPC socket is currently writable
|
|
*/
|
|
static inline int rxrpc_writable(struct sock *sk)
|
|
{
|
|
return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
|
|
}
|
|
|
|
/*
|
|
* wait for write bufferage to become available
|
|
*/
|
|
static void rxrpc_write_space(struct sock *sk)
|
|
{
|
|
_enter("%p", sk);
|
|
rcu_read_lock();
|
|
if (rxrpc_writable(sk)) {
|
|
struct socket_wq *wq = rcu_dereference(sk->sk_wq);
|
|
|
|
if (skwq_has_sleeper(wq))
|
|
wake_up_interruptible(&wq->wait);
|
|
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
|
|
}
|
|
rcu_read_unlock();
|
|
}
|
|
|
|
/*
|
|
* validate an RxRPC address
|
|
*/
|
|
static int rxrpc_validate_address(struct rxrpc_sock *rx,
|
|
struct sockaddr_rxrpc *srx,
|
|
int len)
|
|
{
|
|
unsigned int tail;
|
|
|
|
if (len < sizeof(struct sockaddr_rxrpc))
|
|
return -EINVAL;
|
|
|
|
if (srx->srx_family != AF_RXRPC)
|
|
return -EAFNOSUPPORT;
|
|
|
|
if (srx->transport_type != SOCK_DGRAM)
|
|
return -ESOCKTNOSUPPORT;
|
|
|
|
len -= offsetof(struct sockaddr_rxrpc, transport);
|
|
if (srx->transport_len < sizeof(sa_family_t) ||
|
|
srx->transport_len > len)
|
|
return -EINVAL;
|
|
|
|
if (srx->transport.family != rx->family)
|
|
return -EAFNOSUPPORT;
|
|
|
|
switch (srx->transport.family) {
|
|
case AF_INET:
|
|
if (srx->transport_len < sizeof(struct sockaddr_in))
|
|
return -EINVAL;
|
|
_debug("INET: %x @ %pI4",
|
|
ntohs(srx->transport.sin.sin_port),
|
|
&srx->transport.sin.sin_addr);
|
|
tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
|
|
break;
|
|
|
|
case AF_INET6:
|
|
default:
|
|
return -EAFNOSUPPORT;
|
|
}
|
|
|
|
if (tail < len)
|
|
memset((void *)srx + tail, 0, len - tail);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* bind a local address to an RxRPC socket
|
|
*/
|
|
static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
|
|
{
|
|
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
|
|
struct sock *sk = sock->sk;
|
|
struct rxrpc_local *local;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
|
|
int ret;
|
|
|
|
_enter("%p,%p,%d", rx, saddr, len);
|
|
|
|
ret = rxrpc_validate_address(rx, srx, len);
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
lock_sock(&rx->sk);
|
|
|
|
if (rx->sk.sk_state != RXRPC_UNBOUND) {
|
|
ret = -EINVAL;
|
|
goto error_unlock;
|
|
}
|
|
|
|
memcpy(&rx->srx, srx, sizeof(rx->srx));
|
|
|
|
local = rxrpc_lookup_local(&rx->srx);
|
|
if (IS_ERR(local)) {
|
|
ret = PTR_ERR(local);
|
|
goto error_unlock;
|
|
}
|
|
|
|
if (rx->srx.srx_service) {
|
|
write_lock(&local->services_lock);
|
|
hlist_for_each_entry(prx, &local->services, listen_link) {
|
|
if (prx->srx.srx_service == rx->srx.srx_service)
|
|
goto service_in_use;
|
|
}
|
|
|
|
rx->local = local;
|
|
hlist_add_head_rcu(&rx->listen_link, &local->services);
|
|
write_unlock(&local->services_lock);
|
|
|
|
rx->sk.sk_state = RXRPC_SERVER_BOUND;
|
|
} else {
|
|
rx->local = local;
|
|
rx->sk.sk_state = RXRPC_CLIENT_BOUND;
|
|
}
|
|
|
|
release_sock(&rx->sk);
|
|
_leave(" = 0");
|
|
return 0;
|
|
|
|
service_in_use:
|
|
write_unlock(&local->services_lock);
|
|
rxrpc_put_local(local);
|
|
ret = -EADDRINUSE;
|
|
error_unlock:
|
|
release_sock(&rx->sk);
|
|
error:
|
|
_leave(" = %d", ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* set the number of pending calls permitted on a listening socket
|
|
*/
|
|
static int rxrpc_listen(struct socket *sock, int backlog)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sk);
|
|
unsigned int max, old;
|
|
int ret;
|
|
|
|
_enter("%p,%d", rx, backlog);
|
|
|
|
lock_sock(&rx->sk);
|
|
|
|
switch (rx->sk.sk_state) {
|
|
case RXRPC_UNBOUND:
|
|
ret = -EADDRNOTAVAIL;
|
|
break;
|
|
case RXRPC_SERVER_BOUND:
|
|
ASSERT(rx->local != NULL);
|
|
max = READ_ONCE(rxrpc_max_backlog);
|
|
ret = -EINVAL;
|
|
if (backlog == INT_MAX)
|
|
backlog = max;
|
|
else if (backlog < 0 || backlog > max)
|
|
break;
|
|
old = sk->sk_max_ack_backlog;
|
|
sk->sk_max_ack_backlog = backlog;
|
|
ret = rxrpc_service_prealloc(rx, GFP_KERNEL);
|
|
if (ret == 0)
|
|
rx->sk.sk_state = RXRPC_SERVER_LISTENING;
|
|
else
|
|
sk->sk_max_ack_backlog = old;
|
|
break;
|
|
default:
|
|
ret = -EBUSY;
|
|
break;
|
|
}
|
|
|
|
release_sock(&rx->sk);
|
|
_leave(" = %d", ret);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
|
|
* @sock: The socket on which to make the call
|
|
* @srx: The address of the peer to contact
|
|
* @key: The security context to use (defaults to socket setting)
|
|
* @user_call_ID: The ID to use
|
|
* @gfp: The allocation constraints
|
|
* @notify_rx: Where to send notifications instead of socket queue
|
|
*
|
|
* Allow a kernel service to begin a call on the nominated socket. This just
|
|
* sets up all the internal tracking structures and allocates connection and
|
|
* call IDs as appropriate. The call to be used is returned.
|
|
*
|
|
* The default socket destination address and security may be overridden by
|
|
* supplying @srx and @key.
|
|
*/
|
|
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
|
|
struct sockaddr_rxrpc *srx,
|
|
struct key *key,
|
|
unsigned long user_call_ID,
|
|
gfp_t gfp,
|
|
rxrpc_notify_rx_t notify_rx)
|
|
{
|
|
struct rxrpc_conn_parameters cp;
|
|
struct rxrpc_call *call;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
|
|
int ret;
|
|
|
|
_enter(",,%x,%lx", key_serial(key), user_call_ID);
|
|
|
|
ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
|
|
if (ret < 0)
|
|
return ERR_PTR(ret);
|
|
|
|
lock_sock(&rx->sk);
|
|
|
|
if (!key)
|
|
key = rx->key;
|
|
if (key && !key->payload.data[0])
|
|
key = NULL; /* a no-security key */
|
|
|
|
memset(&cp, 0, sizeof(cp));
|
|
cp.local = rx->local;
|
|
cp.key = key;
|
|
cp.security_level = 0;
|
|
cp.exclusive = false;
|
|
cp.service_id = srx->srx_service;
|
|
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
|
|
if (!IS_ERR(call))
|
|
call->notify_rx = notify_rx;
|
|
|
|
release_sock(&rx->sk);
|
|
_leave(" = %p", call);
|
|
return call;
|
|
}
|
|
EXPORT_SYMBOL(rxrpc_kernel_begin_call);
|
|
|
|
/**
|
|
* rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
|
|
* @sock: The socket the call is on
|
|
* @call: The call to end
|
|
*
|
|
* Allow a kernel service to end a call it was using. The call must be
|
|
* complete before this is called (the call should be aborted if necessary).
|
|
*/
|
|
void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
|
|
{
|
|
_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
|
|
rxrpc_release_call(rxrpc_sk(sock->sk), call);
|
|
rxrpc_put_call(call, rxrpc_call_put);
|
|
}
|
|
EXPORT_SYMBOL(rxrpc_kernel_end_call);
|
|
|
|
/**
|
|
* rxrpc_kernel_new_call_notification - Get notifications of new calls
|
|
* @sock: The socket to intercept received messages on
|
|
* @notify_new_call: Function to be called when new calls appear
|
|
* @discard_new_call: Function to discard preallocated calls
|
|
*
|
|
* Allow a kernel service to be given notifications about new calls.
|
|
*/
|
|
void rxrpc_kernel_new_call_notification(
|
|
struct socket *sock,
|
|
rxrpc_notify_new_call_t notify_new_call,
|
|
rxrpc_discard_new_call_t discard_new_call)
|
|
{
|
|
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
|
|
|
|
rx->notify_new_call = notify_new_call;
|
|
rx->discard_new_call = discard_new_call;
|
|
}
|
|
EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
|
|
|
|
/*
|
|
* connect an RxRPC socket
|
|
* - this just targets it at a specific destination; no actual connection
|
|
* negotiation takes place
|
|
*/
|
|
static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
|
|
int addr_len, int flags)
|
|
{
|
|
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
|
|
int ret;
|
|
|
|
_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
|
|
|
|
ret = rxrpc_validate_address(rx, srx, addr_len);
|
|
if (ret < 0) {
|
|
_leave(" = %d [bad addr]", ret);
|
|
return ret;
|
|
}
|
|
|
|
lock_sock(&rx->sk);
|
|
|
|
ret = -EISCONN;
|
|
if (test_bit(RXRPC_SOCK_CONNECTED, &rx->flags))
|
|
goto error;
|
|
|
|
switch (rx->sk.sk_state) {
|
|
case RXRPC_UNBOUND:
|
|
rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
|
|
case RXRPC_CLIENT_UNBOUND:
|
|
case RXRPC_CLIENT_BOUND:
|
|
break;
|
|
default:
|
|
ret = -EBUSY;
|
|
goto error;
|
|
}
|
|
|
|
rx->connect_srx = *srx;
|
|
set_bit(RXRPC_SOCK_CONNECTED, &rx->flags);
|
|
ret = 0;
|
|
|
|
error:
|
|
release_sock(&rx->sk);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* send a message through an RxRPC socket
|
|
* - in a client this does a number of things:
|
|
* - finds/sets up a connection for the security specified (if any)
|
|
* - initiates a call (ID in control data)
|
|
* - ends the request phase of a call (if MSG_MORE is not set)
|
|
* - sends a call data packet
|
|
* - may send an abort (abort code in control data)
|
|
*/
|
|
static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
|
|
{
|
|
struct rxrpc_local *local;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
|
|
int ret;
|
|
|
|
_enter(",{%d},,%zu", rx->sk.sk_state, len);
|
|
|
|
if (m->msg_flags & MSG_OOB)
|
|
return -EOPNOTSUPP;
|
|
|
|
if (m->msg_name) {
|
|
ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
|
|
if (ret < 0) {
|
|
_leave(" = %d [bad addr]", ret);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
lock_sock(&rx->sk);
|
|
|
|
switch (rx->sk.sk_state) {
|
|
case RXRPC_UNBOUND:
|
|
local = rxrpc_lookup_local(&rx->srx);
|
|
if (IS_ERR(local)) {
|
|
ret = PTR_ERR(local);
|
|
goto error_unlock;
|
|
}
|
|
|
|
rx->local = local;
|
|
rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
|
|
/* Fall through */
|
|
|
|
case RXRPC_CLIENT_UNBOUND:
|
|
case RXRPC_CLIENT_BOUND:
|
|
if (!m->msg_name &&
|
|
test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) {
|
|
m->msg_name = &rx->connect_srx;
|
|
m->msg_namelen = sizeof(rx->connect_srx);
|
|
}
|
|
case RXRPC_SERVER_BOUND:
|
|
case RXRPC_SERVER_LISTENING:
|
|
ret = rxrpc_do_sendmsg(rx, m, len);
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
error_unlock:
|
|
release_sock(&rx->sk);
|
|
_leave(" = %d", ret);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* set RxRPC socket options
|
|
*/
|
|
static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
|
|
char __user *optval, unsigned int optlen)
|
|
{
|
|
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
|
|
unsigned int min_sec_level;
|
|
int ret;
|
|
|
|
_enter(",%d,%d,,%d", level, optname, optlen);
|
|
|
|
lock_sock(&rx->sk);
|
|
ret = -EOPNOTSUPP;
|
|
|
|
if (level == SOL_RXRPC) {
|
|
switch (optname) {
|
|
case RXRPC_EXCLUSIVE_CONNECTION:
|
|
ret = -EINVAL;
|
|
if (optlen != 0)
|
|
goto error;
|
|
ret = -EISCONN;
|
|
if (rx->sk.sk_state != RXRPC_UNBOUND)
|
|
goto error;
|
|
rx->exclusive = true;
|
|
goto success;
|
|
|
|
case RXRPC_SECURITY_KEY:
|
|
ret = -EINVAL;
|
|
if (rx->key)
|
|
goto error;
|
|
ret = -EISCONN;
|
|
if (rx->sk.sk_state != RXRPC_UNBOUND)
|
|
goto error;
|
|
ret = rxrpc_request_key(rx, optval, optlen);
|
|
goto error;
|
|
|
|
case RXRPC_SECURITY_KEYRING:
|
|
ret = -EINVAL;
|
|
if (rx->key)
|
|
goto error;
|
|
ret = -EISCONN;
|
|
if (rx->sk.sk_state != RXRPC_UNBOUND)
|
|
goto error;
|
|
ret = rxrpc_server_keyring(rx, optval, optlen);
|
|
goto error;
|
|
|
|
case RXRPC_MIN_SECURITY_LEVEL:
|
|
ret = -EINVAL;
|
|
if (optlen != sizeof(unsigned int))
|
|
goto error;
|
|
ret = -EISCONN;
|
|
if (rx->sk.sk_state != RXRPC_UNBOUND)
|
|
goto error;
|
|
ret = get_user(min_sec_level,
|
|
(unsigned int __user *) optval);
|
|
if (ret < 0)
|
|
goto error;
|
|
ret = -EINVAL;
|
|
if (min_sec_level > RXRPC_SECURITY_MAX)
|
|
goto error;
|
|
rx->min_sec_level = min_sec_level;
|
|
goto success;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
success:
|
|
ret = 0;
|
|
error:
|
|
release_sock(&rx->sk);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* permit an RxRPC socket to be polled
|
|
*/
|
|
static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
|
|
poll_table *wait)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sk);
|
|
unsigned int mask;
|
|
|
|
sock_poll_wait(file, sk_sleep(sk), wait);
|
|
mask = 0;
|
|
|
|
/* the socket is readable if there are any messages waiting on the Rx
|
|
* queue */
|
|
if (!list_empty(&rx->recvmsg_q))
|
|
mask |= POLLIN | POLLRDNORM;
|
|
|
|
/* the socket is writable if there is space to add new data to the
|
|
* socket; there is no guarantee that any particular call in progress
|
|
* on the socket may have space in the Tx ACK window */
|
|
if (rxrpc_writable(sk))
|
|
mask |= POLLOUT | POLLWRNORM;
|
|
|
|
return mask;
|
|
}
|
|
|
|
/*
|
|
* create an RxRPC socket
|
|
*/
|
|
static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
|
|
int kern)
|
|
{
|
|
struct rxrpc_sock *rx;
|
|
struct sock *sk;
|
|
|
|
_enter("%p,%d", sock, protocol);
|
|
|
|
if (!net_eq(net, &init_net))
|
|
return -EAFNOSUPPORT;
|
|
|
|
/* we support transport protocol UDP/UDP6 only */
|
|
if (protocol != PF_INET)
|
|
return -EPROTONOSUPPORT;
|
|
|
|
if (sock->type != SOCK_DGRAM)
|
|
return -ESOCKTNOSUPPORT;
|
|
|
|
sock->ops = &rxrpc_rpc_ops;
|
|
sock->state = SS_UNCONNECTED;
|
|
|
|
sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern);
|
|
if (!sk)
|
|
return -ENOMEM;
|
|
|
|
sock_init_data(sock, sk);
|
|
sock_set_flag(sk, SOCK_RCU_FREE);
|
|
sk->sk_state = RXRPC_UNBOUND;
|
|
sk->sk_write_space = rxrpc_write_space;
|
|
sk->sk_max_ack_backlog = 0;
|
|
sk->sk_destruct = rxrpc_sock_destructor;
|
|
|
|
rx = rxrpc_sk(sk);
|
|
rx->family = protocol;
|
|
rx->calls = RB_ROOT;
|
|
|
|
INIT_HLIST_NODE(&rx->listen_link);
|
|
spin_lock_init(&rx->incoming_lock);
|
|
INIT_LIST_HEAD(&rx->sock_calls);
|
|
INIT_LIST_HEAD(&rx->to_be_accepted);
|
|
INIT_LIST_HEAD(&rx->recvmsg_q);
|
|
rwlock_init(&rx->recvmsg_lock);
|
|
rwlock_init(&rx->call_lock);
|
|
memset(&rx->srx, 0, sizeof(rx->srx));
|
|
|
|
_leave(" = 0 [%p]", rx);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Kill all the calls on a socket and shut it down.
|
|
*/
|
|
static int rxrpc_shutdown(struct socket *sock, int flags)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
struct rxrpc_sock *rx = rxrpc_sk(sk);
|
|
int ret = 0;
|
|
|
|
_enter("%p,%d", sk, flags);
|
|
|
|
if (flags != SHUT_RDWR)
|
|
return -EOPNOTSUPP;
|
|
if (sk->sk_state == RXRPC_CLOSE)
|
|
return -ESHUTDOWN;
|
|
|
|
lock_sock(sk);
|
|
|
|
spin_lock_bh(&sk->sk_receive_queue.lock);
|
|
if (sk->sk_state < RXRPC_CLOSE) {
|
|
sk->sk_state = RXRPC_CLOSE;
|
|
sk->sk_shutdown = SHUTDOWN_MASK;
|
|
} else {
|
|
ret = -ESHUTDOWN;
|
|
}
|
|
spin_unlock_bh(&sk->sk_receive_queue.lock);
|
|
|
|
rxrpc_discard_prealloc(rx);
|
|
|
|
release_sock(sk);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* RxRPC socket destructor
|
|
*/
|
|
static void rxrpc_sock_destructor(struct sock *sk)
|
|
{
|
|
_enter("%p", sk);
|
|
|
|
rxrpc_purge_queue(&sk->sk_receive_queue);
|
|
|
|
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
|
|
WARN_ON(!sk_unhashed(sk));
|
|
WARN_ON(sk->sk_socket);
|
|
|
|
if (!sock_flag(sk, SOCK_DEAD)) {
|
|
printk("Attempt to release alive rxrpc socket: %p\n", sk);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* release an RxRPC socket
|
|
*/
|
|
static int rxrpc_release_sock(struct sock *sk)
|
|
{
|
|
struct rxrpc_sock *rx = rxrpc_sk(sk);
|
|
|
|
_enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
|
|
|
|
/* declare the socket closed for business */
|
|
sock_orphan(sk);
|
|
sk->sk_shutdown = SHUTDOWN_MASK;
|
|
|
|
spin_lock_bh(&sk->sk_receive_queue.lock);
|
|
sk->sk_state = RXRPC_CLOSE;
|
|
spin_unlock_bh(&sk->sk_receive_queue.lock);
|
|
|
|
ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
|
|
|
|
if (!hlist_unhashed(&rx->listen_link)) {
|
|
write_lock(&rx->local->services_lock);
|
|
hlist_del_rcu(&rx->listen_link);
|
|
write_unlock(&rx->local->services_lock);
|
|
}
|
|
|
|
/* try to flush out this socket */
|
|
rxrpc_discard_prealloc(rx);
|
|
rxrpc_release_calls_on_socket(rx);
|
|
flush_workqueue(rxrpc_workqueue);
|
|
rxrpc_purge_queue(&sk->sk_receive_queue);
|
|
|
|
rxrpc_put_local(rx->local);
|
|
rx->local = NULL;
|
|
key_put(rx->key);
|
|
rx->key = NULL;
|
|
key_put(rx->securities);
|
|
rx->securities = NULL;
|
|
sock_put(sk);
|
|
|
|
_leave(" = 0");
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* release an RxRPC BSD socket on close() or equivalent
|
|
*/
|
|
static int rxrpc_release(struct socket *sock)
|
|
{
|
|
struct sock *sk = sock->sk;
|
|
|
|
_enter("%p{%p}", sock, sk);
|
|
|
|
if (!sk)
|
|
return 0;
|
|
|
|
sock->sk = NULL;
|
|
|
|
return rxrpc_release_sock(sk);
|
|
}
|
|
|
|
/*
|
|
* RxRPC network protocol
|
|
*/
|
|
static const struct proto_ops rxrpc_rpc_ops = {
|
|
.family = PF_RXRPC,
|
|
.owner = THIS_MODULE,
|
|
.release = rxrpc_release,
|
|
.bind = rxrpc_bind,
|
|
.connect = rxrpc_connect,
|
|
.socketpair = sock_no_socketpair,
|
|
.accept = sock_no_accept,
|
|
.getname = sock_no_getname,
|
|
.poll = rxrpc_poll,
|
|
.ioctl = sock_no_ioctl,
|
|
.listen = rxrpc_listen,
|
|
.shutdown = rxrpc_shutdown,
|
|
.setsockopt = rxrpc_setsockopt,
|
|
.getsockopt = sock_no_getsockopt,
|
|
.sendmsg = rxrpc_sendmsg,
|
|
.recvmsg = rxrpc_recvmsg,
|
|
.mmap = sock_no_mmap,
|
|
.sendpage = sock_no_sendpage,
|
|
};
|
|
|
|
static struct proto rxrpc_proto = {
|
|
.name = "RXRPC",
|
|
.owner = THIS_MODULE,
|
|
.obj_size = sizeof(struct rxrpc_sock),
|
|
.max_header = sizeof(struct rxrpc_wire_header),
|
|
};
|
|
|
|
static const struct net_proto_family rxrpc_family_ops = {
|
|
.family = PF_RXRPC,
|
|
.create = rxrpc_create,
|
|
.owner = THIS_MODULE,
|
|
};
|
|
|
|
/*
|
|
* initialise and register the RxRPC protocol
|
|
*/
|
|
static int __init af_rxrpc_init(void)
|
|
{
|
|
int ret = -1;
|
|
|
|
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
|
|
|
|
get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
|
|
rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
|
|
get_random_bytes(&rxrpc_client_conn_ids.cur,
|
|
sizeof(rxrpc_client_conn_ids.cur));
|
|
rxrpc_client_conn_ids.cur &= 0x3fffffff;
|
|
if (rxrpc_client_conn_ids.cur == 0)
|
|
rxrpc_client_conn_ids.cur = 1;
|
|
|
|
ret = -ENOMEM;
|
|
rxrpc_call_jar = kmem_cache_create(
|
|
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
|
|
SLAB_HWCACHE_ALIGN, NULL);
|
|
if (!rxrpc_call_jar) {
|
|
pr_notice("Failed to allocate call jar\n");
|
|
goto error_call_jar;
|
|
}
|
|
|
|
rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1);
|
|
if (!rxrpc_workqueue) {
|
|
pr_notice("Failed to allocate work queue\n");
|
|
goto error_work_queue;
|
|
}
|
|
|
|
ret = rxrpc_init_security();
|
|
if (ret < 0) {
|
|
pr_crit("Cannot initialise security\n");
|
|
goto error_security;
|
|
}
|
|
|
|
ret = proto_register(&rxrpc_proto, 1);
|
|
if (ret < 0) {
|
|
pr_crit("Cannot register protocol\n");
|
|
goto error_proto;
|
|
}
|
|
|
|
ret = sock_register(&rxrpc_family_ops);
|
|
if (ret < 0) {
|
|
pr_crit("Cannot register socket family\n");
|
|
goto error_sock;
|
|
}
|
|
|
|
ret = register_key_type(&key_type_rxrpc);
|
|
if (ret < 0) {
|
|
pr_crit("Cannot register client key type\n");
|
|
goto error_key_type;
|
|
}
|
|
|
|
ret = register_key_type(&key_type_rxrpc_s);
|
|
if (ret < 0) {
|
|
pr_crit("Cannot register server key type\n");
|
|
goto error_key_type_s;
|
|
}
|
|
|
|
ret = rxrpc_sysctl_init();
|
|
if (ret < 0) {
|
|
pr_crit("Cannot register sysctls\n");
|
|
goto error_sysctls;
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
|
|
proc_create("rxrpc_conns", 0, init_net.proc_net,
|
|
&rxrpc_connection_seq_fops);
|
|
#endif
|
|
return 0;
|
|
|
|
error_sysctls:
|
|
unregister_key_type(&key_type_rxrpc_s);
|
|
error_key_type_s:
|
|
unregister_key_type(&key_type_rxrpc);
|
|
error_key_type:
|
|
sock_unregister(PF_RXRPC);
|
|
error_sock:
|
|
proto_unregister(&rxrpc_proto);
|
|
error_proto:
|
|
rxrpc_exit_security();
|
|
error_security:
|
|
destroy_workqueue(rxrpc_workqueue);
|
|
error_work_queue:
|
|
kmem_cache_destroy(rxrpc_call_jar);
|
|
error_call_jar:
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* unregister the RxRPC protocol
|
|
*/
|
|
static void __exit af_rxrpc_exit(void)
|
|
{
|
|
_enter("");
|
|
rxrpc_sysctl_exit();
|
|
unregister_key_type(&key_type_rxrpc_s);
|
|
unregister_key_type(&key_type_rxrpc);
|
|
sock_unregister(PF_RXRPC);
|
|
proto_unregister(&rxrpc_proto);
|
|
rxrpc_destroy_all_calls();
|
|
rxrpc_destroy_all_connections();
|
|
ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
|
|
rxrpc_destroy_all_locals();
|
|
|
|
remove_proc_entry("rxrpc_conns", init_net.proc_net);
|
|
remove_proc_entry("rxrpc_calls", init_net.proc_net);
|
|
destroy_workqueue(rxrpc_workqueue);
|
|
rxrpc_exit_security();
|
|
kmem_cache_destroy(rxrpc_call_jar);
|
|
_leave("");
|
|
}
|
|
|
|
/* Module entry and exit points */
module_init(af_rxrpc_init);
module_exit(af_rxrpc_exit);
|