linux/net/rxrpc/conn_event.c
David Howells d001648ec7 rxrpc: Don't expose skbs to in-kernel users [ver #2]
Don't expose skbs to in-kernel users, such as the AFS filesystem, but
instead provide a notification hook that indicates that a call needs
attention and another that indicates that there's a new call to be
collected.

This makes the following possibilities more achievable:

 (1) Call refcounting can be made simpler if skbs don't hold refs to calls.

 (2) skbs referring to non-data events will be able to be freed much sooner
     rather than being queued for AFS to pick up as rxrpc_kernel_recv_data
     will be able to consult the call state.

 (3) We can shortcut the receive phase when a call is remotely aborted
     because we don't have to go through all the packets to get to the one
     cancelling the operation.

 (4) It makes it easier to do encryption/decryption directly between AFS's
     buffers and sk_buffs.

 (5) Encryption/decryption can more easily be done in the AFS's thread
     contexts - usually that of the userspace process that issued a syscall
     - rather than in one of rxrpc's background threads on a workqueue.

 (6) AFS will be able to wait synchronously on a call inside AF_RXRPC.

To make this work, the following interface function has been added:

     int rxrpc_kernel_recv_data(
		struct socket *sock, struct rxrpc_call *call,
		void *buffer, size_t bufsize, size_t *_offset,
		bool want_more, u32 *_abort_code);

This is the recvmsg equivalent.  It allows the caller to find out about the
state of a specific call and to transfer received data into a buffer
piecemeal.

afs_extract_data() and rxrpc_kernel_recv_data() now do all the extraction
logic between them.  They don't wait synchronously yet because the socket
lock needs to be dealt with.

Five interface functions have been removed:

	rxrpc_kernel_is_data_last()
    	rxrpc_kernel_get_abort_code()
    	rxrpc_kernel_get_error_number()
    	rxrpc_kernel_free_skb()
    	rxrpc_kernel_data_consumed()

As a temporary hack, sk_buffs going to an in-kernel call are queued on the
rxrpc_call struct (->knlrecv_queue) rather than being handed over to the
in-kernel user.  To process the queue internally, a temporary function,
temp_deliver_data(), has been added.  This will be replaced with common code
between the rxrpc_recvmsg() path and the rxrpc_kernel_recv_data() path in a
future patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-09-01 16:43:27 -07:00

515 lines
12 KiB
C

/* connection-level event handling
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
/*
 * Retransmit terminal ACK or ABORT of the previous call.
 *
 * conn is the connection the packet arrived on and skb is the received packet
 * that prompted the retransmission.  The channel is selected from the CID in
 * the packet's header and the reply is only sent if the packet's call number
 * still matches the channel's recorded last call, both before and after the
 * reply has been assembled.
 *
 * The reply is built in an on-stack buffer and sent through the local
 * endpoint's socket; the result of the send is not checked.
 */
static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
				       struct sk_buff *skb)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	struct rxrpc_channel *chan;
	struct msghdr msg;
	struct kvec iov;
	struct {
		struct rxrpc_wire_header whdr;
		union {
			struct {
				__be32 code;
			} abort;
			struct {
				struct rxrpc_ackpacket ack;
				u8 padding[3];
				struct rxrpc_ackinfo info;
			};
		};
	} __attribute__((packed)) pkt;
	size_t len;
	u32 serial, mtu, call_id;

	_enter("%d", conn->debug_id);

	chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];

	/* If the last call got moved on whilst we were waiting to run, just
	 * ignore this packet.
	 */
	call_id = READ_ONCE(chan->last_call);
	/* Sync with __rxrpc_disconnect_call() */
	smp_rmb();
	if (call_id != sp->hdr.callNumber)
		return;

	msg.msg_name	= &conn->params.peer->srx.transport;
	msg.msg_namelen	= conn->params.peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	/* The serial number is filled in later, once we know we're sending. */
	pkt.whdr.epoch		= htonl(sp->hdr.epoch);
	pkt.whdr.cid		= htonl(sp->hdr.cid);
	pkt.whdr.callNumber	= htonl(sp->hdr.callNumber);
	pkt.whdr.seq		= 0;
	pkt.whdr.type		= chan->last_type;
	pkt.whdr.flags		= conn->out_clientflag;
	pkt.whdr.userStatus	= 0;
	pkt.whdr.securityIndex	= conn->security_ix;
	pkt.whdr._rsvd		= 0;
	pkt.whdr.serviceId	= htons(chan->last_service_id);

	len = sizeof(pkt.whdr);
	switch (chan->last_type) {
	case RXRPC_PACKET_TYPE_ABORT:
		pkt.abort.code	= htonl(chan->last_abort);
		len += sizeof(pkt.abort);
		break;

	case RXRPC_PACKET_TYPE_ACK:
		mtu = conn->params.peer->if_mtu;
		mtu -= conn->params.peer->hdrsize;
		pkt.ack.bufferSpace	= 0;
		pkt.ack.maxSkew		= htons(skb->priority);
		pkt.ack.firstPacket	= htonl(chan->last_seq);
		pkt.ack.previousPacket	= htonl(chan->last_seq - 1);
		pkt.ack.serial		= htonl(sp->hdr.serial);
		pkt.ack.reason		= RXRPC_ACK_DUPLICATE;
		pkt.ack.nAcks		= 0;
		/* The padding lies inside the transmitted span (it sits
		 * between the ack and the info trailer), so it must be
		 * cleared or uninitialised stack bytes go out on the wire.
		 */
		memset(pkt.padding, 0, sizeof(pkt.padding));
		pkt.info.rxMTU		= htonl(rxrpc_rx_mtu);
		pkt.info.maxMTU		= htonl(mtu);
		pkt.info.rwind		= htonl(rxrpc_rx_window_size);
		pkt.info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
		len += sizeof(pkt.ack) + sizeof(pkt.info);
		break;
	}

	/* Resync with __rxrpc_disconnect_call() and check that the last call
	 * didn't get advanced whilst we were filling out the packets.
	 */
	smp_rmb();
	if (READ_ONCE(chan->last_call) != call_id)
		return;

	iov.iov_base	= &pkt;
	iov.iov_len	= len;

	serial = atomic_inc_return(&conn->serial);
	pkt.whdr.serial = htonl(serial);

	switch (chan->last_type) {
	case RXRPC_PACKET_TYPE_ABORT:
		/* Trace the code actually placed in the packet above. */
		_proto("Tx ABORT %%%u { %d } [re]", serial, chan->last_abort);
		break;
	case RXRPC_PACKET_TYPE_ACK:
		_proto("Tx ACK %%%u [re]", serial);
		break;
	}

	kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
	_leave("");
	return;
}
/*
 * Pass a connection-level abort onto all calls on that connection.
 *
 * Every channel of conn that currently has a call attached is visited under
 * the connection's channel lock.  Each such call is offered the completion
 * state compl together with abort_code and error.  Only if
 * rxrpc_set_call_completion() reports success is the matching event bit set
 * and the call queued for processing.
 *
 * The event raised is RXRPC_CALL_EV_CONN_ABORT for a locally generated abort
 * and RXRPC_CALL_EV_RCVD_ABORT otherwise.
 */
static void rxrpc_abort_calls(struct rxrpc_connection *conn,
			      enum rxrpc_call_completion compl,
			      u32 abort_code, int error)
{
	struct rxrpc_call *call;
	int i, bit;

	_enter("{%d},%x", conn->debug_id, abort_code);

	if (compl == RXRPC_CALL_LOCALLY_ABORTED)
		bit = RXRPC_CALL_EV_CONN_ABORT;
	else
		bit = RXRPC_CALL_EV_RCVD_ABORT;

	spin_lock(&conn->channel_lock);

	for (i = 0; i < RXRPC_MAXCALLS; i++) {
		/* Must be reset for every call: otherwise the flag is read
		 * uninitialised, or carries over from an earlier channel,
		 * whenever the completion could not be set.
		 */
		bool queue = false;

		call = rcu_dereference_protected(
			conn->channels[i].call,
			lockdep_is_held(&conn->channel_lock));
		if (!call)
			continue;

		rxrpc_see_call(call);
		write_lock_bh(&call->state_lock);
		if (rxrpc_set_call_completion(call, compl, abort_code,
					      error)) {
			set_bit(bit, &call->events);
			queue = true;
		}
		write_unlock_bh(&call->state_lock);

		/* Queue only after the call's state lock has been dropped. */
		if (queue)
			rxrpc_queue_call(call);
	}

	spin_unlock(&conn->channel_lock);
	_leave("");
}
/*
 * Generate a connection-level abort.
 *
 * The connection is moved to the locally-aborted state, every call attached
 * to it is completed with abort_code and error, and an ABORT packet with call
 * number 0 is sent to the peer.
 *
 * Returns 0 if the abort was sent or if the connection had already been
 * aborted (in which case nothing is sent), and -EAGAIN if the packet could
 * not be transmitted.
 */
static int rxrpc_abort_connection(struct rxrpc_connection *conn,
				  u32 error, u32 abort_code)
{
	struct rxrpc_wire_header whdr;
	struct msghdr msg;
	struct kvec iov[2];
	__be32 word;
	size_t len;
	u32 serial;
	int ret;

	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);

	/* generate a connection-level abort */
	spin_lock_bh(&conn->state_lock);
	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
		spin_unlock_bh(&conn->state_lock);
		_leave(" = 0 [already dead]");
		return 0;
	}

	/* Record the code we were asked to send.  The packet and the trace
	 * line below both take it from conn->local_abort, and neither caller
	 * in this file stores it there beforehand.
	 * NOTE(review): confirm no code outside this file is expected to
	 * preset conn->local_abort.
	 */
	conn->local_abort = abort_code;
	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
	spin_unlock_bh(&conn->state_lock);

	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error);

	msg.msg_name	= &conn->params.peer->srx.transport;
	msg.msg_namelen	= conn->params.peer->srx.transport_len;
	msg.msg_control	= NULL;
	msg.msg_controllen = 0;
	msg.msg_flags	= 0;

	whdr.epoch	= htonl(conn->proto.epoch);
	whdr.cid	= htonl(conn->proto.cid);
	whdr.callNumber	= 0;
	whdr.seq	= 0;
	whdr.type	= RXRPC_PACKET_TYPE_ABORT;
	whdr.flags	= conn->out_clientflag;
	whdr.userStatus	= 0;
	whdr.securityIndex = conn->security_ix;
	whdr._rsvd	= 0;
	whdr.serviceId	= htons(conn->params.service_id);

	word		= htonl(conn->local_abort);

	/* The packet is the wire header followed by the 32-bit abort code. */
	iov[0].iov_base	= &whdr;
	iov[0].iov_len	= sizeof(whdr);
	iov[1].iov_base	= &word;
	iov[1].iov_len	= sizeof(word);

	len = iov[0].iov_len + iov[1].iov_len;

	serial = atomic_inc_return(&conn->serial);
	whdr.serial = htonl(serial);
	_proto("Tx CONN ABORT %%%u { %d }", serial, conn->local_abort);

	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
	if (ret < 0) {
		_debug("sendmsg failed: %d", ret);
		return -EAGAIN;
	}

	_leave(" = 0");
	return 0;
}
/*
 * Mark a call as being on a now-secured channel.
 * - must be called with softirqs disabled
 *
 * A NULL call is tolerated and ignored.  A call that has not yet reached the
 * complete state has its SECURED event raised, and is queued for processing
 * only if that event was not already pending.
 */
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
	_enter("%p", call);

	if (!call)
		return;

	read_lock(&call->state_lock);
	if (call->state < RXRPC_CALL_COMPLETE) {
		if (!test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
			rxrpc_queue_call(call);
	}
	read_unlock(&call->state_lock);
}
/*
 * Connection-level Rx packet processor.
 *
 * Handles one packet, skb, that was queued on the connection:
 *  - DATA/ACK:  retransmit the final ACK/ABORT of the channel's last call.
 *  - ABORT:     mark the connection remotely aborted and abort all its calls.
 *  - CHALLENGE: hand over to the security module to respond.
 *  - RESPONSE:  verify it, set up connection security and, if the connection
 *               was in the challenging state, switch it to the service state
 *               and notify each attached call that it is now secure.
 *
 * *_abort_code is passed to the security operations, which may fill it in.
 *
 * Returns 0 on success, -ECONNABORTED if the connection is (or has just
 * become) aborted, -EPROTO for a malformed or unexpected packet, or the
 * negative error returned by a security operation.
 */
static int rxrpc_process_event(struct rxrpc_connection *conn,
			       struct sk_buff *skb,
			       u32 *_abort_code)
{
	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
	__be32 wtmp;
	u32 abort_code;
	int loop, ret;

	/* NOTE(review): this uses kleave() rather than _leave() and is emitted
	 * before _enter(); confirm that is intended.
	 */
	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
		kleave(" = -ECONNABORTED [%u]", conn->state);
		return -ECONNABORTED;
	}

	_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);

	switch (sp->hdr.type) {
	case RXRPC_PACKET_TYPE_DATA:
	case RXRPC_PACKET_TYPE_ACK:
		rxrpc_conn_retransmit_call(conn, skb);
		return 0;

	case RXRPC_PACKET_TYPE_ABORT:
		if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
			return -EPROTO;
		abort_code = ntohl(wtmp);
		_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);

		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
		/* Argument order is (conn, completion, abort code, error).
		 * The error is passed as a positive errno, matching the
		 * callers of rxrpc_abort_connection() in this file, which
		 * negate their negative return codes.
		 */
		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
				  abort_code, ECONNABORTED);
		return -ECONNABORTED;

	case RXRPC_PACKET_TYPE_CHALLENGE:
		return conn->security->respond_to_challenge(conn, skb,
							    _abort_code);

	case RXRPC_PACKET_TYPE_RESPONSE:
		ret = conn->security->verify_response(conn, skb, _abort_code);
		if (ret < 0)
			return ret;

		ret = conn->security->init_connection_security(conn);
		if (ret < 0)
			return ret;

		ret = conn->security->prime_packet_security(conn);
		if (ret < 0)
			return ret;

		/* The channel lock is taken outside the state lock; the
		 * channel array is walked with both held.
		 */
		spin_lock(&conn->channel_lock);
		spin_lock(&conn->state_lock);

		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
			conn->state = RXRPC_CONN_SERVICE;
			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
				rxrpc_call_is_secure(
					rcu_dereference_protected(
						conn->channels[loop].call,
						lockdep_is_held(&conn->channel_lock)));
		}

		spin_unlock(&conn->state_lock);
		spin_unlock(&conn->channel_lock);
		return 0;

	default:
		_leave(" = -EPROTO [%u]", sp->hdr.type);
		return -EPROTO;
	}
}
/*
 * Set up security and issue a challenge.
 *
 * If the connection has no key yet, the server-side security is initialised
 * first.  Should that or the issuing of the challenge fail, the connection is
 * aborted with an abort code chosen according to the failure.
 */
static void rxrpc_secure_connection(struct rxrpc_connection *conn)
{
	u32 abort_code;
	int ret;

	_enter("{%d}", conn->debug_id);

	ASSERT(conn->security_ix != 0);

	if (!conn->params.key) {
		_debug("set up security");
		ret = rxrpc_init_server_conn_security(conn);
		if (ret == -ENOENT) {
			abort_code = RX_CALL_DEAD;
			goto abort;
		}
		if (ret != 0) {
			abort_code = RXKADNOAUTH;
			goto abort;
		}
	}

	if (conn->security->issue_challenge(conn) >= 0) {
		_leave("");
		return;
	}

	/* Any failure to issue the challenge is reported as -ENOMEM. */
	abort_code = RX_CALL_DEAD;
	ret = -ENOMEM;

abort:
	_debug("abort %d, %d", ret, abort_code);
	rxrpc_abort_connection(conn, -ret, abort_code);
	_leave(" [aborted]");
}
/*
 * Connection-level event processor.
 *
 * Work item handler: issues a security challenge if one has been requested,
 * then drains the connection's Rx queue, handing each packet to
 * rxrpc_process_event().  Processing stops early if a packet has to be
 * retried later (it is put back at the head of the queue) or if a protocol
 * or key error makes us abort the connection.  The connection ref is dropped
 * on the way out.
 */
void rxrpc_process_connection(struct work_struct *work)
{
	struct rxrpc_connection *conn =
		container_of(work, struct rxrpc_connection, processor);
	struct sk_buff *skb;
	u32 abort_code = RX_PROTOCOL_ERROR;
	int ret;

	_enter("{%d}", conn->debug_id);

	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
		rxrpc_secure_connection(conn);

	/* go through the conn-level event packets, releasing the ref on this
	 * connection that each one has when we've finished with it */
	for (;;) {
		skb = skb_dequeue(&conn->rx_queue);
		if (!skb)
			break;

		rxrpc_see_skb(skb);
		ret = rxrpc_process_event(conn, skb, &abort_code);

		if (ret == -EAGAIN) {
			/* Try this packet again on the next run. */
			skb_queue_head(&conn->rx_queue, skb);
			break;
		}

		if (ret == -EPROTO ||
		    ret == -EKEYEXPIRED ||
		    ret == -EKEYREJECTED) {
			if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) {
				/* Couldn't send the abort; retry later. */
				skb_queue_head(&conn->rx_queue, skb);
				break;
			}
			rxrpc_free_skb(skb);
			_leave(" [EPROTO]");
			break;
		}

		/* -ECONNABORTED and everything else: done with the packet. */
		rxrpc_free_skb(skb);
	}

	rxrpc_put_connection(conn);
	_leave("");
	return;
}
/*
 * Put a packet up for transport-level abort.
 *
 * The packet is appended to the local endpoint's reject queue and the
 * endpoint is queued so that the rejection gets processed.
 */
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
	struct sk_buff_head *rejects = &local->reject_queue;

	CHECK_SLAB_OKAY(&local->usage);

	skb_queue_tail(rejects, skb);
	rxrpc_queue_local(local);
}
/*
 * Reject packets through the local endpoint.
 *
 * Drains the local endpoint's reject queue.  For an AF_INET transport, each
 * packet is answered with an ABORT sent back to the UDP source of the
 * offending packet, carrying the code stashed in skb->priority.  Packets on
 * any other transport family are simply discarded.  The result of the send is
 * not checked.
 */
void rxrpc_reject_packets(struct rxrpc_local *local)
{
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
	} dest;
	struct rxrpc_skb_priv *sp;
	struct rxrpc_wire_header whdr;
	struct sk_buff *skb;
	struct msghdr msg;
	struct kvec iov[2];
	size_t size;
	__be32 code;

	_enter("%d", local->debug_id);

	/* The reply is a wire header followed by the 32-bit abort code. */
	iov[0].iov_base = &whdr;
	iov[0].iov_len = sizeof(whdr);
	iov[1].iov_base = &code;
	iov[1].iov_len = sizeof(code);
	size = sizeof(whdr) + sizeof(code);

	memset(&dest, 0, sizeof(dest));
	dest.sa.sa_family = local->srx.transport.family;

	msg.msg_name = &dest;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;
	if (dest.sa.sa_family == AF_INET)
		msg.msg_namelen = sizeof(dest.sin);
	else
		msg.msg_namelen = 0;

	/* Fields not set per packet in the loop below stay zero. */
	memset(&whdr, 0, sizeof(whdr));
	whdr.type = RXRPC_PACKET_TYPE_ABORT;

	for (;;) {
		skb = skb_dequeue(&local->reject_queue);
		if (!skb)
			break;

		rxrpc_see_skb(skb);
		sp = rxrpc_skb(skb);

		if (dest.sa.sa_family == AF_INET) {
			dest.sin.sin_port = udp_hdr(skb)->source;
			dest.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
			code = htonl(skb->priority);

			whdr.epoch = htonl(sp->hdr.epoch);
			whdr.cid = htonl(sp->hdr.cid);
			whdr.callNumber = htonl(sp->hdr.callNumber);
			whdr.serviceId = htons(sp->hdr.serviceId);
			/* Reply with only the client-initiated bit, inverted
			 * relative to the packet being rejected.
			 */
			whdr.flags = (sp->hdr.flags ^ RXRPC_CLIENT_INITIATED) &
				RXRPC_CLIENT_INITIATED;

			kernel_sendmsg(local->socket, &msg, iov, 2, size);
		}

		rxrpc_free_skb(skb);
	}

	_leave("");
}