From d2944b1c66a502ada8aa67f508cd29ecbf035892 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:27 +0100 Subject: [PATCH 1/9] rxrpc: Use rxrpc_free_skb() rather than rxrpc_lose_skb() rxrpc_lose_skb() is now exactly the same as rxrpc_free_skb(), so remove it and use the latter instead. Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/input.c | 2 +- net/rxrpc/skbuff.c | 15 --------------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ef9554131434..9ba87e4d15c7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1095,7 +1095,6 @@ void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); -void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4b86b6be6c1f..5b2626929822 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1176,7 +1176,7 @@ void rxrpc_data_ready(struct sock *udp_sk) static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); - rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + rxrpc_free_skb(skb, rxrpc_skb_rx_lost); return; } } diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index b8985d01876a..913dca65cc65 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -68,21 +68,6 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) } } -/* - * Note the injected loss of a socket buffer. 
- */ -void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) -{ - const void *here = __builtin_return_address(0); - if (skb) { - int n; - CHECK_SLAB_OKAY(&skb->users); - n = atomic_dec_return(select_skb_count(op)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); - kfree_skb(skb); - } -} - /* * Clear a queue of socket buffers. */ From b3cfb6f567be00450d33b68f743c066af017a012 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:27 +0100 Subject: [PATCH 2/9] rxrpc: Emit the data Tx trace line before transmitting Print the data Tx trace line before transmitting so that it appears before the trace lines indicating success or failure of the transmission. This makes the trace log less confusing. Signed-off-by: David Howells --- net/rxrpc/output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index e8fb8922bca8..993d4cd247f9 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -378,11 +378,13 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, if ((lose++ & 7) == 7) { ret = 0; lost = true; - goto done; } } - _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); + if (lost) + goto done; /* send the packet with the don't fragment bit set if we currently * think it's small enough */ @@ -415,8 +417,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; done: - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, - retrans, lost); if (ret >= 0) { if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = skb->tstamp; From 68eb64c3d2fd558c606dcff6d3e8a2701388a80f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:27 +0100 Subject: [PATCH 3/9] afs: Do better max capacity handling on address lists Note the maximum allocated capacity in an afs_addr_list struct and discard addresses 
that would exceed it in afs_merge_fs_addr{4,6}(). Also, since the current maximum capacity is less than 255, reduce the relevant members to bytes. Signed-off-by: David Howells --- fs/afs/addr_list.c | 19 +++++++++++-------- fs/afs/internal.h | 8 +++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 025a9a5e1c32..4dbb8af54668 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -17,11 +17,6 @@ #include "internal.h" #include "afs_fs.h" -//#define AFS_MAX_ADDRESSES -// ((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) / -// sizeof(struct sockaddr_rxrpc))) -#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) - /* * Release an address list. */ @@ -43,11 +38,15 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr, _enter("%u,%u,%u", nr, service, port); + if (nr > AFS_MAX_ADDRESSES) + nr = AFS_MAX_ADDRESSES; + alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL); if (!alist) return NULL; refcount_set(&alist->usage, 1); + alist->max_addrs = nr; for (i = 0; i < nr; i++) { struct sockaddr_rxrpc *srx = &alist->addrs[i]; @@ -109,8 +108,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } while (p < end); _debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES); - if (nr > AFS_MAX_ADDRESSES) - nr = AFS_MAX_ADDRESSES; alist = afs_alloc_addrlist(nr, service, port); if (!alist) @@ -180,7 +177,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } alist->nr_addrs++; - } while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES); + } while (p < end && alist->nr_addrs < alist->max_addrs); _leave(" = [nr %u]", alist->nr_addrs); return alist; @@ -241,6 +238,9 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) __be16 xport = htons(port); int i; + if (alist->nr_addrs >= alist->max_addrs) + return; + for (i = 0; i < alist->nr_ipv4; i++) { a = &alist->addrs[i].transport.sin6; if (xdr == a->sin6_addr.s6_addr32[3] 
&& @@ -277,6 +277,9 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) __be16 xport = htons(port); int i, diff; + if (alist->nr_addrs >= alist->max_addrs) + return; + for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { a = &alist->addrs[i].transport.sin6; diff = memcmp(xdr, &a->sin6_addr, 16); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 871a228d7f37..8ae4e2ebb99a 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -73,12 +73,14 @@ struct afs_addr_list { struct rcu_head rcu; /* Must be first */ refcount_t usage; u32 version; /* Version */ - unsigned short nr_addrs; - unsigned short index; /* Address currently in use */ - unsigned short nr_ipv4; /* Number of IPv4 addresses */ + unsigned char max_addrs; + unsigned char nr_addrs; + unsigned char index; /* Address currently in use */ + unsigned char nr_ipv4; /* Number of IPv4 addresses */ unsigned long probed; /* Mask of servers that have been probed */ unsigned long yfs; /* Mask of servers that are YFS */ struct sockaddr_rxrpc addrs[]; +#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8)) }; /* From 4c19bbdc7f7c76da14a7192072c47c3b9b582e80 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:27 +0100 Subject: [PATCH 4/9] afs: Always build address lists using the helper functions Make the address list string parser use the helper functions for adding addresses to an address list so that they end up appropriately sorted. This will better handle overruns and make them easier to compare. It also reduces the number of places that addresses are handled, making it easier to fix the handling.
Signed-off-by: David Howells --- fs/afs/addr_list.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 4dbb8af54668..00e87f859b9f 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -116,8 +116,10 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, /* Extract the addresses */ p = text; do { - struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; const char *q, *stop; + unsigned int xport = port; + __be32 x[4]; + int family; if (*p == delim) { p++; @@ -133,19 +135,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, break; } - if (in4_pton(p, q - p, - (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - -1, &stop)) { - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(p, q - p, - srx->transport.sin6.sin6_addr.s6_addr, - -1, &stop)) { - /* Nothing to do */ - } else { + if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) + family = AF_INET; + else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) + family = AF_INET6; + else goto bad_address; - } if (stop != q) goto bad_address; @@ -157,7 +152,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, if (p < end) { if (*p == '+') { /* Port number specification "+1234" */ - unsigned int xport = 0; + xport = 0; p++; if (p >= end || !isdigit(*p)) goto bad_address; @@ -168,7 +163,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, goto bad_address; p++; } while (p < end && isdigit(*p)); - srx->transport.sin6.sin6_port = htons(xport); } else if (*p == delim) { p++; } else { @@ -176,8 +170,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, } } - alist->nr_addrs++; - } while (p < end && alist->nr_addrs < alist->max_addrs); + if (family == AF_INET) + afs_merge_fs_addr4(alist, 
x[0], xport); + else + afs_merge_fs_addr6(alist, x, xport); + + } while (p < end); _leave(" = [nr %u]", alist->nr_addrs); return alist; From 66be646bd9a7d50961afbf48c1d0df148e37d416 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:28 +0100 Subject: [PATCH 5/9] afs: Sort address lists so that they are in logical ascending order Sort address lists so that they are in logical ascending order rather than being partially in ascending order of the BE representations of those values. Signed-off-by: David Howells --- fs/afs/addr_list.c | 51 ++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 00e87f859b9f..89374de4785c 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -232,22 +232,23 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) */ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) { - struct sockaddr_in6 *a; - __be16 xport = htons(port); + struct sockaddr_in6 *p; + u32 addr = ntohl(xdr); int i; if (alist->nr_addrs >= alist->max_addrs) return; for (i = 0; i < alist->nr_ipv4; i++) { - a = &alist->addrs[i].transport.sin6; - if (xdr == a->sin6_addr.s6_addr32[3] && - xport == a->sin6_port) + struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6; + u32 a_addr = ntohl(a->sin6_addr.s6_addr32[3]); + u16 a_port = ntohs(a->sin6_port); + + if (addr == a_addr && port == a_port) return; - if (xdr == a->sin6_addr.s6_addr32[3] && - (u16 __force)xport < (u16 __force)a->sin6_port) + if (addr == a_addr && port < a_port) break; - if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3]) + if (addr < a_addr) break; } @@ -256,12 +257,12 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - a = &alist->addrs[i].transport.sin6; - a->sin6_port = xport; - a->sin6_addr.s6_addr32[0] = 0; - 
a->sin6_addr.s6_addr32[1] = 0; - a->sin6_addr.s6_addr32[2] = htonl(0xffff); - a->sin6_addr.s6_addr32[3] = xdr; + p = &alist->addrs[i].transport.sin6; + p->sin6_port = htons(port); + p->sin6_addr.s6_addr32[0] = 0; + p->sin6_addr.s6_addr32[1] = 0; + p->sin6_addr.s6_addr32[2] = htonl(0xffff); + p->sin6_addr.s6_addr32[3] = xdr; alist->nr_ipv4++; alist->nr_addrs++; } @@ -271,21 +272,20 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) */ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) { - struct sockaddr_in6 *a; - __be16 xport = htons(port); + struct sockaddr_in6 *p; int i, diff; if (alist->nr_addrs >= alist->max_addrs) return; for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) { - a = &alist->addrs[i].transport.sin6; + struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6; + u16 a_port = ntohs(a->sin6_port); + diff = memcmp(xdr, &a->sin6_addr, 16); - if (diff == 0 && - xport == a->sin6_port) + if (diff == 0 && port == a_port) return; - if (diff == 0 && - (u16 __force)xport < (u16 __force)a->sin6_port) + if (diff == 0 && port < a_port) break; if (diff < 0) break; @@ -296,12 +296,9 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - a = &alist->addrs[i].transport.sin6; - a->sin6_port = xport; - a->sin6_addr.s6_addr32[0] = xdr[0]; - a->sin6_addr.s6_addr32[1] = xdr[1]; - a->sin6_addr.s6_addr32[2] = xdr[2]; - a->sin6_addr.s6_addr32[3] = xdr[3]; + p = &alist->addrs[i].transport.sin6; + p->sin6_port = htons(port); + memcpy(&p->sin6_addr, xdr, 16); alist->nr_addrs++; } From 46894a13599a977ac35411b536fb3e0b2feefa95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:28 +0100 Subject: [PATCH 6/9] rxrpc: Use IPv4 addresses through the IPv6 socket AF_RXRPC opens an IPv6 socket through which to send and receive network packets, both IPv6 and IPv4.
It currently turns AF_INET addresses into AF_INET-as-AF_INET6 addresses based on an assumption that this was necessary; on further inspection of the code, however, it turns out that the IPv6 code just farms packets aimed at AF_INET addresses out to the IPv4 code. Fix AF_RXRPC to use AF_INET addresses directly when given them. Fixes: 7b674e390e51 ("rxrpc: Fix IPv6 support") Signed-off-by: David Howells --- fs/afs/addr_list.c | 29 +++++++++++++++-------------- net/rxrpc/af_rxrpc.c | 3 ++- net/rxrpc/conn_object.c | 5 +++-- net/rxrpc/peer_event.c | 12 +++++++----- net/rxrpc/utils.c | 19 +++++-------------- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 89374de4785c..55a756c60746 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -232,7 +232,7 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry) */ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) { - struct sockaddr_in6 *p; + struct sockaddr_rxrpc *srx; u32 addr = ntohl(xdr); int i; @@ -240,9 +240,9 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) return; for (i = 0; i < alist->nr_ipv4; i++) { - struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6; - u32 a_addr = ntohl(a->sin6_addr.s6_addr32[3]); - u16 a_port = ntohs(a->sin6_port); + struct sockaddr_in *a = &alist->addrs[i].transport.sin; + u32 a_addr = ntohl(a->sin_addr.s_addr); + u16 a_port = ntohs(a->sin_port); if (addr == a_addr && port == a_port) return; @@ -257,12 +257,11 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - p = &alist->addrs[i].transport.sin6; - p->sin6_port = htons(port); - p->sin6_addr.s6_addr32[0] = 0; - p->sin6_addr.s6_addr32[1] = 0; - p->sin6_addr.s6_addr32[2] = htonl(0xffff); - p->sin6_addr.s6_addr32[3] = xdr; + srx = &alist->addrs[i]; + srx->transport_len = sizeof(srx->transport.sin); + 
srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = htons(port); + srx->transport.sin.sin_addr.s_addr = xdr; alist->nr_ipv4++; alist->nr_addrs++; } @@ -272,7 +271,7 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port) */ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) { - struct sockaddr_in6 *p; + struct sockaddr_rxrpc *srx; int i, diff; if (alist->nr_addrs >= alist->max_addrs) @@ -296,9 +295,11 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port) alist->addrs + i, sizeof(alist->addrs[0]) * (alist->nr_addrs - i)); - p = &alist->addrs[i].transport.sin6; - p->sin6_port = htons(port); - memcpy(&p->sin6_addr, xdr, 16); + srx = &alist->addrs[i]; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = htons(port); + memcpy(&srx->transport.sin6.sin6_addr, xdr, 16); alist->nr_addrs++; } diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index ac44d8afffb1..2fdd276f6842 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -97,7 +97,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, srx->transport_len > len) return -EINVAL; - if (srx->transport.family != rx->family) + if (srx->transport.family != rx->family && + srx->transport.family == AF_INET && rx->family != AF_INET6) return -EAFNOSUPPORT; switch (srx->transport.family) { diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 885dae829f4a..fb856a1ccda8 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -89,8 +89,9 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) goto not_found; - /* We may have to handle mixing IPv4 and IPv6 */ - if (srx.transport.family != local->srx.transport.family) { + if (srx.transport.family != local->srx.transport.family && + (srx.transport.family == AF_INET && + 
local->srx.transport.family != AF_INET6)) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", srx.transport.family, local->srx.transport.family); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index f3e6fc670da2..81a7869325a6 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -47,6 +47,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, */ switch (srx->transport.family) { case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: @@ -70,20 +72,20 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: - srx->transport.sin6.sin6_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6"); + srx->transport.sin6.sin6_port = serr->port; memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12, + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = serr->port; + memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index e801171fa351..017adaa54e90 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -25,20 +25,11 @@ int rxrpc_extract_addr_from_skb(struct rxrpc_local *local, switch (ntohs(skb->protocol)) { case ETH_P_IP: - if (local->srx.transport.family == AF_INET6) { - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin6); - 
srx->transport.sin6.sin6_family = AF_INET6; - srx->transport.sin6.sin6_port = udp_hdr(skb)->source; - srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr; - } else { - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.sin.sin_family = AF_INET; - srx->transport.sin.sin_port = udp_hdr(skb)->source; - srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - } + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = udp_hdr(skb)->source; + srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; #ifdef CONFIG_AF_RXRPC_IPV6 From 5a790b7375414cffb0f7e8ab0f175d2e02a0af0e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:32:28 +0100 Subject: [PATCH 7/9] rxrpc: Drop the local endpoint arg from rxrpc_extract_addr_from_skb() rxrpc_extract_addr_from_skb() doesn't use the argument that points to the local endpoint, so remove the argument. 
Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 3 +-- net/rxrpc/call_accept.c | 2 +- net/rxrpc/conn_object.c | 2 +- net/rxrpc/local_event.c | 2 +- net/rxrpc/output.c | 2 +- net/rxrpc/utils.c | 4 +--- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9ba87e4d15c7..76569c178915 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1111,8 +1111,7 @@ static inline void rxrpc_sysctl_exit(void) {} /* * utils.c */ -int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *, - struct sk_buff *); +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); static inline bool before(u32 seq1, u32 seq2) { diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9c7f26d06a52..8354cadbb839 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index fb856a1ccda8..c332722820c2 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -86,7 +86,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) goto not_found; if (srx.transport.family != local->srx.transport.family && diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 13bd8a4dfac7..927ead43df42 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, 
_enter(""); - if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) return; msg.msg_name = &srx.transport; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 993d4cd247f9..0f0b499d1202 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -561,7 +561,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) continue; } - if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { msg.msg_namelen = srx.transport_len; whdr.epoch = htonl(sp->hdr.epoch); diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index 017adaa54e90..ff7af71c4b49 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -17,9 +17,7 @@ /* * Fill out a peer address from a socket buffer containing a packet. */ -int rxrpc_extract_addr_from_skb(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) { memset(srx, 0, sizeof(*srx)); From 2070a3e44962212d6ef02c5def821b1b9744e496 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:42:29 +0100 Subject: [PATCH 8/9] rxrpc: Allow the reply time to be obtained on a client call Allow the timestamp on the sk_buff holding the first DATA packet of a reply to be queried. This can then be used as a base for the expiry time calculation on the callback promise duration indicated by an operation result. Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 11 ++++++++ include/net/af_rxrpc.h | 3 +++ net/rxrpc/recvmsg.c | 43 ++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index b5407163d53b..67879992b4c2 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1069,6 +1069,17 @@ The kernel interface functions are as follows: This function may transmit a PING ACK. 
+ (*) Get reply timestamp. + + bool rxrpc_kernel_get_reply_time(struct socket *sock, + struct rxrpc_call *call, + ktime_t *_ts) + + This allows the timestamp on the first DATA packet of the reply of a + client call to be queried, provided that it is still in the Rx ring. If + successful, the timestamp will be stored into *_ts and true will be + returned; false will be returned otherwise. + ======================= CONFIGURABLE PARAMETERS diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index f53edb3754bc..c4c912554dee 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -13,6 +13,7 @@ #define _NET_RXRPC_H #include +#include struct key; struct sock; @@ -77,5 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, + ktime_t *); #endif /* _NET_RXRPC_H */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 816b19a78809..eaf19ebaa964 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -715,3 +715,46 @@ call_complete: goto out; } EXPORT_SYMBOL(rxrpc_kernel_recv_data); + +/** + * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet + * @sock: The socket that the call exists on + * @call: The call to query + * @_ts: Where to put the timestamp + * + * Retrieve the timestamp from the first DATA packet of the reply if it is + * in the ring. Returns true if successful, false if not. 
+ */ +bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call, + ktime_t *_ts) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + bool success = false; + + mutex_lock(&call->user_mutex); + + if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY) + goto out; + + hard_ack = call->rx_hard_ack; + if (hard_ack != 0) + goto out; + + seq = hard_ack + 1; + top = smp_load_acquire(&call->rx_top); + if (after(seq, top)) + goto out; + + skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK]; + if (!skb) + goto out; + + *_ts = skb_get_ktime(skb); + success = true; + +out: + mutex_unlock(&call->user_mutex); + return success; +} +EXPORT_SYMBOL(rxrpc_kernel_get_reply_time); From e908bcf4f1a271e7c264dcbffc5881ced8bfacee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 Oct 2018 09:54:29 +0100 Subject: [PATCH 9/9] rxrpc: Allow the epoch value to be queried on a server connection Allow the epoch value to be queried on a server connection. This is in the rxrpc header of every packet for use in routing and is derived from the client's state. It's also not supposed to change unless the client gets restarted. AFS can make use of this information to deduce whether a fileserver has been restarted because the fileserver makes client calls to the filesystem driver's cache manager to send notifications (i.e. callback breaks) about conflicting changes from other clients. These convey the fileserver's own epoch value back to the filesystem.
Signed-off-by: David Howells --- Documentation/networking/rxrpc.txt | 14 ++++++++++++++ include/net/af_rxrpc.h | 1 + net/rxrpc/af_rxrpc.c | 14 ++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 67879992b4c2..605e00cdd6be 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1080,6 +1080,20 @@ The kernel interface functions are as follows: successful, the timestamp will be stored into *_ts and true will be returned; false will be returned otherwise. + (*) Get remote client epoch. + + u32 rxrpc_kernel_get_epoch(struct socket *sock, + struct rxrpc_call *call) + + This allows the epoch that's contained in packets of an incoming client + call to be queried. This value is returned. The function is always + successful if the call is still in progress. It shouldn't be called once + the call has expired. Note that calling this on a local client call only + returns the local epoch. + + This value can be used to determine if the remote client has been + restarted as it shouldn't change otherwise.
+ ======================= CONFIGURABLE PARAMETERS diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index c4c912554dee..de587948042a 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -78,6 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2fdd276f6842..013dbcb052e5 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -385,6 +385,20 @@ u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call) } EXPORT_SYMBOL(rxrpc_kernel_check_life); +/** + * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. + * @sock: The socket the call is on + * @call: The call to query + * + * Allow a kernel service to retrieve the epoch value from a service call to + * see if the client at the other end rebooted. + */ +u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) +{ + return call->conn->proto.epoch; +} +EXPORT_SYMBOL(rxrpc_kernel_get_epoch); + /** * rxrpc_kernel_check_call - Check a call's state * @sock: The socket the call is on