ip: convert tcp_sendmsg() to iov_iter primitives
The patch is actually smaller than it seems to be - most of it is unindenting the inner loop body in tcp_sendmsg() itself. The bit in tcp_input.c is going to get reverted very soon - that's what memcpy_from_msg() will become, but not in this commit; let's keep it reasonably contained. There's one potentially subtle change here: in case of a short copy from userland, mainline tcp_send_syn_data() discards the skb it has allocated and falls back to the normal path, where we'll send as much as possible after rereading the same data again. This patch trims the SYN+data skb instead - that way we don't need to copy from the same place twice. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
		
							parent
							
								
									cacdc7d2f9
								
							
						
					
					
						commit
						57be5bdad7
					
				| @ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) | ||||
| } | ||||
| 
 | ||||
| static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, | ||||
| 					   char __user *from, char *to, | ||||
| 					   struct iov_iter *from, char *to, | ||||
| 					   int copy, int offset) | ||||
| { | ||||
| 	if (skb->ip_summed == CHECKSUM_NONE) { | ||||
| 		int err = 0; | ||||
| 		__wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); | ||||
| 		if (err) | ||||
| 			return err; | ||||
| 		__wsum csum = 0; | ||||
| 		if (csum_and_copy_from_iter(to, copy, &csum, from) != copy) | ||||
| 			return -EFAULT; | ||||
| 		skb->csum = csum_block_add(skb->csum, csum, offset); | ||||
| 	} else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { | ||||
| 		if (!access_ok(VERIFY_READ, from, copy) || | ||||
| 		    __copy_from_user_nocache(to, from, copy)) | ||||
| 		if (copy_from_iter_nocache(to, copy, from) != copy) | ||||
| 			return -EFAULT; | ||||
| 	} else if (copy_from_user(to, from, copy)) | ||||
| 	} else if (copy_from_iter(to, copy, from) != copy) | ||||
| 		return -EFAULT; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, | ||||
| 				       char __user *from, int copy) | ||||
| 				       struct iov_iter *from, int copy) | ||||
| { | ||||
| 	int err, offset = skb->len; | ||||
| 
 | ||||
| @ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, | ||||
| 	return err; | ||||
| } | ||||
| 
 | ||||
| static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, | ||||
| static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, | ||||
| 					   struct sk_buff *skb, | ||||
| 					   struct page *page, | ||||
| 					   int off, int copy) | ||||
|  | ||||
							
								
								
									
										241
									
								
								net/ipv4/tcp.c
									
									
									
									
									
								
							
							
						
						
									
										241
									
								
								net/ipv4/tcp.c
									
									
									
									
									
								
							| @ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, | ||||
| int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||||
| 		size_t size) | ||||
| { | ||||
| 	const struct iovec *iov; | ||||
| 	struct tcp_sock *tp = tcp_sk(sk); | ||||
| 	struct sk_buff *skb; | ||||
| 	int iovlen, flags, err, copied = 0; | ||||
| 	int mss_now = 0, size_goal, copied_syn = 0, offset = 0; | ||||
| 	int flags, err, copied = 0; | ||||
| 	int mss_now = 0, size_goal, copied_syn = 0; | ||||
| 	bool sg; | ||||
| 	long timeo; | ||||
| 
 | ||||
| @ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||||
| 			goto out; | ||||
| 		else if (err) | ||||
| 			goto out_err; | ||||
| 		offset = copied_syn; | ||||
| 	} | ||||
| 
 | ||||
| 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | ||||
| @ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||||
| 	mss_now = tcp_send_mss(sk, &size_goal, flags); | ||||
| 
 | ||||
| 	/* Ok commence sending. */ | ||||
| 	iovlen = msg->msg_iter.nr_segs; | ||||
| 	iov = msg->msg_iter.iov; | ||||
| 	copied = 0; | ||||
| 
 | ||||
| 	err = -EPIPE; | ||||
| @ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | ||||
| 
 | ||||
| 	sg = !!(sk->sk_route_caps & NETIF_F_SG); | ||||
| 
 | ||||
| 	while (--iovlen >= 0) { | ||||
| 		size_t seglen = iov->iov_len; | ||||
| 		unsigned char __user *from = iov->iov_base; | ||||
| 	while (iov_iter_count(&msg->msg_iter)) { | ||||
| 		int copy = 0; | ||||
| 		int max = size_goal; | ||||
| 
 | ||||
| 		iov++; | ||||
| 		if (unlikely(offset > 0)) {  /* Skip bytes copied in SYN */ | ||||
| 			if (offset >= seglen) { | ||||
| 				offset -= seglen; | ||||
| 				continue; | ||||
| 			} | ||||
| 			seglen -= offset; | ||||
| 			from += offset; | ||||
| 			offset = 0; | ||||
| 		skb = tcp_write_queue_tail(sk); | ||||
| 		if (tcp_send_head(sk)) { | ||||
| 			if (skb->ip_summed == CHECKSUM_NONE) | ||||
| 				max = mss_now; | ||||
| 			copy = max - skb->len; | ||||
| 		} | ||||
| 
 | ||||
| 		while (seglen > 0) { | ||||
| 			int copy = 0; | ||||
| 			int max = size_goal; | ||||
| 
 | ||||
| 			skb = tcp_write_queue_tail(sk); | ||||
| 			if (tcp_send_head(sk)) { | ||||
| 				if (skb->ip_summed == CHECKSUM_NONE) | ||||
| 					max = mss_now; | ||||
| 				copy = max - skb->len; | ||||
| 			} | ||||
| 
 | ||||
| 			if (copy <= 0) { | ||||
| 		if (copy <= 0) { | ||||
| new_segment: | ||||
| 				/* Allocate new segment. If the interface is SG,
 | ||||
| 				 * allocate skb fitting to single page. | ||||
| 				 */ | ||||
| 				if (!sk_stream_memory_free(sk)) | ||||
| 					goto wait_for_sndbuf; | ||||
| 			/* Allocate new segment. If the interface is SG,
 | ||||
| 			 * allocate skb fitting to single page. | ||||
| 			 */ | ||||
| 			if (!sk_stream_memory_free(sk)) | ||||
| 				goto wait_for_sndbuf; | ||||
| 
 | ||||
| 				skb = sk_stream_alloc_skb(sk, | ||||
| 							  select_size(sk, sg), | ||||
| 							  sk->sk_allocation); | ||||
| 				if (!skb) | ||||
| 					goto wait_for_memory; | ||||
| 			skb = sk_stream_alloc_skb(sk, | ||||
| 						  select_size(sk, sg), | ||||
| 						  sk->sk_allocation); | ||||
| 			if (!skb) | ||||
| 				goto wait_for_memory; | ||||
| 
 | ||||
| 				/*
 | ||||
| 				 * Check whether we can use HW checksum. | ||||
| 				 */ | ||||
| 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM) | ||||
| 					skb->ip_summed = CHECKSUM_PARTIAL; | ||||
| 			/*
 | ||||
| 			 * Check whether we can use HW checksum. | ||||
| 			 */ | ||||
| 			if (sk->sk_route_caps & NETIF_F_ALL_CSUM) | ||||
| 				skb->ip_summed = CHECKSUM_PARTIAL; | ||||
| 
 | ||||
| 				skb_entail(sk, skb); | ||||
| 				copy = size_goal; | ||||
| 				max = size_goal; | ||||
| 			skb_entail(sk, skb); | ||||
| 			copy = size_goal; | ||||
| 			max = size_goal; | ||||
| 
 | ||||
| 				/* All packets are restored as if they have
 | ||||
| 				 * already been sent. skb_mstamp isn't set to | ||||
| 				 * avoid wrong rtt estimation. | ||||
| 				 */ | ||||
| 				if (tp->repair) | ||||
| 					TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; | ||||
| 			} | ||||
| 			/* All packets are restored as if they have
 | ||||
| 			 * already been sent. skb_mstamp isn't set to | ||||
| 			 * avoid wrong rtt estimation. | ||||
| 			 */ | ||||
| 			if (tp->repair) | ||||
| 				TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; | ||||
| 		} | ||||
| 
 | ||||
| 			/* Try to append data to the end of skb. */ | ||||
| 			if (copy > seglen) | ||||
| 				copy = seglen; | ||||
| 		/* Try to append data to the end of skb. */ | ||||
| 		if (copy > iov_iter_count(&msg->msg_iter)) | ||||
| 			copy = iov_iter_count(&msg->msg_iter); | ||||
| 
 | ||||
| 			/* Where to copy to? */ | ||||
| 			if (skb_availroom(skb) > 0) { | ||||
| 				/* We have some space in skb head. Superb! */ | ||||
| 				copy = min_t(int, copy, skb_availroom(skb)); | ||||
| 				err = skb_add_data_nocache(sk, skb, from, copy); | ||||
| 				if (err) | ||||
| 					goto do_fault; | ||||
| 			} else { | ||||
| 				bool merge = true; | ||||
| 				int i = skb_shinfo(skb)->nr_frags; | ||||
| 				struct page_frag *pfrag = sk_page_frag(sk); | ||||
| 		/* Where to copy to? */ | ||||
| 		if (skb_availroom(skb) > 0) { | ||||
| 			/* We have some space in skb head. Superb! */ | ||||
| 			copy = min_t(int, copy, skb_availroom(skb)); | ||||
| 			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); | ||||
| 			if (err) | ||||
| 				goto do_fault; | ||||
| 		} else { | ||||
| 			bool merge = true; | ||||
| 			int i = skb_shinfo(skb)->nr_frags; | ||||
| 			struct page_frag *pfrag = sk_page_frag(sk); | ||||
| 
 | ||||
| 				if (!sk_page_frag_refill(sk, pfrag)) | ||||
| 					goto wait_for_memory; | ||||
| 			if (!sk_page_frag_refill(sk, pfrag)) | ||||
| 				goto wait_for_memory; | ||||
| 
 | ||||
| 				if (!skb_can_coalesce(skb, i, pfrag->page, | ||||
| 						      pfrag->offset)) { | ||||
| 					if (i == MAX_SKB_FRAGS || !sg) { | ||||
| 						tcp_mark_push(tp, skb); | ||||
| 						goto new_segment; | ||||
| 					} | ||||
| 					merge = false; | ||||
| 			if (!skb_can_coalesce(skb, i, pfrag->page, | ||||
| 					      pfrag->offset)) { | ||||
| 				if (i == MAX_SKB_FRAGS || !sg) { | ||||
| 					tcp_mark_push(tp, skb); | ||||
| 					goto new_segment; | ||||
| 				} | ||||
| 
 | ||||
| 				copy = min_t(int, copy, pfrag->size - pfrag->offset); | ||||
| 
 | ||||
| 				if (!sk_wmem_schedule(sk, copy)) | ||||
| 					goto wait_for_memory; | ||||
| 
 | ||||
| 				err = skb_copy_to_page_nocache(sk, from, skb, | ||||
| 							       pfrag->page, | ||||
| 							       pfrag->offset, | ||||
| 							       copy); | ||||
| 				if (err) | ||||
| 					goto do_error; | ||||
| 
 | ||||
| 				/* Update the skb. */ | ||||
| 				if (merge) { | ||||
| 					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | ||||
| 				} else { | ||||
| 					skb_fill_page_desc(skb, i, pfrag->page, | ||||
| 							   pfrag->offset, copy); | ||||
| 					get_page(pfrag->page); | ||||
| 				} | ||||
| 				pfrag->offset += copy; | ||||
| 				merge = false; | ||||
| 			} | ||||
| 
 | ||||
| 			if (!copied) | ||||
| 				TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; | ||||
| 			copy = min_t(int, copy, pfrag->size - pfrag->offset); | ||||
| 
 | ||||
| 			tp->write_seq += copy; | ||||
| 			TCP_SKB_CB(skb)->end_seq += copy; | ||||
| 			tcp_skb_pcount_set(skb, 0); | ||||
| 			if (!sk_wmem_schedule(sk, copy)) | ||||
| 				goto wait_for_memory; | ||||
| 
 | ||||
| 			from += copy; | ||||
| 			copied += copy; | ||||
| 			if ((seglen -= copy) == 0 && iovlen == 0) { | ||||
| 				tcp_tx_timestamp(sk, skb); | ||||
| 				goto out; | ||||
| 			} | ||||
| 
 | ||||
| 			if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) | ||||
| 				continue; | ||||
| 
 | ||||
| 			if (forced_push(tp)) { | ||||
| 				tcp_mark_push(tp, skb); | ||||
| 				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); | ||||
| 			} else if (skb == tcp_send_head(sk)) | ||||
| 				tcp_push_one(sk, mss_now); | ||||
| 			continue; | ||||
| 
 | ||||
| wait_for_sndbuf: | ||||
| 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||||
| wait_for_memory: | ||||
| 			if (copied) | ||||
| 				tcp_push(sk, flags & ~MSG_MORE, mss_now, | ||||
| 					 TCP_NAGLE_PUSH, size_goal); | ||||
| 
 | ||||
| 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | ||||
| 			err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, | ||||
| 						       pfrag->page, | ||||
| 						       pfrag->offset, | ||||
| 						       copy); | ||||
| 			if (err) | ||||
| 				goto do_error; | ||||
| 
 | ||||
| 			mss_now = tcp_send_mss(sk, &size_goal, flags); | ||||
| 			/* Update the skb. */ | ||||
| 			if (merge) { | ||||
| 				skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); | ||||
| 			} else { | ||||
| 				skb_fill_page_desc(skb, i, pfrag->page, | ||||
| 						   pfrag->offset, copy); | ||||
| 				get_page(pfrag->page); | ||||
| 			} | ||||
| 			pfrag->offset += copy; | ||||
| 		} | ||||
| 
 | ||||
| 		if (!copied) | ||||
| 			TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; | ||||
| 
 | ||||
| 		tp->write_seq += copy; | ||||
| 		TCP_SKB_CB(skb)->end_seq += copy; | ||||
| 		tcp_skb_pcount_set(skb, 0); | ||||
| 
 | ||||
| 		copied += copy; | ||||
| 		if (!iov_iter_count(&msg->msg_iter)) { | ||||
| 			tcp_tx_timestamp(sk, skb); | ||||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) | ||||
| 			continue; | ||||
| 
 | ||||
| 		if (forced_push(tp)) { | ||||
| 			tcp_mark_push(tp, skb); | ||||
| 			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); | ||||
| 		} else if (skb == tcp_send_head(sk)) | ||||
| 			tcp_push_one(sk, mss_now); | ||||
| 		continue; | ||||
| 
 | ||||
| wait_for_sndbuf: | ||||
| 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||||
| wait_for_memory: | ||||
| 		if (copied) | ||||
| 			tcp_push(sk, flags & ~MSG_MORE, mss_now, | ||||
| 				 TCP_NAGLE_PUSH, size_goal); | ||||
| 
 | ||||
| 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | ||||
| 			goto do_error; | ||||
| 
 | ||||
| 		mss_now = tcp_send_mss(sk, &size_goal, flags); | ||||
| 	} | ||||
| 
 | ||||
| out: | ||||
|  | ||||
| @ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | ||||
| 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) | ||||
| 		goto err_free; | ||||
| 
 | ||||
| 	if (memcpy_from_msg(skb_put(skb, size), msg, size)) | ||||
| 	if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) | ||||
| 		goto err_free; | ||||
| 
 | ||||
| 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; | ||||
|  | ||||
| @ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | ||||
| { | ||||
| 	struct tcp_sock *tp = tcp_sk(sk); | ||||
| 	struct tcp_fastopen_request *fo = tp->fastopen_req; | ||||
| 	int syn_loss = 0, space, err = 0; | ||||
| 	int syn_loss = 0, space, err = 0, copied; | ||||
| 	unsigned long last_syn_loss = 0; | ||||
| 	struct sk_buff *syn_data; | ||||
| 
 | ||||
| @ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) | ||||
| 		goto fallback; | ||||
| 	syn_data->ip_summed = CHECKSUM_PARTIAL; | ||||
| 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); | ||||
| 	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), | ||||
| 					 fo->data->msg_iter.iov, 0, space))) { | ||||
| 	copied = copy_from_iter(skb_put(syn_data, space), space, | ||||
| 				&fo->data->msg_iter); | ||||
| 	if (unlikely(!copied)) { | ||||
| 		kfree_skb(syn_data); | ||||
| 		goto fallback; | ||||
| 	} | ||||
| 	if (copied != space) { | ||||
| 		skb_trim(syn_data, copied); | ||||
| 		space = copied; | ||||
| 	} | ||||
| 
 | ||||
| 	/* No more data pending in inet_wait_for_connect() */ | ||||
| 	if (space == fo->size) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user