net: add SOCK_RCU_FREE socket flag
We want a generic way to insert an RCU grace period before socket freeing for cases where SLAB_DESTROY_BY_RCU is adding too much overhead. SLAB_DESTROY_BY_RCU's strict rules force us to take a reference on the socket sk_refcnt, and that is a performance problem for UDP encapsulation and for TCP synflood behavior, as many CPUs might attempt atomic operations on a shared sk_refcnt. UDP sockets and TCP listeners can set SOCK_RCU_FREE so that their lookup can use traditional RCU rules, without refcount changes. They can set the flag only once hashed and visible by other CPUs. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Tom Herbert <tom@herbertland.com> Tested-by: Tom Herbert <tom@herbertland.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
		
							parent
							
								
									43e2dfb23e
								
							
						
					
					
						commit
						a4298e4522
					
| @@ -438,6 +438,7 @@ struct sock { | |||||||
| 						  struct sk_buff *skb); | 						  struct sk_buff *skb); | ||||||
| 	void                    (*sk_destruct)(struct sock *sk); | 	void                    (*sk_destruct)(struct sock *sk); | ||||||
| 	struct sock_reuseport __rcu	*sk_reuseport_cb; | 	struct sock_reuseport __rcu	*sk_reuseport_cb; | ||||||
|  | 	struct rcu_head		sk_rcu; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) | #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) | ||||||
| @@ -720,6 +721,7 @@ enum sock_flags { | |||||||
| 		     */ | 		     */ | ||||||
| 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ | 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */ | ||||||
| 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ | 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ | ||||||
|  | 	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) | #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) | ||||||
|  | |||||||
| @@ -1419,8 +1419,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | |||||||
| } | } | ||||||
| EXPORT_SYMBOL(sk_alloc); | EXPORT_SYMBOL(sk_alloc); | ||||||
| 
 | 
 | ||||||
| void sk_destruct(struct sock *sk) | /* Sockets having SOCK_RCU_FREE will call this function after one RCU
 | ||||||
|  |  * grace period. This is the case for UDP sockets and TCP listeners. | ||||||
|  |  */ | ||||||
|  | static void __sk_destruct(struct rcu_head *head) | ||||||
| { | { | ||||||
|  | 	struct sock *sk = container_of(head, struct sock, sk_rcu); | ||||||
| 	struct sk_filter *filter; | 	struct sk_filter *filter; | ||||||
| 
 | 
 | ||||||
| 	if (sk->sk_destruct) | 	if (sk->sk_destruct) | ||||||
| @@ -1449,6 +1453,14 @@ void sk_destruct(struct sock *sk) | |||||||
| 	sk_prot_free(sk->sk_prot_creator, sk); | 	sk_prot_free(sk->sk_prot_creator, sk); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void sk_destruct(struct sock *sk) | ||||||
|  | { | ||||||
|  | 	if (sock_flag(sk, SOCK_RCU_FREE)) | ||||||
|  | 		call_rcu(&sk->sk_rcu, __sk_destruct); | ||||||
|  | 	else | ||||||
|  | 		__sk_destruct(&sk->sk_rcu); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void __sk_free(struct sock *sk) | static void __sk_free(struct sock *sk) | ||||||
| { | { | ||||||
| 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) | 	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user