ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knobs
Applications in different network namespaces might need to recycle TIME-WAIT sockets quickly, independently of the host. Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
801822d1be
commit
1946e672c1
@@ -29,16 +29,6 @@
|
|||||||
|
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
|
|
||||||
struct inet_hashinfo;
|
|
||||||
|
|
||||||
struct inet_timewait_death_row {
|
|
||||||
atomic_t tw_count;
|
|
||||||
|
|
||||||
struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
|
|
||||||
int sysctl_tw_recycle;
|
|
||||||
int sysctl_max_tw_buckets;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct inet_bind_bucket;
|
struct inet_bind_bucket;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -125,8 +115,7 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo
|
|||||||
|
|
||||||
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
|
void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
|
||||||
|
|
||||||
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
|
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);
|
||||||
struct inet_timewait_death_row *twdr, int family);
|
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
struct net *twsk_net(const struct inet_timewait_sock *twsk)
|
struct net *twsk_net(const struct inet_timewait_sock *twsk)
|
||||||
|
|||||||
@@ -27,6 +27,16 @@ struct ping_group_range {
|
|||||||
kgid_t range[2];
|
kgid_t range[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct inet_hashinfo;
|
||||||
|
|
||||||
|
struct inet_timewait_death_row {
|
||||||
|
atomic_t tw_count;
|
||||||
|
|
||||||
|
struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp;
|
||||||
|
int sysctl_tw_recycle;
|
||||||
|
int sysctl_max_tw_buckets;
|
||||||
|
};
|
||||||
|
|
||||||
struct netns_ipv4 {
|
struct netns_ipv4 {
|
||||||
#ifdef CONFIG_SYSCTL
|
#ifdef CONFIG_SYSCTL
|
||||||
struct ctl_table_header *forw_hdr;
|
struct ctl_table_header *forw_hdr;
|
||||||
@@ -111,6 +121,7 @@ struct netns_ipv4 {
|
|||||||
int sysctl_tcp_fin_timeout;
|
int sysctl_tcp_fin_timeout;
|
||||||
unsigned int sysctl_tcp_notsent_lowat;
|
unsigned int sysctl_tcp_notsent_lowat;
|
||||||
int sysctl_tcp_tw_reuse;
|
int sysctl_tcp_tw_reuse;
|
||||||
|
struct inet_timewait_death_row tcp_death_row;
|
||||||
|
|
||||||
int sysctl_igmp_max_memberships;
|
int sysctl_igmp_max_memberships;
|
||||||
int sysctl_igmp_max_msf;
|
int sysctl_igmp_max_msf;
|
||||||
|
|||||||
@@ -231,7 +231,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
|
|||||||
*/
|
*/
|
||||||
#define TFO_SERVER_WO_SOCKOPT1 0x400
|
#define TFO_SERVER_WO_SOCKOPT1 0x400
|
||||||
|
|
||||||
extern struct inet_timewait_death_row tcp_death_row;
|
|
||||||
|
|
||||||
/* sysctl variables for tcp */
|
/* sysctl variables for tcp */
|
||||||
extern int sysctl_tcp_timestamps;
|
extern int sysctl_tcp_timestamps;
|
||||||
|
|||||||
@@ -1831,8 +1831,6 @@ static int __init inet_init(void)
|
|||||||
|
|
||||||
ip_init();
|
ip_init();
|
||||||
|
|
||||||
tcp_v4_init();
|
|
||||||
|
|
||||||
/* Setup TCP slab cache for open requests. */
|
/* Setup TCP slab cache for open requests. */
|
||||||
tcp_init();
|
tcp_init();
|
||||||
|
|
||||||
|
|||||||
@@ -257,8 +257,7 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
|
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
|
||||||
|
|
||||||
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
|
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
|
||||||
struct inet_timewait_death_row *twdr, int family)
|
|
||||||
{
|
{
|
||||||
struct inet_timewait_sock *tw;
|
struct inet_timewait_sock *tw;
|
||||||
struct sock *sk;
|
struct sock *sk;
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
|
|||||||
socket_seq_show(seq);
|
socket_seq_show(seq);
|
||||||
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
|
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
|
||||||
sock_prot_inuse_get(net, &tcp_prot), orphans,
|
sock_prot_inuse_get(net, &tcp_prot), orphans,
|
||||||
atomic_read(&tcp_death_row.tw_count), sockets,
|
atomic_read(&net->ipv4.tcp_death_row.tw_count), sockets,
|
||||||
proto_memory_allocated(&tcp_prot));
|
proto_memory_allocated(&tcp_prot));
|
||||||
seq_printf(seq, "UDP: inuse %d mem %ld\n",
|
seq_printf(seq, "UDP: inuse %d mem %ld\n",
|
||||||
sock_prot_inuse_get(net, &udp_prot),
|
sock_prot_inuse_get(net, &udp_prot),
|
||||||
|
|||||||
@@ -289,13 +289,6 @@ static struct ctl_table ipv4_table[] = {
|
|||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec
|
.proc_handler = proc_dointvec
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.procname = "tcp_max_tw_buckets",
|
|
||||||
.data = &tcp_death_row.sysctl_max_tw_buckets,
|
|
||||||
.maxlen = sizeof(int),
|
|
||||||
.mode = 0644,
|
|
||||||
.proc_handler = proc_dointvec
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.procname = "tcp_fastopen",
|
.procname = "tcp_fastopen",
|
||||||
.data = &sysctl_tcp_fastopen,
|
.data = &sysctl_tcp_fastopen,
|
||||||
@@ -309,13 +302,6 @@ static struct ctl_table ipv4_table[] = {
|
|||||||
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
|
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
|
||||||
.proc_handler = proc_tcp_fastopen_key,
|
.proc_handler = proc_tcp_fastopen_key,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
.procname = "tcp_tw_recycle",
|
|
||||||
.data = &tcp_death_row.sysctl_tw_recycle,
|
|
||||||
.maxlen = sizeof(int),
|
|
||||||
.mode = 0644,
|
|
||||||
.proc_handler = proc_dointvec
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
.procname = "tcp_abort_on_overflow",
|
.procname = "tcp_abort_on_overflow",
|
||||||
.data = &sysctl_tcp_abort_on_overflow,
|
.data = &sysctl_tcp_abort_on_overflow,
|
||||||
@@ -960,6 +946,20 @@ static struct ctl_table ipv4_net_table[] = {
|
|||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec
|
.proc_handler = proc_dointvec
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "tcp_max_tw_buckets",
|
||||||
|
.data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.procname = "tcp_tw_recycle",
|
||||||
|
.data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec
|
||||||
|
},
|
||||||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
#ifdef CONFIG_IP_ROUTE_MULTIPATH
|
||||||
{
|
{
|
||||||
.procname = "fib_multipath_use_neigh",
|
.procname = "fib_multipath_use_neigh",
|
||||||
|
|||||||
@@ -3334,6 +3334,7 @@ void __init tcp_init(void)
|
|||||||
|
|
||||||
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
|
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
|
||||||
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
|
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
|
||||||
|
inet_hashinfo_init(&tcp_hashinfo);
|
||||||
tcp_hashinfo.bind_bucket_cachep =
|
tcp_hashinfo.bind_bucket_cachep =
|
||||||
kmem_cache_create("tcp_bind_bucket",
|
kmem_cache_create("tcp_bind_bucket",
|
||||||
sizeof(struct inet_bind_bucket), 0,
|
sizeof(struct inet_bind_bucket), 0,
|
||||||
@@ -3378,7 +3379,6 @@ void __init tcp_init(void)
|
|||||||
|
|
||||||
cnt = tcp_hashinfo.ehash_mask + 1;
|
cnt = tcp_hashinfo.ehash_mask + 1;
|
||||||
|
|
||||||
tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
|
|
||||||
sysctl_tcp_max_orphans = cnt / 2;
|
sysctl_tcp_max_orphans = cnt / 2;
|
||||||
sysctl_max_syn_backlog = max(128, cnt / 256);
|
sysctl_max_syn_backlog = max(128, cnt / 256);
|
||||||
|
|
||||||
@@ -3399,6 +3399,7 @@ void __init tcp_init(void)
|
|||||||
pr_info("Hash tables configured (established %u bind %u)\n",
|
pr_info("Hash tables configured (established %u bind %u)\n",
|
||||||
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
|
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
|
||||||
|
|
||||||
|
tcp_v4_init();
|
||||||
tcp_metrics_init();
|
tcp_metrics_init();
|
||||||
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
|
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
|
||||||
tcp_tasklet_init();
|
tcp_tasklet_init();
|
||||||
|
|||||||
@@ -6363,7 +6363,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
|
|||||||
* timewait bucket, so that all the necessary checks
|
* timewait bucket, so that all the necessary checks
|
||||||
* are made in the function processing timewait state.
|
* are made in the function processing timewait state.
|
||||||
*/
|
*/
|
||||||
if (tcp_death_row.sysctl_tw_recycle) {
|
if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
|
||||||
bool strict;
|
bool strict;
|
||||||
|
|
||||||
dst = af_ops->route_req(sk, &fl, req, &strict);
|
dst = af_ops->route_req(sk, &fl, req, &strict);
|
||||||
|
|||||||
@@ -146,6 +146,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|||||||
struct rtable *rt;
|
struct rtable *rt;
|
||||||
int err;
|
int err;
|
||||||
struct ip_options_rcu *inet_opt;
|
struct ip_options_rcu *inet_opt;
|
||||||
|
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||||
|
|
||||||
if (addr_len < sizeof(struct sockaddr_in))
|
if (addr_len < sizeof(struct sockaddr_in))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -196,7 +197,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|||||||
tp->write_seq = 0;
|
tp->write_seq = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tcp_death_row.sysctl_tw_recycle &&
|
if (tcp_death_row->sysctl_tw_recycle &&
|
||||||
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
|
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
|
||||||
tcp_fetch_timewait_stamp(sk, &rt->dst);
|
tcp_fetch_timewait_stamp(sk, &rt->dst);
|
||||||
|
|
||||||
@@ -215,7 +216,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
|
|||||||
* complete initialization after this.
|
* complete initialization after this.
|
||||||
*/
|
*/
|
||||||
tcp_set_state(sk, TCP_SYN_SENT);
|
tcp_set_state(sk, TCP_SYN_SENT);
|
||||||
err = inet_hash_connect(&tcp_death_row, sk);
|
err = inet_hash_connect(tcp_death_row, sk);
|
||||||
if (err)
|
if (err)
|
||||||
goto failure;
|
goto failure;
|
||||||
|
|
||||||
@@ -2457,6 +2458,10 @@ static int __net_init tcp_sk_init(struct net *net)
|
|||||||
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
|
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
|
||||||
net->ipv4.sysctl_tcp_tw_reuse = 0;
|
net->ipv4.sysctl_tcp_tw_reuse = 0;
|
||||||
|
|
||||||
|
net->ipv4.tcp_death_row.sysctl_tw_recycle = 0;
|
||||||
|
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (tcp_hashinfo.ehash_mask + 1) / 2;
|
||||||
|
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
fail:
|
fail:
|
||||||
tcp_sk_exit(net);
|
tcp_sk_exit(net);
|
||||||
@@ -2466,7 +2471,7 @@ fail:
|
|||||||
|
|
||||||
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
|
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
|
||||||
{
|
{
|
||||||
inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
|
inet_twsk_purge(&tcp_hashinfo, AF_INET);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct pernet_operations __net_initdata tcp_sk_ops = {
|
static struct pernet_operations __net_initdata tcp_sk_ops = {
|
||||||
@@ -2477,7 +2482,6 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
|
|||||||
|
|
||||||
void __init tcp_v4_init(void)
|
void __init tcp_v4_init(void)
|
||||||
{
|
{
|
||||||
inet_hashinfo_init(&tcp_hashinfo);
|
|
||||||
if (register_pernet_subsys(&tcp_sk_ops))
|
if (register_pernet_subsys(&tcp_sk_ops))
|
||||||
panic("Failed to create the TCP control socket.\n");
|
panic("Failed to create the TCP control socket.\n");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,12 +29,6 @@
|
|||||||
|
|
||||||
int sysctl_tcp_abort_on_overflow __read_mostly;
|
int sysctl_tcp_abort_on_overflow __read_mostly;
|
||||||
|
|
||||||
struct inet_timewait_death_row tcp_death_row = {
|
|
||||||
.sysctl_max_tw_buckets = NR_FILE * 2,
|
|
||||||
.hashinfo = &tcp_hashinfo,
|
|
||||||
};
|
|
||||||
EXPORT_SYMBOL_GPL(tcp_death_row);
|
|
||||||
|
|
||||||
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
|
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
|
||||||
{
|
{
|
||||||
if (seq == s_win)
|
if (seq == s_win)
|
||||||
@@ -100,6 +94,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||||||
struct tcp_options_received tmp_opt;
|
struct tcp_options_received tmp_opt;
|
||||||
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
||||||
bool paws_reject = false;
|
bool paws_reject = false;
|
||||||
|
struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row;
|
||||||
|
|
||||||
tmp_opt.saw_tstamp = 0;
|
tmp_opt.saw_tstamp = 0;
|
||||||
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
|
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
|
||||||
@@ -153,7 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
|
|||||||
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
|
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tcp_death_row.sysctl_tw_recycle &&
|
if (tcp_death_row->sysctl_tw_recycle &&
|
||||||
tcptw->tw_ts_recent_stamp &&
|
tcptw->tw_ts_recent_stamp &&
|
||||||
tcp_tw_remember_stamp(tw))
|
tcp_tw_remember_stamp(tw))
|
||||||
inet_twsk_reschedule(tw, tw->tw_timeout);
|
inet_twsk_reschedule(tw, tw->tw_timeout);
|
||||||
@@ -264,11 +259,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
|
|||||||
const struct tcp_sock *tp = tcp_sk(sk);
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
struct inet_timewait_sock *tw;
|
struct inet_timewait_sock *tw;
|
||||||
bool recycle_ok = false;
|
bool recycle_ok = false;
|
||||||
|
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||||
|
|
||||||
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
|
if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
|
||||||
recycle_ok = tcp_remember_stamp(sk);
|
recycle_ok = tcp_remember_stamp(sk);
|
||||||
|
|
||||||
tw = inet_twsk_alloc(sk, &tcp_death_row, state);
|
tw = inet_twsk_alloc(sk, tcp_death_row, state);
|
||||||
|
|
||||||
if (tw) {
|
if (tw) {
|
||||||
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
|
||||||
|
|||||||
@@ -123,6 +123,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
struct dst_entry *dst;
|
struct dst_entry *dst;
|
||||||
int addr_type;
|
int addr_type;
|
||||||
int err;
|
int err;
|
||||||
|
struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
|
||||||
|
|
||||||
if (addr_len < SIN6_LEN_RFC2133)
|
if (addr_len < SIN6_LEN_RFC2133)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -258,7 +259,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
sk->sk_gso_type = SKB_GSO_TCPV6;
|
sk->sk_gso_type = SKB_GSO_TCPV6;
|
||||||
ip6_dst_store(sk, dst, NULL, NULL);
|
ip6_dst_store(sk, dst, NULL, NULL);
|
||||||
|
|
||||||
if (tcp_death_row.sysctl_tw_recycle &&
|
if (tcp_death_row->sysctl_tw_recycle &&
|
||||||
!tp->rx_opt.ts_recent_stamp &&
|
!tp->rx_opt.ts_recent_stamp &&
|
||||||
ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
|
ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
|
||||||
tcp_fetch_timewait_stamp(sk, dst);
|
tcp_fetch_timewait_stamp(sk, dst);
|
||||||
@@ -273,7 +274,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
|
|||||||
inet->inet_dport = usin->sin6_port;
|
inet->inet_dport = usin->sin6_port;
|
||||||
|
|
||||||
tcp_set_state(sk, TCP_SYN_SENT);
|
tcp_set_state(sk, TCP_SYN_SENT);
|
||||||
err = inet6_hash_connect(&tcp_death_row, sk);
|
err = inet6_hash_connect(tcp_death_row, sk);
|
||||||
if (err)
|
if (err)
|
||||||
goto late_failure;
|
goto late_failure;
|
||||||
|
|
||||||
@@ -1948,7 +1949,7 @@ static void __net_exit tcpv6_net_exit(struct net *net)
|
|||||||
|
|
||||||
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
|
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
|
||||||
{
|
{
|
||||||
inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
|
inet_twsk_purge(&tcp_hashinfo, AF_INET6);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct pernet_operations tcpv6_net_ops = {
|
static struct pernet_operations tcpv6_net_ops = {
|
||||||
|
|||||||
Reference in New Issue
Block a user