diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 960de8fe3f40..163b5ff1073c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1442,6 +1442,14 @@ max_hbh_length - INTEGER
 	header.
 	Default: INT_MAX (unlimited)
 
+skip_notify_on_dev_down - BOOLEAN
+	Controls whether an RTM_DELROUTE message is generated for routes
+	removed when a device is taken down or deleted. IPv4 does not
+	generate this message; IPv6 does by default. Setting this sysctl
+	to true skips the message, making IPv4 and IPv6 on par in relying
+	on userspace caches to track link events and evict routes.
+	Default: false (generate message)
+
 IPv6 Fragmentation:
 
 ip6frag_high_thresh - INTEGER
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index f06e968f1992..caabfd84a098 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -407,6 +407,9 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 
 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
 		    void *arg);
+void fib6_clean_all_skip_notify(struct net *net,
+				int (*func)(struct fib6_info *, void *arg),
+				void *arg);
 
 int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index f0e396ab9bec..ef1ed529f33c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -45,6 +45,7 @@ struct netns_sysctl_ipv6 {
 	int max_dst_opts_len;
 	int max_hbh_opts_len;
 	int seg6_flowlabel;
+	int skip_notify_on_dev_down;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e14d244c551f..9ba72d94d60f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -47,6 +47,7 @@ struct fib6_cleaner {
 	int (*func)(struct fib6_info *, void *arg);
 	int sernum;
 	void *arg;
+	bool skip_notify;
 };
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1956,6 +1957,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
 	struct nl_info info = {
 		.nl_net = c->net,
+		.skip_notify = c->skip_notify,
 	};
 
 	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
@@ -2007,7 +2009,7 @@ static int fib6_clean_node(struct fib6_walker *w)
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 			    int (*func)(struct fib6_info *, void *arg),
-			    int sernum, void *arg)
+			    int sernum, void *arg, bool skip_notify)
 {
 	struct fib6_cleaner c;
 
@@ -2019,13 +2021,14 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
 	c.sernum = sernum;
 	c.arg = arg;
 	c.net = net;
+	c.skip_notify = skip_notify;
 
 	fib6_walk(net, &c.w);
 }
 
 static void __fib6_clean_all(struct net *net,
 			     int (*func)(struct fib6_info *, void *),
-			     int sernum, void *arg)
+			     int sernum, void *arg, bool skip_notify)
 {
 	struct fib6_table *table;
 	struct hlist_head *head;
@@ -2037,7 +2040,7 @@ static void __fib6_clean_all(struct net *net,
 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
 			spin_lock_bh(&table->tb6_lock);
 			fib6_clean_tree(net, &table->tb6_root,
-					func, sernum, arg);
+					func, sernum, arg, skip_notify);
 			spin_unlock_bh(&table->tb6_lock);
 		}
 	}
@@ -2047,14 +2050,24 @@ static void __fib6_clean_all(struct net *net,
 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
 		    void *arg)
 {
-	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
+	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
+}
+
+/* Same walk as fib6_clean_all(), but with skip_notify = true handed down to
+ * the cleaner, so the nl_info built in fib6_clean_node() has .skip_notify set.
+ */
+void fib6_clean_all_skip_notify(struct net *net,
+				int (*func)(struct fib6_info *, void *),
+				void *arg)
+{
+	__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
 }
 
 static void fib6_flush_trees(struct net *net)
 {
 	int new_sernum = fib6_new_sernum(net);
 
-	__fib6_clean_all(net, NULL, new_sernum, NULL);
+	__fib6_clean_all(net, NULL, new_sernum, NULL, false);
 }
 
 /*
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bf4cd647d8b8..f4e08b0689a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4026,8 +4026,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
 			.event = event,
 		},
 	};
+	struct net *net = dev_net(dev);
 
-	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+	if (net->ipv6.sysctl.skip_notify_on_dev_down)
+		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
+	else
+		fib6_clean_all(net, fib6_ifdown, &arg);
 }
 
 void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -5031,6 +5035,9 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
 	return 0;
 }
 
+static int zero;
+static int one = 1;
+
 static struct ctl_table ipv6_route_table_template[] = {
 	{
 		.procname	=	"flush",
@@ -5102,6 +5109,15 @@ static struct ctl_table ipv6_route_table_template[] = {
 		.mode		=	0644,
 		.proc_handler	=	proc_dointvec_ms_jiffies,
 	},
+	{
+		.procname	=	"skip_notify_on_dev_down",
+		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
+		.maxlen		=	sizeof(int),
+		.mode		=	0644,
+		.proc_handler	=	proc_dointvec_minmax,
+		.extra1		=	&zero,
+		.extra2		=	&one,
+	},
 	{ }
 };
 
@@ -5125,6 +5141,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
@@ -5189,6 +5206,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
 
 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
 