Merge branch 'master' of git://1984.lsi.us.es/net-next-2.6
Conflicts:
	net/ipv4/route.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
 net/ipv4/route.c | 208 ++++++++++++++++++++++++++++----------------------

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -139,6 +139,8 @@ static unsigned long expires_ljiffies;
  */

 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int      ipv4_default_advmss(const struct dst_entry *dst);
+static unsigned int      ipv4_default_mtu(const struct dst_entry *dst);
 static void              ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void              ipv4_link_failure(struct sk_buff *skb);
@@ -155,6 +157,8 @@ static struct dst_ops ipv4_dst_ops = {
        .protocol =             cpu_to_be16(ETH_P_IP),
        .gc =                   rt_garbage_collect,
        .check =                ipv4_dst_check,
+       .default_advmss =       ipv4_default_advmss,
+       .default_mtu =          ipv4_default_mtu,
        .destroy =              ipv4_dst_destroy,
        .ifdown =               ipv4_dst_ifdown,
        .negative_advice =      ipv4_negative_advice,
@@ -383,8 +387,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
                        (__force u32)r->rt_gateway,
                        r->rt_flags, atomic_read(&r->dst.__refcnt),
                        r->dst.__use, 0, (__force u32)r->rt_src,
-                       (dst_metric(&r->dst, RTAX_ADVMSS) ?
-                        (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+                       dst_metric_advmss(&r->dst) + 40,
                        dst_metric(&r->dst, RTAX_WINDOW),
                        (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
                              dst_metric(&r->dst, RTAX_RTTVAR)),
@@ -684,17 +687,17 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
                                        const struct flowi *fl2)
 {
-       return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-               ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+       return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+               ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
                (fl1->iif ^ fl2->iif)) == 0);
 }

 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-       return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-               ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+       return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+               ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
                (fl1->mark ^ fl2->mark) |
-               (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+               (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
                (fl1->oif ^ fl2->oif) |
                (fl1->iif ^ fl2->iif)) == 0;
 }
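
Note on the comparison pattern above: the rewritten helpers XOR each pair of flattened flow fields and OR the results, so two keys match exactly when the accumulated value is zero. A minimal standalone C sketch of that branch-free comparison, using a hypothetical flow_key type rather than the kernel's struct flowi:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified flow key with flattened IPv4 fields. */
struct flow_key {
        uint32_t dst;   /* destination address */
        uint32_t src;   /* source address      */
        uint32_t iif;   /* input interface     */
};

/* Equal iff every XOR is zero; OR-ing the XORs avoids early branches. */
static int flow_key_equal(const struct flow_key *a, const struct flow_key *b)
{
        return ((a->dst ^ b->dst) |
                (a->src ^ b->src) |
                (a->iif ^ b->iif)) == 0;
}

int main(void)
{
        struct flow_key k1 = { 0x0a000001, 0x0a000002, 2 };
        struct flow_key k2 = { 0x0a000001, 0x0a000002, 2 };
        struct flow_key k3 = { 0x0a000001, 0x0a000003, 2 };

        printf("k1 == k2: %d\n", flow_key_equal(&k1, &k2)); /* prints 1 */
        printf("k1 == k3: %d\n", flow_key_equal(&k1, &k3)); /* prints 0 */
        return 0;
}
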
@@ -714,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
  * Can be called by a softirq or a process.
  * In the later case, we want to be reschedule if necessary
  */
-static void rt_do_flush(int process_context)
+static void rt_do_flush(struct net *net, int process_context)
 {
        unsigned int i;
        struct rtable *rth, *next;
-       struct rtable * tail;

        for (i = 0; i <= rt_hash_mask; i++) {
+               struct rtable __rcu **pprev;
+               struct rtable *list;
+
                if (process_context && need_resched())
                        cond_resched();
                rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -728,50 +733,32 @@ static void rt_do_flush(int process_context)
                        continue;

                spin_lock_bh(rt_hash_lock_addr(i));
-#ifdef CONFIG_NET_NS
-               {
-               struct rtable __rcu **prev;
-               struct rtable *p;
-
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
+
+               list = NULL;
+               pprev = &rt_hash_table[i].chain;
+               rth = rcu_dereference_protected(*pprev,
                        lockdep_is_held(rt_hash_lock_addr(i)));

-               /* defer releasing the head of the list after spin_unlock */
-               for (tail = rth; tail;
-                    tail = rcu_dereference_protected(tail->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i))))
-                       if (!rt_is_expired(tail))
-                               break;
-               if (rth != tail)
-                       rt_hash_table[i].chain = tail;
+               while (rth) {
+                       next = rcu_dereference_protected(rth->dst.rt_next,
+                               lockdep_is_held(rt_hash_lock_addr(i)));

-               /* call rt_free on entries after the tail requiring flush */
-               prev = &rt_hash_table[i].chain;
-               for (p = rcu_dereference_protected(*prev,
-                               lockdep_is_held(rt_hash_lock_addr(i)));
-                    p != NULL;
-                    p = next) {
-                       next = rcu_dereference_protected(p->dst.rt_next,
-                               lockdep_is_held(rt_hash_lock_addr(i)));
-                       if (!rt_is_expired(p)) {
-                               prev = &p->dst.rt_next;
+                       if (!net ||
+                           net_eq(dev_net(rth->dst.dev), net)) {
+                               rcu_assign_pointer(*pprev, next);
+                               rcu_assign_pointer(rth->dst.rt_next, list);
+                               list = rth;
                        } else {
-                               *prev = next;
-                               rt_free(p);
+                               pprev = &rth->dst.rt_next;
                        }
+                       rth = next;
                }
-               }
-#else
-               rth = rcu_dereference_protected(rt_hash_table[i].chain,
-                       lockdep_is_held(rt_hash_lock_addr(i)));
-               rcu_assign_pointer(rt_hash_table[i].chain, NULL);
-               tail = NULL;
-#endif
+
                spin_unlock_bh(rt_hash_lock_addr(i));

-               for (; rth != tail; rth = next) {
-                       next = rcu_dereference_protected(rth->dst.rt_next, 1);
-                       rt_free(rth);
+               for (; list; list = next) {
+                       next = rcu_dereference_protected(list->dst.rt_next, 1);
+                       rt_free(list);
                }
        }
 }
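
The reworked rt_do_flush() above walks each hash chain under the bucket lock, unlinks the entries belonging to the namespace being flushed onto a private list, and frees that list only after the lock is dropped. A standalone C sketch of the same unlink-then-free-outside-the-lock pattern on a plain singly linked list (hypothetical entry type and namespace field, no RCU or real locking):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical cache entry; `ns` stands in for the owning namespace. */
struct entry {
        int ns;
        struct entry *next;
};

/* Unlink all entries matching `ns` (or every entry when ns < 0) onto a
 * private list; the caller frees that list after dropping its lock. */
static struct entry *unlink_matching(struct entry **chain, int ns)
{
        struct entry **pprev = chain, *e = *chain, *list = NULL;

        while (e) {
                struct entry *next = e->next;

                if (ns < 0 || e->ns == ns) {
                        *pprev = next;      /* unlink from the chain */
                        e->next = list;     /* push onto the private list */
                        list = e;
                } else {
                        pprev = &e->next;   /* keep it, advance the link */
                }
                e = next;
        }
        return list;
}

int main(void)
{
        struct entry *chain = NULL, *list, *e;
        int i;

        for (i = 0; i < 6; i++) {
                e = malloc(sizeof(*e));
                e->ns = i % 2;          /* alternate between two namespaces */
                e->next = chain;
                chain = e;
        }

        list = unlink_matching(&chain, 1);   /* flush namespace 1 only */

        for (e = list; e; ) {                /* "after unlock": free them */
                struct entry *next = e->next;
                free(e);
                e = next;
        }
        for (e = chain; e; e = e->next)
                printf("kept entry in ns %d\n", e->ns);
        while (chain) { e = chain->next; free(chain); chain = e; }
        return 0;
}
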
@@ -919,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
 {
        rt_cache_invalidate(net);
        if (delay >= 0)
-               rt_do_flush(!in_softirq());
+               rt_do_flush(net, !in_softirq());
 }

 /* Flush previous cache invalidated entries from the cache */
-void rt_cache_flush_batch(void)
+void rt_cache_flush_batch(struct net *net)
 {
-       rt_do_flush(!in_softirq());
+       rt_do_flush(net, !in_softirq());
 }

 static void rt_emergency_hash_rebuild(struct net *net)
@@ -1289,7 +1276,7 @@ void rt_bind_peer(struct rtable *rt, int create)
 {
        struct inet_peer *peer;

-       peer = inet_getpeer(rt->rt_dst, create);
+       peer = inet_getpeer_v4(rt->rt_dst, create);

        if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
                inet_putpeer(peer);
@@ -1686,11 +1673,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
                        if (mtu < dst_mtu(&rth->dst)) {
                                dst_confirm(&rth->dst);
                                if (mtu < ip_rt_min_pmtu) {
+                                       u32 lock = dst_metric(&rth->dst,
+                                                             RTAX_LOCK);
                                        mtu = ip_rt_min_pmtu;
-                                       rth->dst.metrics[RTAX_LOCK-1] |=
-                                               (1 << RTAX_MTU);
+                                       lock |= (1 << RTAX_MTU);
+                                       dst_metric_set(&rth->dst, RTAX_LOCK,
+                                                      lock);
                                }
-                               rth->dst.metrics[RTAX_MTU-1] = mtu;
+                               dst_metric_set(&rth->dst, RTAX_MTU, mtu);
                                dst_set_expires(&rth->dst,
                                        ip_rt_mtu_expires);
                        }
@@ -1708,10 +1698,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
        if (dst_mtu(dst) > mtu && mtu >= 68 &&
            !(dst_metric_locked(dst, RTAX_MTU))) {
                if (mtu < ip_rt_min_pmtu) {
+                       u32 lock = dst_metric(dst, RTAX_LOCK);
                        mtu = ip_rt_min_pmtu;
-                       dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+                       dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
                }
-               dst->metrics[RTAX_MTU-1] = mtu;
+               dst_metric_set(dst, RTAX_MTU, mtu);
                dst_set_expires(dst, ip_rt_mtu_expires);
                call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
        }
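
The two hunks above apply the same conversion: direct writes to dst->metrics[RTAX_*-1] become dst_metric_set() calls. A standalone C sketch of the idea, with a hypothetical metrics block and a lock-bitmask entry standing in for RTAX_LOCK; it illustrates the accessor pattern only and is not the kernel API:

#include <stdint.h>
#include <stdio.h>

enum { METRIC_LOCK, METRIC_MTU, METRIC_MAX };

/* Hypothetical metrics block; entry METRIC_LOCK holds a bitmask of
 * locked entries, mirroring how RTAX_LOCK is used. */
struct metrics {
        uint32_t val[METRIC_MAX];
};

static uint32_t metric_get(const struct metrics *m, int idx)
{
        return m->val[idx];
}

/* All writes funnel through one place, so locking policy stays local. */
static void metric_set(struct metrics *m, int idx, uint32_t v)
{
        m->val[idx] = v;
}

static int metric_locked(const struct metrics *m, int idx)
{
        return (metric_get(m, METRIC_LOCK) & (1u << idx)) != 0;
}

int main(void)
{
        struct metrics m = { { 0 } };
        uint32_t mtu = 1500, min_mtu = 552;

        if (mtu < min_mtu) {
                /* clamp and lock the MTU entry, as the PMTU path does */
                metric_set(&m, METRIC_LOCK,
                           metric_get(&m, METRIC_LOCK) | (1u << METRIC_MTU));
                mtu = min_mtu;
        }
        if (!metric_locked(&m, METRIC_MTU))
                metric_set(&m, METRIC_MTU, mtu);

        printf("MTU metric: %u, locked: %d\n",
               metric_get(&m, METRIC_MTU), metric_locked(&m, METRIC_MTU));
        return 0;
}
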
@@ -1794,38 +1785,55 @@ static void set_class_tag(struct rtable *rt, u32 tag)
 }
 #endif

+static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
+{
+       unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
+
+       if (advmss == 0) {
+               advmss = max_t(unsigned int, dst->dev->mtu - 40,
+                              ip_rt_min_advmss);
+               if (advmss > 65535 - 40)
+                       advmss = 65535 - 40;
+       }
+       return advmss;
+}
+
+static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+{
+       unsigned int mtu = dst->dev->mtu;
+
+       if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+               const struct rtable *rt = (const struct rtable *) dst;
+
+               if (rt->rt_gateway != rt->rt_dst && mtu > 576)
+                       mtu = 576;
+       }
+
+       if (mtu > IP_MAX_MTU)
+               mtu = IP_MAX_MTU;
+
+       return mtu;
+}
+
 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 {
+       struct dst_entry *dst = &rt->dst;
        struct fib_info *fi = res->fi;

        if (fi) {
                if (FIB_RES_GW(*res) &&
                    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                        rt->rt_gateway = FIB_RES_GW(*res);
-               memcpy(rt->dst.metrics, fi->fib_metrics,
-                      sizeof(rt->dst.metrics));
-               if (fi->fib_mtu == 0) {
-                       rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
-                       if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
-                           rt->rt_gateway != rt->rt_dst &&
-                           rt->dst.dev->mtu > 576)
-                               rt->dst.metrics[RTAX_MTU-1] = 576;
-               }
+               dst_import_metrics(dst, fi->fib_metrics);
 #ifdef CONFIG_IP_ROUTE_CLASSID
-               rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+               dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
-       } else
-               rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
+       }

-       if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
-               rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-       if (dst_mtu(&rt->dst) > IP_MAX_MTU)
-               rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-       if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
-               rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
-                                      ip_rt_min_advmss);
-       if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
-               rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+       if (dst_mtu(dst) > IP_MAX_MTU)
+               dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
+       if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
+               dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);

 #ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -2089,12 +2097,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
        struct fib_result res;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
-       struct flowi fl = { .nl_u = { .ip4_u =
-                             { .daddr = daddr,
-                               .saddr = saddr,
-                               .tos = tos,
-                               .scope = RT_SCOPE_UNIVERSE,
-                             } },
+       struct flowi fl = { .fl4_dst = daddr,
+                           .fl4_src = saddr,
+                           .fl4_tos = tos,
+                           .fl4_scope = RT_SCOPE_UNIVERSE,
                            .mark = skb->mark,
                            .iif = dev->ifindex };
        unsigned        flags = 0;
@@ -2480,14 +2486,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                                const struct flowi *oldflp)
 {
        u32 tos = RT_FL_TOS(oldflp);
-       struct flowi fl = { .nl_u = { .ip4_u =
-                                     { .daddr = oldflp->fl4_dst,
-                                       .saddr = oldflp->fl4_src,
-                                       .tos = tos & IPTOS_RT_MASK,
-                                       .scope = ((tos & RTO_ONLINK) ?
-                                                 RT_SCOPE_LINK :
-                                                 RT_SCOPE_UNIVERSE),
-                                     } },
+       struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+                           .fl4_src = oldflp->fl4_src,
+                           .fl4_tos = tos & IPTOS_RT_MASK,
+                           .fl4_scope = ((tos & RTO_ONLINK) ?
+                                         RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
                            .mark = oldflp->mark,
                            .iif = net->loopback_dev->ifindex,
                            .oif = oldflp->oif };
@@ -2559,9 +2562,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                        goto out;

                /* RACE: Check return value of inet_select_addr instead. */
-               if (rcu_dereference(dev_out->ip_ptr) == NULL)
-                       goto out;       /* Wrong error code */
-
+               if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+                       err = -ENETUNREACH;
+                       goto out;
+               }
                if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
                    ipv4_is_lbcast(oldflp->fl4_dst)) {
                        if (!fl.fl4_src)
@@ -2622,8 +2626,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
        }

        if (res.type == RTN_LOCAL) {
-               if (!fl.fl4_src)
-                       fl.fl4_src = fl.fl4_dst;
+               if (!fl.fl4_src) {
+                       if (res.fi->fib_prefsrc)
+                               fl.fl4_src = res.fi->fib_prefsrc;
+                       else
+                               fl.fl4_src = fl.fl4_dst;
+               }
                dev_out = net->loopback_dev;
                fl.oif = dev_out->ifindex;
                res.fi = NULL;
@@ -2725,7 +2733,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard;
-               memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+               dst_copy_metrics(new, &ort->dst);

                new->dev = ort->dst.dev;
                if (new->dev)
@@ -2832,7 +2840,7 @@ static int rt_fill_info(struct net *net,
        if (rt->rt_dst != rt->rt_gateway)
                NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);

-       if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+       if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
                goto nla_put_failure;

        if (rt->fl.mark)
@@ -2944,13 +2952,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
                err = -rt->dst.error;
        } else {
                struct flowi fl = {
-                       .nl_u = {
-                               .ip4_u = {
-                                       .daddr = dst,
-                                       .saddr = src,
-                                       .tos = rtm->rtm_tos,
-                               },
-                       },
+                       .fl4_dst = dst,
+                       .fl4_src = src,
+                       .fl4_tos = rtm->rtm_tos,
                        .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
                        .mark = mark,
                };