Merge branch 'master' of git://1984.lsi.us.es/net-next-2.6

Conflicts:
	net/ipv4/route.c

Signed-off-by: Patrick McHardy <kaber@trash.net>
This commit is contained in:
Patrick McHardy
2011-01-14 14:12:37 +01:00
5434 changed files with 383955 additions and 230618 deletions

View File

@@ -139,6 +139,8 @@ static unsigned long expires_ljiffies;
*/
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_default_mtu(const struct dst_entry *dst);
static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
@@ -155,6 +157,8 @@ static struct dst_ops ipv4_dst_ops = {
.protocol = cpu_to_be16(ETH_P_IP),
.gc = rt_garbage_collect,
.check = ipv4_dst_check,
.default_advmss = ipv4_default_advmss,
.default_mtu = ipv4_default_mtu,
.destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
@@ -383,8 +387,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
(__force u32)r->rt_gateway,
r->rt_flags, atomic_read(&r->dst.__refcnt),
r->dst.__use, 0, (__force u32)r->rt_src,
(dst_metric(&r->dst, RTAX_ADVMSS) ?
(int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
dst_metric_advmss(&r->dst) + 40,
dst_metric(&r->dst, RTAX_WINDOW),
(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
dst_metric(&r->dst, RTAX_RTTVAR)),
@@ -684,17 +687,17 @@ static inline bool rt_caching(const struct net *net)
static inline bool compare_hash_inputs(const struct flowi *fl1,
const struct flowi *fl2)
{
return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
(fl1->iif ^ fl2->iif)) == 0);
}
static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
{
return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
(fl1->mark ^ fl2->mark) |
(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
(*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
(fl1->oif ^ fl2->oif) |
(fl1->iif ^ fl2->iif)) == 0;
}
@@ -714,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth)
* Can be called by a softirq or a process.
* In the latter case, we want to be rescheduled if necessary
*/
static void rt_do_flush(int process_context)
static void rt_do_flush(struct net *net, int process_context)
{
unsigned int i;
struct rtable *rth, *next;
struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
struct rtable __rcu **pprev;
struct rtable *list;
if (process_context && need_resched())
cond_resched();
rth = rcu_dereference_raw(rt_hash_table[i].chain);
@@ -728,50 +733,32 @@ static void rt_do_flush(int process_context)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
struct rtable __rcu **prev;
struct rtable *p;
rth = rcu_dereference_protected(rt_hash_table[i].chain,
list = NULL;
pprev = &rt_hash_table[i].chain;
rth = rcu_dereference_protected(*pprev,
lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
for (tail = rth; tail;
tail = rcu_dereference_protected(tail->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
rt_hash_table[i].chain = tail;
while (rth) {
next = rcu_dereference_protected(rth->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
for (p = rcu_dereference_protected(*prev,
lockdep_is_held(rt_hash_lock_addr(i)));
p != NULL;
p = next) {
next = rcu_dereference_protected(p->dst.rt_next,
lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
if (!net ||
net_eq(dev_net(rth->dst.dev), net)) {
rcu_assign_pointer(*pprev, next);
rcu_assign_pointer(rth->dst.rt_next, list);
list = rth;
} else {
*prev = next;
rt_free(p);
pprev = &rth->dst.rt_next;
}
rth = next;
}
}
#else
rth = rcu_dereference_protected(rt_hash_table[i].chain,
lockdep_is_held(rt_hash_lock_addr(i)));
rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
for (; list; list = next) {
next = rcu_dereference_protected(list->dst.rt_next, 1);
rt_free(list);
}
}
}
@@ -919,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay)
{
rt_cache_invalidate(net);
if (delay >= 0)
rt_do_flush(!in_softirq());
rt_do_flush(net, !in_softirq());
}
/* Flush previous cache invalidated entries from the cache */
void rt_cache_flush_batch(void)
void rt_cache_flush_batch(struct net *net)
{
rt_do_flush(!in_softirq());
rt_do_flush(net, !in_softirq());
}
static void rt_emergency_hash_rebuild(struct net *net)
@@ -1289,7 +1276,7 @@ void rt_bind_peer(struct rtable *rt, int create)
{
struct inet_peer *peer;
peer = inet_getpeer(rt->rt_dst, create);
peer = inet_getpeer_v4(rt->rt_dst, create);
if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
inet_putpeer(peer);
@@ -1686,11 +1673,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
if (mtu < dst_mtu(&rth->dst)) {
dst_confirm(&rth->dst);
if (mtu < ip_rt_min_pmtu) {
u32 lock = dst_metric(&rth->dst,
RTAX_LOCK);
mtu = ip_rt_min_pmtu;
rth->dst.metrics[RTAX_LOCK-1] |=
(1 << RTAX_MTU);
lock |= (1 << RTAX_MTU);
dst_metric_set(&rth->dst, RTAX_LOCK,
lock);
}
rth->dst.metrics[RTAX_MTU-1] = mtu;
dst_metric_set(&rth->dst, RTAX_MTU, mtu);
dst_set_expires(&rth->dst,
ip_rt_mtu_expires);
}
@@ -1708,10 +1698,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
if (dst_mtu(dst) > mtu && mtu >= 68 &&
!(dst_metric_locked(dst, RTAX_MTU))) {
if (mtu < ip_rt_min_pmtu) {
u32 lock = dst_metric(dst, RTAX_LOCK);
mtu = ip_rt_min_pmtu;
dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU));
}
dst->metrics[RTAX_MTU-1] = mtu;
dst_metric_set(dst, RTAX_MTU, mtu);
dst_set_expires(dst, ip_rt_mtu_expires);
call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
@@ -1794,38 +1785,55 @@ static void set_class_tag(struct rtable *rt, u32 tag)
}
#endif
/*
 * Return the advertised MSS to use for @dst.
 *
 * If the raw RTAX_ADVMSS metric is non-zero it is returned unchanged.
 * Otherwise a default is derived from the device MTU minus 40, raised to
 * at least ip_rt_min_advmss and capped at 65535 - 40. Note that the cap is
 * applied only on this fallback path, not to an explicitly set metric.
 *
 * NOTE(review): the constant 40 is presumably the combined minimum
 * IPv4 + TCP header size -- confirm.
 */
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
if (advmss == 0) {
/* No metric recorded: derive a value from the device MTU. */
advmss = max_t(unsigned int, dst->dev->mtu - 40,
ip_rt_min_advmss);
/* Keep advmss + 40 within 65535. */
if (advmss > 65535 - 40)
advmss = 65535 - 40;
}
return advmss;
}
/*
 * Return the default MTU for @dst, starting from the device MTU.
 *
 * When the RTAX_MTU metric is locked and the route's rt_gateway differs
 * from its rt_dst, the value is limited to 576. The result is always
 * capped at IP_MAX_MTU.
 */
static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
{
unsigned int mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
/*
 * NOTE(review): the cast assumes @dst is the dst_entry embedded at the
 * start of a struct rtable -- confirm against the struct definition.
 */
const struct rtable *rt = (const struct rtable *) dst;
/* Locked MTU with gateway != destination: do not exceed 576. */
if (rt->rt_gateway != rt->rt_dst && mtu > 576)
mtu = 576;
}
/* Never report more than the protocol-wide maximum. */
if (mtu > IP_MAX_MTU)
mtu = IP_MAX_MTU;
return mtu;
}
static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
{
struct dst_entry *dst = &rt->dst;
struct fib_info *fi = res->fi;
if (fi) {
if (FIB_RES_GW(*res) &&
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
memcpy(rt->dst.metrics, fi->fib_metrics,
sizeof(rt->dst.metrics));
if (fi->fib_mtu == 0) {
rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
rt->rt_gateway != rt->rt_dst &&
rt->dst.dev->mtu > 576)
rt->dst.metrics[RTAX_MTU-1] = 576;
}
dst_import_metrics(dst, fi->fib_metrics);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
} else
rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
}
if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
if (dst_mtu(&rt->dst) > IP_MAX_MTU)
rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
ip_rt_min_advmss);
if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
if (dst_mtu(dst) > IP_MAX_MTU)
dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -2089,12 +2097,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = saddr,
.tos = tos,
.scope = RT_SCOPE_UNIVERSE,
} },
struct flowi fl = { .fl4_dst = daddr,
.fl4_src = saddr,
.fl4_tos = tos,
.fl4_scope = RT_SCOPE_UNIVERSE,
.mark = skb->mark,
.iif = dev->ifindex };
unsigned flags = 0;
@@ -2480,14 +2486,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
const struct flowi *oldflp)
{
u32 tos = RT_FL_TOS(oldflp);
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = oldflp->fl4_dst,
.saddr = oldflp->fl4_src,
.tos = tos & IPTOS_RT_MASK,
.scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK :
RT_SCOPE_UNIVERSE),
} },
struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
.fl4_src = oldflp->fl4_src,
.fl4_tos = tos & IPTOS_RT_MASK,
.fl4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
.mark = oldflp->mark,
.iif = net->loopback_dev->ifindex,
.oif = oldflp->oif };
@@ -2559,9 +2562,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
goto out;
/* RACE: Check return value of inet_select_addr instead. */
if (rcu_dereference(dev_out->ip_ptr) == NULL)
goto out; /* Wrong error code */
if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
err = -ENETUNREACH;
goto out;
}
if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
ipv4_is_lbcast(oldflp->fl4_dst)) {
if (!fl.fl4_src)
@@ -2622,8 +2626,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
}
if (res.type == RTN_LOCAL) {
if (!fl.fl4_src)
fl.fl4_src = fl.fl4_dst;
if (!fl.fl4_src) {
if (res.fi->fib_prefsrc)
fl.fl4_src = res.fi->fib_prefsrc;
else
fl.fl4_src = fl.fl4_dst;
}
dev_out = net->loopback_dev;
fl.oif = dev_out->ifindex;
res.fi = NULL;
@@ -2725,7 +2733,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
dst_copy_metrics(new, &ort->dst);
new->dev = ort->dst.dev;
if (new->dev)
@@ -2832,7 +2840,7 @@ static int rt_fill_info(struct net *net,
if (rt->rt_dst != rt->rt_gateway)
NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
if (rt->fl.mark)
@@ -2944,13 +2952,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
err = -rt->dst.error;
} else {
struct flowi fl = {
.nl_u = {
.ip4_u = {
.daddr = dst,
.saddr = src,
.tos = rtm->rtm_tos,
},
},
.fl4_dst = dst,
.fl4_src = src,
.fl4_tos = rtm->rtm_tos,
.oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
.mark = mark,
};