From 5481d73f81549e2a05cbbb49867a9a560c5292df Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:49 -0700 Subject: [PATCH 1/7] ipv4: Use accessors for fib_info nexthop data Use helpers to access fib_nh and fib_nhs fields of a fib_info. Drop the fib_dev macro which is an alias for the first nexthop. Replacements: fi->fib_dev --> fib_info_nh(fi, 0)->fib_nh_dev fi->fib_nh --> fib_info_nh(fi, 0) fi->fib_nh[i] --> fib_info_nh(fi, i) fi->fib_nhs --> fib_info_num_path(fi) where fib_info_nh(fi, i) returns fi->fib_nh[nhsel] and fib_info_num_path returns fi->fib_nhs. Move the existing fib_info_nhc to nexthop.h and define the new ones there. A later patch adds a check if a fib_info uses a nexthop object, and defining the helpers in nexthop.h avoid circular header dependencies. After this all remaining open coded references to fi->fib_nhs and fi->fib_nh are in: - fib_create_info and helpers used to lookup an existing fib_info entry, and - the netdev event functions fib_sync_down_dev and fib_sync_up. The latter two will not be reused for nexthops, and the fib_create_info will be updated to handle a nexthop in a fib_info. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/lag_mp.c | 29 +++++--- .../ethernet/mellanox/mlxsw/spectrum_router.c | 19 +++--- drivers/net/ethernet/rocker/rocker_ofdpa.c | 25 ++++--- include/net/ip_fib.h | 6 -- include/net/nexthop.h | 15 +++++ net/core/filter.c | 3 +- net/ipv4/fib_frontend.c | 11 ++-- net/ipv4/fib_lookup.h | 1 + net/ipv4/fib_rules.c | 8 ++- net/ipv4/fib_semantics.c | 66 +++++++++++-------- net/ipv4/fib_trie.c | 26 +++++--- net/ipv4/route.c | 3 +- 12 files changed, 132 insertions(+), 80 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 8212bfd05733..2cbfaa8da7fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "lag.h" #include "lag_mp.h" #include "mlx5_core.h" @@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle add/replace event */ - if (fi->fib_nhs == 1) { + nhs = fib_info_num_path(fi); + if (nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev; + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } - if (fi->fib_nhs != 2) + if (nhs != 2) return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) { + fib_nh0 = fib_info_nh(fi, 0); + fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && + fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } } else if (event == FIB_EVENT_NH_ADD && - fi->fib_nhs == 2) { + fib_info_num_path(fi) == 2) { mlx5_lag_set_port_affinity(ldev, 0); } } @@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -254,8 +262,9 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->fib_dev != ldev->pf[0].netdev && - fi->fib_dev != ldev->pf[1].netdev) { + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[0].netdev && + fib_dev != ldev->pf[1].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0ec52be7cc33..4f781358aef1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -3816,23 +3817,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct fib_info *fi) + struct fib_info *fi) { - return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK || - mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } static struct mlxsw_sp_nexthop_group * mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { + unsigned int nhs = fib_info_num_path(fi); struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; int i; int err; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), - GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -3840,11 +3843,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp->neigh_tbl = &arp_tbl; nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = fi->fib_nhs; + nh_grp->count = nhs; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - fib_nh = &fi->fib_nh[i]; + fib_nh = fib_info_nh(fi, i); err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) goto err_nexthop4_init; @@ -4282,9 +4285,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); - struct net_device *dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 30a49802fb51..47ed9d41047f 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "rocker.h" @@ -2286,8 +2287,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst, /* XXX support ECMP */ - nh = fi->fib_nh; - nh_on_port = (fi->fib_dev == ofdpa_port->dev); + nh = fib_info_nh(fi, 0); + nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev); has_gw = !!nh->fib_nh_gw4; if (has_gw && nh_on_port) { @@ -2737,11 +2738,13 @@ static int ofdpa_fib4_add(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; int err; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), @@ -2749,7 +2752,7 @@ static int ofdpa_fib4_add(struct rocker *rocker, fen_info->tb_id, 0); if (err) return err; - fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + nh->fib_nh_flags |= RTNH_F_OFFLOAD; return 0; } @@ -2758,13 +2761,15 @@ static int ofdpa_fib4_del(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; - fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), fen_info->dst_len, fen_info->fi, fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); @@ -2784,14 +2789,16 @@ static void ofdpa_fib4_abort(struct rocker *rocker) spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + struct fib_nh *nh; + if (flow_entry->key.tbl_id != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) continue; - ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, - rocker); + nh = fib_info_nh(flow_entry->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) continue; - flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, flow_entry); } diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 70ba0302c8c9..42b1a806f6f5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -153,7 +153,6 @@ struct fib_info { bool nh_updated; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].fib_nh_dev }; @@ -190,11 +189,6 @@ struct fib_result_nl { int err; }; -static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) -{ - return &fi->fib_nh[nhsel].nh_common; -} - #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 6e1b8f53624c..e501d77b82c8 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -192,4 +192,19 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh) nhi = rcu_dereference_rtnl(nh->nh_info); return nhi->reject_nh; } + +static inline unsigned int fib_info_num_path(const struct fib_info *fi) +{ + return fi->fib_nhs; +} + +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel].nh_common; +} + +static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel]; +} #endif diff --git a/net/core/filter.c b/net/core/filter.c index 55bfc941d17a..2ae72bbfa6d2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -4674,7 +4675,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (res.type != RTN_UNICAST) return BPF_FIB_LKUP_RET_NOT_FWDED; - if (res.fi->fib_nhs > 1) + if (fib_info_num_path(res.fi) > 1) fib_select_path(net, &res, &fl4, NULL); if (check_mtu) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7cdb8d0d164..a4691360b395 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -234,7 +235,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (table) { ret = RTN_UNICAST; if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { - if (!dev || dev == res.fi->fib_dev) + struct fib_nh *nh = fib_info_nh(res.fi, 0); + + if (!dev || dev == nh->fib_nh_dev) ret = res.type; } } @@ -321,8 +324,8 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) #ifdef CONFIG_IP_ROUTE_MULTIPATH int ret; - for (ret = 0; ret < fi->fib_nhs; ret++) { - struct fib_nh *nh = &fi->fib_nh[ret]; + for (ret = 0; ret < fib_info_num_path(fi); ret++) { + const struct fib_nh *nh = fib_info_nh(fi, ret); if (nh->fib_nh_dev == dev) { dev_match = true; @@ -333,7 +336,7 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) } } #else - if (fi->fib_nh[0].fib_nh_dev == dev) + if (fib_info_nh(fi, 0)->fib_nh_dev == dev) dev_match = true; #endif diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 7945f0534db7..a68b5e21ec51 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -5,6 +5,7 @@ #include #include #include +#include struct fib_alias { struct hlist_node fa_list; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index cfec3af54c8d..ab06fd73b343 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -31,6 +31,7 @@ #include #include #include +#include #include struct fib4_rule { @@ -145,8 +146,11 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg struct fib_result *result = (struct fib_result *) arg->result; struct net_device *dev = NULL; - if (result->fi) - dev = result->fi->fib_dev; + if (result->fi) { + struct fib_nh *nh = fib_info_nh(result->fi, 0); + + dev = nh->fib_nh_dev; + } /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 78648072783e..a37ff07718a8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -65,13 +66,13 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ for (nhsel = 0, nh = (fi)->fib_nh; \ - nhsel < (fi)->fib_nhs; \ + nhsel < fib_info_num_path((fi)); \ nh++, nhsel++) #define change_nexthops(fi) { \ int nhsel; struct fib_nh *nexthop_nh; \ for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ - nhsel < (fi)->fib_nhs; \ + nhsel < fib_info_num_path((fi)); \ nexthop_nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -271,11 +272,13 @@ void fib_release_info(struct fib_info *fi) spin_unlock_bh(&fib_info_lock); } -static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) +static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) { - const struct fib_nh *onh = ofi->fib_nh; + const struct fib_nh *onh; for_nexthops(fi) { + onh = fib_info_nh(ofi, nhsel); + if (nh->fib_nh_oif != onh->fib_nh_oif || nh->fib_nh_gw_family != onh->fib_nh_gw_family || nh->fib_nh_scope != onh->fib_nh_scope || @@ -296,8 +299,6 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) if (nh->fib_nh_gw_family == AF_INET6 && ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6)) return -1; - - onh++; } endfor_nexthops(fi); return 0; } @@ -326,7 +327,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) return (val ^ (val >> 7) ^ (val >> 12)) & mask; } -static struct fib_info *fib_find_info(const struct fib_info *nfi) +static struct fib_info *fib_find_info(struct fib_info *nfi) { struct hlist_head *head; struct fib_info *fi; @@ -390,13 +391,14 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_PRIORITY */ + nla_total_size(4) /* RTA_PREFSRC */ + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + unsigned int nhs = fib_info_num_path(fi); /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); - if (fi->fib_nhs) { + if (nhs) { size_t nh_encapsize = 0; - /* Also handles the special case fib_nhs == 1 */ + /* Also handles the special case nhs == 1 */ /* each nexthop is packed in an attribute */ size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); @@ -416,8 +418,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) } endfor_nexthops(fi); /* all nexthops are packed in a nested attribute */ - payload += nla_total_size((fi->fib_nhs * nhsize) + - nh_encapsize); + payload += nla_total_size((nhs * nhsize) + nh_encapsize); } @@ -584,6 +585,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, { struct net *net = fi->fib_net; struct fib_config fib_cfg; + struct fib_nh *nh; int ret; change_nexthops(fi) { @@ -646,24 +648,25 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } endfor_nexthops(fi); ret = -EINVAL; - if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) { + nh = fib_info_nh(fi, 0); + if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) { NL_SET_ERR_MSG(extack, "Nexthop device index does not match RTA_OIF"); goto errout; } if (cfg->fc_gw_family) { - if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family || + if (cfg->fc_gw_family != nh->fib_nh_gw_family || (cfg->fc_gw_family == AF_INET && - fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) || + nh->fib_nh_gw4 != cfg->fc_gw4) || (cfg->fc_gw_family == AF_INET6 && - ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) { + ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) { NL_SET_ERR_MSG(extack, "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); goto errout; } } #ifdef CONFIG_IP_ROUTE_CLASSID - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { + if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) { NL_SET_ERR_MSG(extack, "Nexthop class id does not match RTA_FLOW"); goto errout; @@ -679,7 +682,7 @@ static void fib_rebalance(struct fib_info *fi) int total; int w; - if (fi->fib_nhs < 2) + if (fib_info_num_path(fi) < 2) return; total = 0; @@ -761,27 +764,29 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, return 1; if (cfg->fc_oif || cfg->fc_gw_family) { + struct fib_nh *nh = fib_info_nh(fi, 0); + if (cfg->fc_encap) { if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, - fi->fib_nh, cfg, extack)) + nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && - cfg->fc_flow != fi->fib_nh->nh_tclassid) + cfg->fc_flow != nh->nh_tclassid) return 1; #endif - if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) || + if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) || (cfg->fc_gw_family && - cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family)) + cfg->fc_gw_family != nh->fib_nh_gw_family)) return 1; if (cfg->fc_gw_family == AF_INET && - cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4) + cfg->fc_gw4 != nh->fib_nh_gw4) return 1; if (cfg->fc_gw_family == AF_INET6 && - ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6)) + ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6)) return 1; return 0; @@ -1366,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } nh->fib_nh_scope = RT_SCOPE_NOWHERE; - nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif); + nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif); err = -ENODEV; if (!nh->fib_nh_dev) goto failure; @@ -1583,6 +1588,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { + unsigned int nhs = fib_info_num_path(fi); struct nlmsghdr *nlh; struct rtmsg *rtm; @@ -1618,8 +1624,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_prefsrc && nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; - if (fi->fib_nhs == 1) { - struct fib_nh *nh = &fi->fib_nh[0]; + if (nhs == 1) { + const struct fib_nh *nh = fib_info_nh(fi, 0); unsigned char flags = 0; if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) @@ -1838,6 +1844,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; + struct fib_nh *nh; if (fa->fa_slen != slen) continue; @@ -1859,8 +1866,9 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - if (!next_fi->fib_nh[0].fib_nh_gw4 || - next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK) + + nh = fib_info_nh(next_fi, 0); + if (!nh->fib_nh_gw4 || nh->fib_nh_scope != RT_SCOPE_LINK) continue; fib_alias_accessed(fa); @@ -2024,7 +2032,7 @@ void fib_select_path(struct net *net, struct fib_result *res, goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1) { + if (fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(net, fl4, skb, NULL); fib_select_multipath(res, h); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b53ecef89d59..5c8a4d21b8e0 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1469,7 +1469,7 @@ found: } if (fi->fib_flags & RTNH_F_DEAD) continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); if (nhc->nhc_flags & RTNH_F_DEAD) @@ -2717,14 +2717,18 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) +static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi) { unsigned int flags = 0; if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; - if (fi && fi->fib_nh->fib_nh_gw4) - flags |= RTF_GATEWAY; + if (fi) { + const struct fib_nh *nh = fib_info_nh(fi, 0); + + if (nh->fib_nh_gw4) + flags |= RTF_GATEWAY; + } if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; flags |= RTF_UP; @@ -2755,7 +2759,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) prefix = htonl(l->key); hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { - const struct fib_info *fi = fa->fa_info; + struct fib_info *fi = fa->fa_info; __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen); unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); @@ -2768,26 +2772,28 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) seq_setwidth(seq, 127); - if (fi) + if (fi) { + struct fib_nh *nh = fib_info_nh(fi, 0); + seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", - fi->fib_dev ? fi->fib_dev->name : "*", + nh->fib_nh_dev ? nh->fib_nh_dev->name : "*", prefix, - fi->fib_nh->fib_nh_gw4, flags, 0, 0, + nh->fib_nh_gw4, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0), fi->fib_window, fi->fib_rtt >> 3); - else + } else { seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0); - + } seq_pad(seq, '\n'); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11ddc276776e..05a6a8ecb574 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include @@ -1950,7 +1951,7 @@ static int ip_mkroute_input(struct sk_buff *skb, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1) { + if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); From dcb1ecb50edf8219c3bd851de35897fb024c423b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:50 -0700 Subject: [PATCH 2/7] ipv4: Prepare for fib6_nh from a nexthop object Convert more IPv4 code to use fib_nh_common over fib_nh to enable routes to use a fib6_nh based nexthop. In the end, only code not using a nexthop object in a fib_info should directly access fib_nh in a fib_info without checking the famiy and going through fib_nh_common. Those functions will be marked when it is not directly evident. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 15 ++++++++--- net/ipv4/fib_frontend.c | 12 ++++----- net/ipv4/fib_rules.c | 4 +-- net/ipv4/fib_semantics.c | 55 +++++++++++++++++++++++++++------------- net/ipv4/fib_trie.c | 15 ++++++----- net/ipv4/nexthop.c | 3 ++- net/ipv4/route.c | 2 +- 7 files changed, 69 insertions(+), 37 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 42b1a806f6f5..7da8ea784029 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -195,8 +195,8 @@ struct fib_result_nl { #define FIB_TABLE_HASHSZ 2 #endif -__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh, - unsigned char scope); +__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, + unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) @@ -455,11 +455,18 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = nh->nh_tclassid << 16; + if (nhc->nhc_family == AF_INET) { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + *itag = nh->nh_tclassid << 16; + } else { + *itag = 0; + } + #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a4691360b395..5ea2750982f2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -235,9 +235,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (table) { ret = RTN_UNICAST; if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { - struct fib_nh *nh = fib_info_nh(res.fi, 0); + struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0); - if (!dev || dev == nh->fib_nh_dev) + if (!dev || dev == nhc->nhc_dev) ret = res.type; } } @@ -325,18 +325,18 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) int ret; for (ret = 0; ret < fib_info_num_path(fi); ret++) { - const struct fib_nh *nh = fib_info_nh(fi, ret); + const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); - if (nh->fib_nh_dev == dev) { + if (nhc->nhc_dev == dev) { dev_match = true; break; - } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nhc->nhc_dev) == dev->ifindex) { dev_match = true; break; } } #else - if (fib_info_nh(fi, 0)->fib_nh_dev == dev) + if (fib_info_nhc(fi, 0)->nhc_dev == dev) dev_match = true; #endif diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ab06fd73b343..88807c138df4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -147,9 +147,9 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg struct net_device *dev = NULL; if (result->fi) { - struct fib_nh *nh = fib_info_nh(result->fi, 0); + struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0); - dev = nh->fib_nh_dev; + dev = nhc->nhc_dev; } /* do not accept result if the route does diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a37ff07718a8..4a12c69f7fa1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -61,6 +61,9 @@ static unsigned int fib_info_cnt; #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; +/* for_nexthops and change_nexthops only used when nexthop object + * is not set in a fib_info. The logic within can reference fib_nh. + */ #ifdef CONFIG_IP_ROUTE_MULTIPATH #define for_nexthops(fi) { \ @@ -402,20 +405,23 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* each nexthop is packed in an attribute */ size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); + unsigned int i; /* may contain flow and gateway attribute */ nhsize += 2 * nla_total_size(4); /* grab encap info */ - for_nexthops(fi) { - if (nh->fib_nh_lws) { + for (i = 0; i < fib_info_num_path(fi); i++) { + struct fib_nh_common *nhc = fib_info_nhc(fi, i); + + if (nhc->nhc_lwtstate) { /* RTA_ENCAP_TYPE */ nh_encapsize += lwtunnel_get_encap_size( - nh->fib_nh_lws); + nhc->nhc_lwtstate); /* RTA_ENCAP */ nh_encapsize += nla_total_size(2); } - } endfor_nexthops(fi); + } /* all nexthops are packed in a nested attribute */ payload += nla_total_size((nhs * nhsize) + nh_encapsize); @@ -1194,9 +1200,15 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, fib_info_hash_free(old_laddrhash, bytes); } -__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh, - unsigned char scope) +__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, + unsigned char scope) { + struct fib_nh *nh; + + if (nhc->nhc_family != AF_INET) + return inet_select_addr(nhc->nhc_dev, 0, scope); + + nh = container_of(nhc, struct fib_nh, nh_common); nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); @@ -1206,16 +1218,19 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh, __be32 fib_result_prefsrc(struct net *net, struct fib_result *res) { struct fib_nh_common *nhc = res->nhc; - struct fib_nh *nh; if (res->fi->fib_prefsrc) return res->fi->fib_prefsrc; - nh = container_of(nhc, struct fib_nh, nh_common); - if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) - return nh->nh_saddr; + if (nhc->nhc_family == AF_INET) { + struct fib_nh *nh; - return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope); + nh = container_of(nhc, struct fib_nh, nh_common); + if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) + return nh->nh_saddr; + } + + return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope); } static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) @@ -1397,7 +1412,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } change_nexthops(fi) { - fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope); + fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, + fi->fib_scope); if (nexthop_nh->fib_nh_gw_family == AF_INET6) fi->fib_nh_is_v6 = true; } endfor_nexthops(fi) @@ -1625,17 +1641,22 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (nhs == 1) { - const struct fib_nh *nh = fib_info_nh(fi, 0); + const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); unsigned char flags = 0; - if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) + if (fib_nexthop_info(skb, nhc, &flags, false) < 0) goto nla_put_failure; rtm->rtm_flags = flags; #ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; + if (nhc->nhc_family == AF_INET) { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) + goto nla_put_failure; + } #endif } else { if (fib_add_multipath(skb, fi) < 0) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5c8a4d21b8e0..d704d1606b8f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2724,9 +2724,9 @@ static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi) if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; if (fi) { - const struct fib_nh *nh = fib_info_nh(fi, 0); + const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); - if (nh->fib_nh_gw4) + if (nhc->nhc_gw.ipv4) flags |= RTF_GATEWAY; } if (mask == htonl(0xFFFFFFFF)) @@ -2773,14 +2773,17 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) seq_setwidth(seq, 127); if (fi) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh_common *nhc = fib_info_nhc(fi, 0); + __be32 gw = 0; + + if (nhc->nhc_gw_family == AF_INET) + gw = nhc->nhc_gw.ipv4; seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" "%d\t%08X\t%d\t%u\t%u", - nh->fib_nh_dev ? nh->fib_nh_dev->name : "*", - prefix, - nh->fib_nh_gw4, flags, 0, 0, + nhc->nhc_dev ? nhc->nhc_dev->name : "*", + prefix, gw, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7a5a3d08fec3..aec4ecb145a0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -815,7 +815,8 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { nh->nh_flags = fib_nh->fib_nh_flags; - fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope); + fib_info_update_nhc_saddr(net, &fib_nh->nh_common, + fib_nh->fib_nh_scope); } else { fib_nh_release(net, fib_nh); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 05a6a8ecb574..4a1168451f3a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1585,7 +1585,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ip_dst_init_metrics(&rt->dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID - { + if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); From 4c7e8084fd467ddb2b0e6c6011f9c1064afb7e56 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:51 -0700 Subject: [PATCH 3/7] ipv4: Plumb support for nexthop object in a fib_info Add 'struct nexthop' and nh_list list_head to fib_info. nh_list is the fib_info side of the nexthop <-> fib_info relationship. Add fi_list list_head to 'struct nexthop' to track fib_info entries using a nexthop instance. Add __remove_nexthop_fib and add it to __remove_nexthop to walk the new list_head and mark those fib entries as dead when the nexthop is deleted. Add a few nexthop helpers for use when a nexthop is added to fib_info: - nexthop_cmp to determine if 2 nexthops are the same - nexthop_path_fib_result to select a path for a multipath 'struct nexthop' - nexthop_fib_nhc to select a specific fib_nh_common within a multipath 'struct nexthop' Update existing fib_info_nhc to use nexthop_fib_nhc if a fib_info uses a 'struct nexthop', and mark fib_info_nh as only used for the non-nexthop case. Update the fib_info functions to check for fi->nh and take a different path as needed: - free_fib_info_rcu - put the nexthop object reference - fib_release_info - remove the fib_info from the nexthop's fi_list - nh_comp - use nexthop_cmp when either fib_info references a nexthop object - fib_info_hashfn - use the nexthop id for the hashing vs the oif of each fib_nh in a fib_info - fib_nlmsg_size - add space for the RTA_NH_ID attribute - fib_create_info - verify nexthop reference can be taken, verify nexthop spec is valid for fib entry, and add fib_info to fi_list for a nexthop - fib_select_multipath - use the new nexthop_path_fib_result to select a path when nexthop objects are used - fib_table_lookup - if the 'struct nexthop' is a blackhole nexthop, treat it the same as a fib entry using 'blackhole' The bulk of the changes are in fib_semantics.c and most of that is moving the existing change_nexthops into an else branch. Update the nexthop code to walk fi_list on a nexthop deleted to remove fib entries referencing it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++ include/net/nexthop.h | 48 +++++++++++++ net/ipv4/fib_semantics.c | 142 +++++++++++++++++++++++++++++---------- net/ipv4/fib_trie.c | 7 ++ net/ipv4/nexthop.c | 64 ++++++++++++++++++ 5 files changed, 229 insertions(+), 36 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7da8ea784029..071d280de389 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -129,9 +129,12 @@ struct fib_nh { * This structure contains data shared by many of routes. */ +struct nexthop; + struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct list_head nh_list; struct net *fib_net; int fib_treeref; refcount_t fib_clntref; @@ -151,6 +154,7 @@ struct fib_info { int fib_nhs; bool fib_nh_is_v6; bool nh_updated; + struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[0]; }; diff --git a/include/net/nexthop.h b/include/net/nexthop.h index e501d77b82c8..2912a2d7a515 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -77,6 +77,7 @@ struct nh_group { struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ + struct list_head fi_list; /* v4 entries using nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; @@ -110,6 +111,12 @@ static inline void nexthop_put(struct nexthop *nh) call_rcu(&nh->rcu, nexthop_free_rcu); } +static inline bool nexthop_cmp(const struct nexthop *nh1, + const struct nexthop *nh2) +{ + return nh1 == nh2; +} + static inline bool nexthop_is_multipath(const struct nexthop *nh) { if (nh->is_group) { @@ -193,18 +200,59 @@ static inline bool nexthop_is_blackhole(const struct nexthop *nh) return nhi->reject_nh; } +static inline void nexthop_path_fib_result(struct fib_result *res, int hash) +{ + struct nh_info *nhi; + struct nexthop *nh; + + nh = nexthop_select_path(res->fi->nh, hash); + nhi = rcu_dereference(nh->nh_info); + res->nhc = &nhi->fib_nhc; +} + +/* called with rcu read lock or rtnl held */ +static inline +struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) +{ + struct nh_info *nhi; + + BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); + BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, nhsel); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + return &nhi->fib_nhc; +} + static inline unsigned int fib_info_num_path(const struct fib_info *fi) { + if (unlikely(fi->nh)) + return nexthop_num_path(fi->nh); + return fi->fib_nhs; } +int fib_check_nexthop(struct nexthop *nh, u8 scope, + struct netlink_ext_ack *extack); + static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) { + if (unlikely(fi->nh)) + return nexthop_fib_nhc(fi->nh, nhsel); + return &fi->fib_nh[nhsel].nh_common; } +/* only used when fib_nh is built into fib_info */ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) { + WARN_ON(fi->nh); + return &fi->fib_nh[nhsel]; } #endif diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4a12c69f7fa1..01e587a5dcb1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -236,9 +236,13 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); - change_nexthops(fi) { - fib_nh_release(fi->fib_net, nexthop_nh); - } endfor_nexthops(fi); + if (fi->nh) { + nexthop_put(fi->nh); + } else { + change_nexthops(fi) { + fib_nh_release(fi->fib_net, nexthop_nh); + } endfor_nexthops(fi); + } ip_fib_metrics_put(fi->fib_metrics); @@ -264,11 +268,15 @@ void fib_release_info(struct fib_info *fi) hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); - change_nexthops(fi) { - if (!nexthop_nh->fib_nh_dev) - continue; - hlist_del(&nexthop_nh->nh_hash); - } endfor_nexthops(fi) + if (fi->nh) { + list_del(&fi->nh_list); + } else { + change_nexthops(fi) { + if (!nexthop_nh->fib_nh_dev) + continue; + hlist_del(&nexthop_nh->nh_hash); + } endfor_nexthops(fi) + } fi->fib_dead = 1; fib_info_put(fi); } @@ -279,6 +287,12 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) { const struct fib_nh *onh; + if (fi->nh || ofi->nh) + return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1; + + if (ofi->fib_nhs == 0) + return 0; + for_nexthops(fi) { onh = fib_info_nh(ofi, nhsel); @@ -323,9 +337,14 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) val ^= (fi->fib_protocol << 8) | fi->fib_scope; val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; - for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->fib_nh_oif); - } endfor_nexthops(fi) + + if (fi->nh) { + val ^= fib_devindex_hashfn(fi->nh->id); + } else { + for_nexthops(fi) { + val ^= fib_devindex_hashfn(nh->fib_nh_oif); + } endfor_nexthops(fi) + } return (val ^ (val >> 7) ^ (val >> 12)) & mask; } @@ -352,7 +371,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && - (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) + nh_comp(fi, nfi) == 0) return fi; } @@ -399,6 +418,9 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); + if (fi->nh) + payload += nla_total_size(4); /* RTA_NH_ID */ + if (nhs) { size_t nh_encapsize = 0; /* Also handles the special case nhs == 1 */ @@ -585,6 +607,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +/* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) @@ -683,6 +706,7 @@ errout: return ret; } +/* only called when fib_nh is integrated into fib_info */ static void fib_rebalance(struct fib_info *fi) { int total; @@ -1262,6 +1286,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, { int err; struct fib_info *fi = NULL; + struct nexthop *nh = NULL; struct fib_info *ofi; int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; @@ -1333,14 +1358,25 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi->fib_tb_id = cfg->fc_table; fi->fib_nhs = nhs; - change_nexthops(fi) { - nexthop_nh->nh_parent = fi; - } endfor_nexthops(fi) + if (nh) { + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + err = -EINVAL; + } else { + err = 0; + fi->nh = nh; + } + } else { + change_nexthops(fi) { + nexthop_nh->nh_parent = fi; + } endfor_nexthops(fi) - if (cfg->fc_mp) - err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); - else - err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); + if (cfg->fc_mp) + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, + extack); + else + err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); + } if (err != 0) goto failure; @@ -1371,7 +1407,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } - if (cfg->fc_scope == RT_SCOPE_HOST) { + if (fi->nh) { + err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack); + if (err) + goto failure; + } else if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ @@ -1411,14 +1451,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto err_inval; } - change_nexthops(fi) { - fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, - fi->fib_scope); - if (nexthop_nh->fib_nh_gw_family == AF_INET6) - fi->fib_nh_is_v6 = true; - } endfor_nexthops(fi) + if (!fi->nh) { + change_nexthops(fi) { + fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common, + fi->fib_scope); + if (nexthop_nh->fib_nh_gw_family == AF_INET6) + fi->fib_nh_is_v6 = true; + } endfor_nexthops(fi) - fib_rebalance(fi); + fib_rebalance(fi); + } link_it: ofi = fib_find_info(fi); @@ -1440,16 +1482,20 @@ link_it: head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; hlist_add_head(&fi->fib_lhash, head); } - change_nexthops(fi) { - struct hlist_head *head; - unsigned int hash; + if (fi->nh) { + list_add(&fi->nh_list, &nh->fi_list); + } else { + change_nexthops(fi) { + struct hlist_head *head; + unsigned int hash; - if (!nexthop_nh->fib_nh_dev) - continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; - hlist_add_head(&nexthop_nh->nh_hash, head); - } endfor_nexthops(fi) + if (!nexthop_nh->fib_nh_dev) + continue; + hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); + head = &fib_info_devhash[hash]; + hlist_add_head(&nexthop_nh->nh_hash, head); + } endfor_nexthops(fi) + } spin_unlock_bh(&fib_info_lock); return fi; @@ -1576,6 +1622,12 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) if (!mp) goto nla_put_failure; + if (unlikely(fi->nh)) { + if (nexthop_mpath_fill_node(skb, fi->nh) < 0) + goto nla_put_failure; + goto mp_end; + } + for_nexthops(fi) { if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) goto nla_put_failure; @@ -1586,6 +1638,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) #endif } endfor_nexthops(fi); +mp_end: nla_nest_end(skb, mp); return 0; @@ -1640,6 +1693,14 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (fi->fib_prefsrc && nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; + + if (fi->nh) { + if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id)) + goto nla_put_failure; + if (nexthop_is_blackhole(fi->nh)) + rtm->rtm_type = RTN_BLACKHOLE; + } + if (nhs == 1) { const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); unsigned char flags = 0; @@ -1784,6 +1845,8 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed + * + * only used when fib_nh is built into fib_info */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { @@ -1931,6 +1994,8 @@ out: /* * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. + * + * only used when fib_nh is built into fib_info */ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { @@ -2025,6 +2090,11 @@ void fib_select_multipath(struct fib_result *res, int hash) struct net *net = fi->fib_net; bool first = false; + if (unlikely(res->fi->nh)) { + nexthop_path_fib_result(res, hash); + return; + } + change_nexthops(fi) { if (net->ipv4.sysctl_fib_multipath_use_neigh) { if (!fib_good_nh(nexthop_nh)) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d704d1606b8f..716f2d66cb3f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1461,6 +1461,7 @@ found: fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (unlikely(err < 0)) { +out_reject: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif @@ -1469,6 +1470,12 @@ found: } if (fi->fib_flags & RTNH_F_DEAD) continue; + + if (unlikely(fi->nh && nexthop_is_blackhole(fi->nh))) { + err = fib_props[RTN_BLACKHOLE].error; + goto out_reject; + } + for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index aec4ecb145a0..63cbb04f697f 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -105,6 +105,7 @@ static struct nexthop *nexthop_alloc(void) nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); if (nh) { + INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->grp_list); } return nh; @@ -515,6 +516,54 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) } EXPORT_SYMBOL_GPL(nexthop_select_path); +static int nexthop_check_scope(struct nexthop *nh, u8 scope, + struct netlink_ext_ack *extack) +{ + struct nh_info *nhi; + + nhi = rtnl_dereference(nh->nh_info); + if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { + NL_SET_ERR_MSG(extack, + "Route with host scope can not have a gateway"); + return -EINVAL; + } + + if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { + NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); + return -EINVAL; + } + + return 0; +} + +/* Invoked by fib add code to verify nexthop by id is ok with + * config for prefix; parts of fib_check_nh not done when nexthop + * object is used. + */ +int fib_check_nexthop(struct nexthop *nh, u8 scope, + struct netlink_ext_ack *extack) +{ + int err = 0; + + if (nh->is_group) { + struct nh_group *nhg; + + if (scope == RT_SCOPE_HOST) { + NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); + err = -EINVAL; + goto out; + } + + nhg = rtnl_dereference(nh->nh_grp); + /* all nexthops in a group have the same scope */ + err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack); + } else { + err = nexthop_check_scope(nh, scope, extack); + } +out: + return err; +} + static void nh_group_rebalance(struct nh_group *nhg) { int total = 0; @@ -607,9 +656,24 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) } } +static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) +{ + bool do_flush = false; + struct fib_info *fi; + + list_for_each_entry(fi, &nh->fi_list, nh_list) { + fi->fib_flags |= RTNH_F_DEAD; + do_flush = true; + } + if (do_flush) + fib_flush(net); +} + static void __remove_nexthop(struct net *net, struct nexthop *nh, struct nl_info *nlinfo) { + __remove_nexthop_fib(net, nh); + if (nh->is_group) { remove_nexthop_group(nh, nlinfo); } else { From f88d8ea67fbdbac7a64bfa6ed9a2ba27bb822f74 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:52 -0700 Subject: [PATCH 4/7] ipv6: Plumb support for nexthop object in a fib6_info Add struct nexthop and nh_list list_head to fib6_info. nh_list is the fib6_info side of the nexthop <-> fib_info relationship. Since a fib6_info referencing a nexthop object can not have 'sibling' entries (the old way of doing multipath routes), the nh_list is a union with fib6_siblings. Add f6i_list list_head to 'struct nexthop' to track fib6_info entries using a nexthop instance. Update __remove_nexthop_fib to walk f6_list and delete fib entries using the nexthop. Add a few nexthop helpers for use when a nexthop is added to fib6_info: - nexthop_fib6_nh - return first fib6_nh in a nexthop object - fib6_info_nh_dev moved to nexthop.h and updated to use nexthop_fib6_nh if the fib6_info references a nexthop object - nexthop_path_fib6_result - similar to ipv4, select a path within a multipath nexthop object. If the nexthop is a blackhole, set fib6_result type to RTN_BLACKHOLE, and set the REJECT flag Update the fib6_info references to check for nh and take a different path as needed: - rt6_qualify_for_ecmp - if a fib entry uses a nexthop object it can NOT be coalesced with other fib entries into a multipath route - rt6_duplicate_nexthop - use nexthop_cmp if either fib6_info references a nexthop - addrconf (host routes), RA's and info entries (anything configured via ndisc) does not use nexthop objects - fib6_info_destroy_rcu - put reference to nexthop object - fib6_purge_rt - drop fib6_info from f6i_list - fib6_select_path - update to use the new nexthop_path_fib6_result when fib entry uses a nexthop object - rt6_device_match - update to catch use of nexthop object as a blackhole and set fib6_type and flags. - ip6_route_info_create - don't add space for fib6_nh if fib entry is going to reference a nexthop object, take a reference to nexthop object, disallow use of source routing - rt6_nlmsg_size - add space for RTA_NH_ID - add rt6_fill_node_nexthop to add nexthop data on a dump As with ipv4, most of the changes push existing code into the else branch of whether the fib entry uses a nexthop object. Update the nexthop code to walk f6i_list on a nexthop deleted to remove fib entries referencing it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 11 ++- include/net/ip6_route.h | 13 +++- include/net/nexthop.h | 50 ++++++++++++++ net/ipv4/nexthop.c | 44 ++++++++++++ net/ipv6/addrconf.c | 5 ++ net/ipv6/ip6_fib.c | 22 ++++-- net/ipv6/ndisc.c | 3 +- net/ipv6/route.c | 148 ++++++++++++++++++++++++++++++++++------ 8 files changed, 260 insertions(+), 36 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index ebe5d65f97e0..1a8acd51b277 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -146,7 +146,10 @@ struct fib6_info { * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head fib6_siblings; + union { + struct list_head fib6_siblings; + struct list_head nh_list; + }; unsigned int fib6_nsiblings; refcount_t fib6_ref; @@ -170,6 +173,7 @@ struct fib6_info { unused:3; struct rcu_head rcu; + struct nexthop *nh; struct fib6_nh fib6_nh[0]; }; @@ -441,11 +445,6 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_unlock(); } -static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) -{ - return f6i->fib6_nh->fib_nh_dev; -} - int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a6ce6ea856b9..7375a165fd98 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -27,6 +27,7 @@ struct route_info { #include #include #include +#include #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 @@ -66,10 +67,13 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* fib entries using a nexthop object can not be coalesced into + * a multipath route + */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ - return !(f6i->fib6_flags & RTF_ADDRCONF) && + return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && f6i->fib6_nh->fib_nh_gw_family; } @@ -275,8 +279,13 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh; + struct fib6_nh *nha, *nhb; + if (a->nh || b->nh) + return nexthop_cmp(a->nh, b->nh); + + nha = a->fib6_nh; + nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 2912a2d7a515..aff7b2410057 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -10,6 +10,7 @@ #define __LINUX_NEXTHOP_H #include +#include #include #include #include @@ -78,6 +79,7 @@ struct nh_group { struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ + struct list_head f6i_list; /* v6 entries using nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; @@ -255,4 +257,52 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) return &fi->fib_nh[nhsel]; } + +/* + * IPv6 variants + */ +int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, + struct netlink_ext_ack *extack); + +static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + +static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) +{ + struct fib6_nh *fib6_nh; + + fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; + return fib6_nh->fib_nh_dev; +} + +static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) +{ + struct nexthop *nh = res->f6i->nh; + struct nh_info *nhi; + + nh = nexthop_select_path(nh, hash); + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->reject_nh) { + res->fib6_type = RTN_BLACKHOLE; + res->fib6_flags |= RTF_REJECT; + res->nh = nexthop_fib6_nh(nh); + } else { + res->nh = &nhi->fib6_nh; + } +} #endif diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 63cbb04f697f..5e48762b6b5f 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -106,6 +106,7 @@ static struct nexthop *nexthop_alloc(void) nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); if (nh) { INIT_LIST_HEAD(&nh->fi_list); + INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); } return nh; @@ -516,6 +517,41 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) } EXPORT_SYMBOL_GPL(nexthop_select_path); +int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + struct nh_info *nhi; + + /* fib6_src is unique to a fib6_info and limits the ability to cache + * routes in fib6_nh within a nexthop that is potentially shared + * across multiple fib entries. If the config wants to use source + * routing it can not use nexthop objects. mlxsw also does not allow + * fib6_src on routes. + */ + if (!ipv6_addr_any(&cfg->fc_src)) { + NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); + return -EINVAL; + } + + if (nh->is_group) { + struct nh_group *nhg; + + nhg = rtnl_dereference(nh->nh_grp); + if (nhg->has_v4) + goto no_v4_nh; + } else { + nhi = rtnl_dereference(nh->nh_info); + if (nhi->family == AF_INET) + goto no_v4_nh; + } + + return 0; +no_v4_nh: + NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); + return -EINVAL; +} +EXPORT_SYMBOL_GPL(fib6_check_nexthop); + static int nexthop_check_scope(struct nexthop *nh, u8 scope, struct netlink_ext_ack *extack) { @@ -658,6 +694,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { + struct fib6_info *f6i, *tmp; bool do_flush = false; struct fib_info *fi; @@ -667,6 +704,13 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) } if (do_flush) fib_flush(net); + + /* ip6_del_rt removes the entry from this list hence the _safe */ + list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { + /* __ip6_del_rt does a release, so do a hold here */ + fib6_info_hold(f6i); + ipv6_stub->ip6_del_rt(net, f6i); + } } static void __remove_nexthop(struct net *net, struct nexthop *nh, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4c30726fa7c7..d7fff86c2ef0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2421,6 +2421,10 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { + /* prefix routes only use builtin fib6_nh */ + if (rt->nh) + continue; + if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex) continue; if (no_gw && rt->fib6_nh->fib_nh_gw_family) @@ -6352,6 +6356,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) list_for_each_entry(ifa, &idev->addr_list, if_list) { spin_lock(&ifa->lock); if (ifa->rt) { + /* host routes only use builtin fib6_nh */ struct fib6_nh *nh = ifa->rt->fib6_nh; int cpu; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cdfb8500ccae..02feda73a98e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -159,6 +159,7 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh) if (!f6i) return NULL; + /* fib6_siblings is a union with nh_list, so this initializes both */ INIT_LIST_HEAD(&f6i->fib6_siblings); refcount_set(&f6i->fib6_ref, 1); @@ -171,7 +172,11 @@ void fib6_info_destroy_rcu(struct rcu_head *head) WARN_ON(f6i->fib6_node); - fib6_nh_release(f6i->fib6_nh); + if (f6i->nh) + nexthop_put(f6i->nh); + else + fib6_nh_release(f6i->fib6_nh); + ip_fib_metrics_put(f6i->fib6_metrics); kfree(f6i); } @@ -927,6 +932,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, fib6_drop_pcpu_from(rt, table); + if (rt->nh && !list_empty(&rt->nh_list)) + list_del_init(&rt->nh_list); + if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, @@ -1334,6 +1342,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { + if (rt->nh) + list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); } @@ -2295,9 +2305,13 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + struct fib6_nh *fib6_nh = rt->fib6_nh; unsigned int flags = rt->fib6_flags; const struct net_device *dev; + if (rt->nh) + fib6_nh = nexthop_fib6_nh(rt->nh); + seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); #ifdef CONFIG_IPV6_SUBTREES @@ -2305,14 +2319,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_nh->fib_nh_gw_family) { + if (fib6_nh->fib_nh_gw_family) { flags |= RTF_GATEWAY; - seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6); + seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6); } else { seq_puts(seq, "00000000000000000000000000000000"); } - dev = rt->fib6_nh->fib_nh_dev; + dev = fib6_nh->fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", rt->fib6_metric, refcount_read(&rt->fib6_ref), 0, flags, dev ? dev->name : ""); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f874dde1ee85..6e3c51109c83 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1289,9 +1289,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) !in6_dev->cnf.accept_ra_rtr_pref) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif - + /* routes added from RAs do not use nexthop objects */ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); - if (rt) { neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9b9a0159f7fd..df5be3d5d3e5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -432,15 +432,21 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; - if (!match->fib6_nsiblings || have_oif_match) + if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) goto out; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ - if (!fl6->mp_hash) + if (!fl6->mp_hash && + (!match->nh || nexthop_is_multipath(match->nh))) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); + if (unlikely(match->nh)) { + nexthop_path_fib6_result(res, fl6->mp_hash); + return; + } + if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; @@ -496,7 +502,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res, struct fib6_nh *nh; if (!oif && ipv6_addr_any(saddr)) { - nh = f6i->fib6_nh; + if (unlikely(f6i->nh)) { + nh = nexthop_fib6_nh(f6i->nh); + if (nexthop_is_blackhole(f6i->nh)) + goto out_blackhole; + } else { + nh = f6i->fib6_nh; + } if (!(nh->fib_nh_flags & RTNH_F_DEAD)) goto out; } @@ -515,7 +527,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res, goto out; } - nh = f6i->fib6_nh; + if (unlikely(f6i->nh)) { + nh = nexthop_fib6_nh(f6i->nh); + if (nexthop_is_blackhole(f6i->nh)) + goto out_blackhole; + } else { + nh = f6i->fib6_nh; + } + if (nh->fib_nh_flags & RTNH_F_DEAD) { res->f6i = net->ipv6.fib6_null_entry; nh = res->f6i->fib6_nh; @@ -524,6 +543,12 @@ out: res->nh = nh; res->fib6_type = res->f6i->fib6_type; res->fib6_flags = res->f6i->fib6_flags; + return; + +out_blackhole: + res->fib6_flags |= RTF_REJECT; + res->fib6_type = RTN_BLACKHOLE; + res->nh = nh; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -1117,6 +1142,8 @@ restart: rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); goto out; + } else if (res.fib6_flags & RTF_REJECT) { + goto do_create; } fib6_select_path(net, &res, fl6, fl6->flowi6_oif, @@ -1128,6 +1155,7 @@ restart: if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); } else { +do_create: rt = ip6_create_rt_rcu(&res); } @@ -3217,7 +3245,9 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; + struct nexthop *nh = NULL; struct fib6_table *table; + struct fib6_nh *fib6_nh; int err = -EINVAL; int addr_type; @@ -3270,7 +3300,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; err = -ENOMEM; - rt = fib6_info_alloc(gfp_flags, true); + rt = fib6_info_alloc(gfp_flags, !nh); if (!rt) goto out; @@ -3310,19 +3340,35 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); - if (err) - goto out; + if (nh) { + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + goto out; + } + if (rt->fib6_src.plen) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used wtih source routing"); + goto out; + } + rt->nh = nh; + fib6_nh = nexthop_fib6_nh(rt->nh); + } else { + err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); + if (err) + goto out; - /* We cannot add true routes via loopback here, - * they would result in kernel looping; promote them to reject routes - */ - addr_type = ipv6_addr_type(&cfg->fc_dst); - if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type)) - rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; + fib6_nh = rt->fib6_nh; + + /* We cannot add true routes via loopback here, they would + * result in kernel looping; promote them to reject routes + */ + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, + addr_type)) + rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; + } if (!ipv6_addr_any(&cfg->fc_prefsrc)) { - struct net_device *dev = fib6_info_nh_dev(rt); + struct net_device *dev = fib6_nh->fib_nh_dev; if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); @@ -3678,6 +3724,9 @@ static struct fib6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { + /* these routes do not use nexthops */ + if (rt->nh) + continue; if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex) continue; if (!(rt->fib6_flags & RTF_ROUTEINFO) || @@ -3741,8 +3790,13 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - struct fib6_nh *nh = rt->fib6_nh; + struct fib6_nh *nh; + /* RA routes do not use nexthops */ + if (rt->nh) + continue; + + nh = rt->fib6_nh; if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&nh->fib_nh_gw6, addr)) @@ -3993,7 +4047,8 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) && + if (!rt->nh && + ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@ -4021,8 +4076,13 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - struct fib6_nh *nh = rt->fib6_nh; + struct fib6_nh *nh; + /* RA routes do not use nexthops */ + if (rt->nh) + return 0; + + nh = rt->fib6_nh; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6)) return -1; @@ -4069,6 +4129,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) return NULL; } +/* only called for fib entries with builtin fib6_nh */ static bool rt6_is_dead(const struct fib6_info *rt) { if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD || @@ -4147,7 +4208,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && + if (rt != net->ipv6.fib6_null_entry && !rt->nh && rt->fib6_nh->fib_nh_dev == arg->dev) { rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); @@ -4172,6 +4233,7 @@ void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } +/* only called for fib entries with inline fib6_nh */ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { @@ -4232,7 +4294,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) const struct net_device *dev = arg->dev; struct net *net = dev_net(dev); - if (rt == net->ipv6.fib6_null_entry) + if (rt == net->ipv6.fib6_null_entry || rt->nh) return 0; switch (arg->event) { @@ -4786,6 +4848,9 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; + if (rt->nh) + nexthop_len += nla_total_size(4); /* RTA_NH_ID */ + if (rt->fib6_nsiblings) { nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) @@ -4812,6 +4877,35 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nexthop_len; } +static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, + unsigned char *flags) +{ + if (nexthop_is_multipath(nh)) { + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + if (nexthop_mpath_fill_node(skb, nh)) + goto nla_put_failure; + + nla_nest_end(skb, mp); + } else { + struct fib6_nh *fib6_nh; + + fib6_nh = nexthop_fib6_nh(nh); + if (fib_nexthop_info(skb, &fib6_nh->nh_common, + flags, false) < 0) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@ -4821,6 +4915,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt6 = (struct rt6_info *)dst; struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; + unsigned char nh_flags = 0; struct nlmsghdr *nlh; struct rtmsg *rtm; long expires = 0; @@ -4940,9 +5035,18 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, } nla_nest_end(skb, mp); - } else { - unsigned char nh_flags = 0; + } else if (rt->nh) { + if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) + goto nla_put_failure; + if (nexthop_is_blackhole(rt->nh)) + rtm->rtm_type = RTN_BLACKHOLE; + + if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) + goto nla_put_failure; + + rtm->rtm_flags |= nh_flags; + } else { if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, &nh_flags, false) < 0) goto nla_put_failure; From 54250805d8e436d46ca391219802f2a23b948e8d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:53 -0700 Subject: [PATCH 5/7] mlxsw: Fail attempts to use routes with nexthop objects Fail attempts to use nexthop objects with routes until support can be properly added. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4f781358aef1..23f17ea52061 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6122,6 +6122,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + } else if (info->family == AF_INET6) { + struct fib6_entry_notifier_info *fen6_info; + + fen6_info = container_of(info, + struct fib6_entry_notifier_info, + info); + if (fen6_info->rt->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } break; } From 6a87afc072c37880ae60fa3dfb74cc252038fac1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:54 -0700 Subject: [PATCH 6/7] mlx5: Fail attempts to use routes with nexthop objects Fail attempts to use nexthop objects with routes until support can be properly added. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 2cbfaa8da7fc..e69766393990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -262,6 +262,10 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; + if (fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[0].netdev && fib_dev != ldev->pf[1].netdev) { From dbcc4fa718eed0fd392eb098631f33af04014d63 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jun 2019 20:19:55 -0700 Subject: [PATCH 7/7] rocker: Fail attempts to use routes with nexthop objects Fail attempts to use nexthop objects with routes until support can be properly added. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 7ae6c124bfe9..45b3325c3a38 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2214,6 +2214,10 @@ static int rocker_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));