From f3aaaaaae2633281ad2a4d9b835d4093f0b0a2c8 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:03 -0700 Subject: [PATCH 01/28] ice: Make ice_msix_clean_rings static commit 158a08a694c4e ("ice: remove ndo_poll_controller") removed ice_netpoll and introduced a namespace warning for ice_msix_clean_rings. Fix the namespace warning by making ice_msix_clean_rings static. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 49f1940772ed..e750702bcdce 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -433,7 +433,7 @@ int ice_vsi_clear(struct ice_vsi *vsi) * @irq: interrupt number * @data: pointer to a q_vector */ -irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) +static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 677db40338f5..3831b4f0960a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -73,5 +73,4 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); -irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data); #endif /* !_ICE_LIB_H_ */ From 633d7449a30133e99a5182fb74f40594801497b1 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:04 -0700 Subject: [PATCH 02/28] ice: Change device ID define names to align with branding string Basically remove references to C810 and use E810C (from the branding string) instead. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_devids.h | 6 +++--- drivers/net/ethernet/intel/ice/ice_main.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index a6f0a5c0c305..f8d5c661d0ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -6,10 +6,10 @@ /* Device IDs */ /* Intel(R) Ethernet Controller E810-C for backplane */ -#define ICE_DEV_ID_C810_BACKPLANE 0x1591 +#define ICE_DEV_ID_E810C_BACKPLANE 0x1591 /* Intel(R) Ethernet Controller E810-C for QSFP */ -#define ICE_DEV_ID_C810_QSFP 0x1592 +#define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ -#define ICE_DEV_ID_C810_SFP 0x1593 +#define ICE_DEV_ID_E810C_SFP 0x1593 #endif /* _ICE_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8f61b375e768..0084b7290b2b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2271,9 +2271,9 @@ static void ice_remove(struct pci_dev *pdev) * Class, Class Mask, private data (not used) } */ static const struct pci_device_id ice_pci_tbl[] = { - { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 }, - { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, /* required last entry */ { 0, } }; From ac5a8aef112e194c7d41988c4f32112085ebc22f Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:05 -0700 Subject: [PATCH 03/28] ice: Update expected FW version Update to the current firmware major and minor version which are 1 and 3 respectively. Also remove an empty comment line. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 437f832fd7c4..0038a4109c99 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -19,11 +19,10 @@ /* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. - * */ #define EXP_FW_API_VER_BRANCH 0x00 -#define EXP_FW_API_VER_MAJOR 0x00 -#define EXP_FW_API_VER_MINOR 0x01 +#define EXP_FW_API_VER_MAJOR 0x01 +#define EXP_FW_API_VER_MINOR 0x03 /* Different control queue types: These are mainly for SW consumption. */ enum ice_ctl_q { From 99189e8b6b0ea3e561df8602982b796a146a6747 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:06 -0700 Subject: [PATCH 04/28] ice: Use capability count returned by the firmware The firmware now returns the capability count in the command buffer. Use it. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index c52f450f2c0d..78df54b25bf1 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1531,9 +1531,7 @@ ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count, if (!status) ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc); else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOMEM) - *cap_count = - DIV_ROUND_UP(le16_to_cpu(desc.datalen), - sizeof(struct ice_aqc_list_caps_elem)); + *cap_count = le32_to_cpu(cmd->count); return status; } From d7e38611b81e6d7e14969c361f2b9fc07403a6c3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:58:59 -0700 Subject: [PATCH 05/28] net/ipv4: Put target net when address dump fails due to bad attributes If tgt_net is set based on IFA_TARGET_NETNSID attribute in the dump request, make sure all error paths call put_net. Fixes: 5fcd266a9f64 ("net/ipv4: Add support for dumping addresses for a specific device") Fixes: c33078e3dfb1 ("net/ipv4: Update inet_dump_ifaddr for strict data checking") Reported-by: Li RongQing Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 63d5b58fbfdb..9250b309c742 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1761,7 +1761,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct in_device *in_dev; struct hlist_head *head; - int err; + int err = 0; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1771,12 +1771,15 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - return err; + goto put_tgt_net; + err = 0; if (fillargs.ifindex) { dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) - return -ENODEV; + if (!dev) { + err = -ENODEV; + goto put_tgt_net; + } in_dev = __in_dev_get_rtnl(dev); if (in_dev) { @@ -1821,7 +1824,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len; + return err < 0 ? err : skb->len; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, From 242afaa6968cde96824247ab984c24c466ca29f3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:00 -0700 Subject: [PATCH 06/28] net/ipv6: Put target net when address dump fails due to bad attributes If tgt_net is set based on IFA_TARGET_NETNSID attribute in the dump request, make sure all error paths call put_net. Fixes: 6371a71f3a3b ("net/ipv6: Add support for dumping addresses for a specific device") Fixes: ed6eff11790a ("net/ipv6: Update inet6_dump_addr for strict data checking") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 45b84dd5c4eb..7eb09c86fa13 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5089,23 +5089,25 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; + int err = 0; s_h = cb->args[0]; s_idx = idx = cb->args[1]; s_ip_idx = cb->args[2]; if (cb->strict_check) { - int err; - err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - return err; + goto put_tgt_net; + err = 0; if (fillargs.ifindex) { dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) - return -ENODEV; + if (!dev) { + err = -ENODEV; + goto put_tgt_net; + } idev = __in6_dev_get(dev); if (idev) { err = in6_dump_addrs(idev, skb, cb, s_ip_idx, @@ -5144,7 +5146,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len; + return err < 0 ? err : skb->len; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) From ae677bbb4441309e1827e60413de92363153dccb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:01 -0700 Subject: [PATCH 07/28] net: Don't return invalid table id error when dumping all families When doing a route dump across all address families, do not error out if the table does not exist. This allows a route dump for AF_UNSPEC with a table id that may only exist for some of the families. Do return the table does not exist error if dumping routes for a specific family and the table does not exist. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 4 ++++ net/ipv4/ipmr.c | 3 +++ net/ipv6/ip6_fib.c | 3 +++ net/ipv6/ip6mr.c | 3 +++ 5 files changed, 14 insertions(+) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e8d9456bf36e..c5969762a8f4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,6 +226,7 @@ struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; + bool dump_all_families; unsigned char protocol; unsigned char rt_type; unsigned int flags; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5bf653f36911..6df95be96311 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -829,6 +829,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } + filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; @@ -899,6 +900,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); return -ENOENT; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7a3e2acda94c..a6defbec4f1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2542,6 +2542,9 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); return -ENOENT; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2a058b408a6a..1b8bc008b53b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -620,6 +620,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { + if (arg.filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); return -ENOENT; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index c3317ffb09eb..e2ea691e42c6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2473,6 +2473,9 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); return -ENOENT; } From c63586dc9b3ed5d45ba82e16bf9e2170a55521e6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:02 -0700 Subject: [PATCH 08/28] net: rtnl_dump_all needs to propagate error from dumpit function If an address, route or netconf dump request is sent for AF_UNSPEC, then rtnl_dump_all is used to do the dump across all address families. If one of the dumpit functions fails (e.g., invalid attributes in the dump request) then rtnl_dump_all needs to propagate that error so the user gets an appropriate response instead of just getting no data. Fixes: effe67926624 ("net: Enable kernel side filtering of route dumps") Fixes: 5fcd266a9f64 ("net/ipv4: Add support for dumping addresses for a specific device") Fixes: 6371a71f3a3b ("net/ipv6: Add support for dumping addresses for a specific device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0958c7be2c22..f679c7a7d761 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3333,6 +3333,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) int idx; int s_idx = cb->family; int type = cb->nlh->nlmsg_type - RTM_BASE; + int ret = 0; if (s_idx == 0) s_idx = 1; @@ -3365,12 +3366,13 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 0; cb->seq = 0; } - if (dumpit(skb, cb)) + ret = dumpit(skb, cb); + if (ret < 0) break; } cb->family = idx; - return skb->len; + return skb->len ? : ret; } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, From db4f1be3ca9b0ef7330763d07bf4ace83ad6f913 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 23 Oct 2018 16:04:31 -0600 Subject: [PATCH 09/28] net: udp: fix handling of CHECKSUM_COMPLETE packets Current handling of CHECKSUM_COMPLETE packets by the UDP stack is incorrect for any packet that has an incorrect checksum value. udp4/6_csum_init() will both make a call to __skb_checksum_validate_complete() to initialize/validate the csum field when receiving a CHECKSUM_COMPLETE packet. When this packet fails validation, skb->csum will be overwritten with the pseudoheader checksum so the packet can be fully validated by software, but the skb->ip_summed value will be left as CHECKSUM_COMPLETE so that way the stack can later warn the user about their hardware spewing bad checksums. Unfortunately, leaving the SKB in this state can cause problems later on in the checksum calculation. Since the the packet is still marked as CHECKSUM_COMPLETE, udp_csum_pull_header() will SUBTRACT the checksum of the UDP header from skb->csum instead of adding it, leaving us with a garbage value in that field. Once we try to copy the packet to userspace in the udp4/6_recvmsg(), we'll make a call to skb_copy_and_csum_datagram_msg() to checksum the packet data and add it in the garbage skb->csum value to perform our final validation check. Since the value we're validating is not the proper checksum, it's possible that the folded value could come out to 0, causing us not to drop the packet. Instead, we believe that the packet was checksummed incorrectly by hardware since skb->ip_summed is still CHECKSUM_COMPLETE, and we attempt to warn the user with netdev_rx_csum_fault(skb->dev); Unfortunately, since this is the UDP path, skb->dev has been overwritten by skb->dev_scratch and is no longer a valid pointer, so we end up reading invalid memory. This patch addresses this problem in two ways: 1) Do not use the dev pointer when calling netdev_rx_csum_fault() from skb_copy_and_csum_datagram_msg(). Since this gets called from the UDP path where skb->dev has been overwritten, we have no way of knowing if the pointer is still valid. Also for the sake of consistency with the other uses of netdev_rx_csum_fault(), don't attempt to call it if the packet was checksummed by software. 2) Add better CHECKSUM_COMPLETE handling to udp4/6_csum_init(). If we receive a packet that's CHECKSUM_COMPLETE that fails verification (i.e. skb->csum_valid == 0), check who performed the calculation. It's possible that the checksum was done in software by the network stack earlier (such as Netfilter's CONNTRACK module), and if that says the checksum is bad, we can drop the packet immediately instead of waiting until we try and copy it to userspace. Otherwise, we need to mark the SKB as CHECKSUM_NONE, since the skb->csum field no longer contains the full packet checksum after the call to __skb_checksum_validate_complete(). Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Fixes: c84d949057ca ("udp: copy skb->truesize in the first cache line") Cc: Sam Kumar Cc: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- net/core/datagram.c | 5 +++-- net/ipv4/udp.c | 20 ++++++++++++++++++-- net/ipv6/ip6_checksum.c | 20 ++++++++++++++++++-- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 6a034eb538a1..57f3a6fcfc1e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -808,8 +808,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, return -EINVAL; } - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(NULL); } return 0; fault: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf8252d05a01..7e048288fcab 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2120,8 +2120,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat it as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 547515e8450a..377717045f8f 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } EXPORT_SYMBOL(udp6_csum_init); From cdaa18f9bd98df75d4ce9478f72c0a5d11562249 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Oct 2018 11:32:21 +0300 Subject: [PATCH 10/28] octeontx2-af: Copy the right amount of memory This is a copy and paste bug where we copied the sizeof() from the chunk before. We're copying more data than intended but the destination is a union so it doesn't cause memory corruption. Fixes: ffb0abd7e9cb ("octeontx2-af: NIX AQ instruction enqueue support") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 8890c95831ca..a4eac3b9ee72 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -573,7 +573,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, sizeof(struct nix_cq_ctx_s)); else if (req->ctype == NIX_AQ_CTYPE_RSS) memcpy(&rsp->rss, ctx, - sizeof(struct nix_cq_ctx_s)); + sizeof(struct nix_rsse_s)); else if (req->ctype == NIX_AQ_CTYPE_MCE) memcpy(&rsp->mce, ctx, sizeof(struct nix_rx_mce_s)); From ac0e549678d60097793a6a818ece319e99d1ae9a Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Wed, 24 Oct 2018 15:32:18 +0100 Subject: [PATCH 11/28] net: hns3: Fix for warning uninitialized symbol hw_err_lst3 This patch fixes the smatch warning, drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c:700 hclge_log_and_clear_ppp_error() error: uninitialized symbol 'hw_err_lst3' Link: https://lkml.org/lkml/2018/10/23/430 Fixes: da2d072a9ea7 ("net: hns3: Add enable and process hw errors from PPP") Reported-by: Dan Carpenter Signed-off-by: Shiju Jose Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index f7e363b90fe0..dca6f2326c26 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -859,10 +859,12 @@ static int hclge_log_and_clear_ppp_error(struct hclge_dev *hdev, u32 cmd, reset_level = HNAE3_FUNC_RESET; } - err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3; - if (err_sts) { - hclge_log_error(dev, hw_err_lst3, err_sts); - reset_level = HNAE3_FUNC_RESET; + if (cmd == HCLGE_PPP_CMD0_INT_CMD) { + err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3; + if (err_sts) { + hclge_log_error(dev, hw_err_lst3, err_sts); + reset_level = HNAE3_FUNC_RESET; + } } /* clear PPP INT */ From f203dca363f837b1e5a4e0c018264680aab90307 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:07 -0700 Subject: [PATCH 12/28] ice: Introduce ice_dev_onetime_setup ice_dev_onetime_setup contains a couple of driver workarounds for current firmware limitations. These workarounds are expected to go away once these limitations are fixed in the firmware. On a firmware release that has these issues addressed, these workarounds (while unnecessary) will not break anything. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 19 +++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_common.h | 3 +++ .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 ++ drivers/net/ethernet/intel/ice/ice_lib.c | 1 + 4 files changed, 25 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 78df54b25bf1..5a91a9087d1e 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -42,6 +42,23 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) return 0; } +/** + * ice_dev_onetime_setup - Temporary HW/FW workarounds + * @hw: pointer to the HW structure + * + * This function provides temporary workarounds for certain issues + * that are expected to be fixed in the HW/FW. + */ +void ice_dev_onetime_setup(struct ice_hw *hw) +{ + /* configure Rx - set non pxe mode */ + wr32(hw, GLLAN_RCTL_0, 0x1); + +#define MBX_PF_VT_PFALLOC 0x00231E80 + /* set VFs per PF */ + wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF)); +} + /** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure @@ -740,6 +757,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_sched; + ice_dev_onetime_setup(hw); + /* Get MAC information */ /* A single port can report up to two (LAN and WoL) addresses */ mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2, diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 1900681289a4..876347e32b6f 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -34,6 +34,9 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_sq_cd *cd); void ice_clear_pxe_mode(struct ice_hw *hw); enum ice_status ice_get_caps(struct ice_hw *hw); + +void ice_dev_onetime_setup(struct ice_hw *hw); + enum ice_status ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index); diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index a6679a9bfd3a..228afcad6fc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -157,6 +157,7 @@ #define VPINT_ALLOC_LAST_S 12 #define VPINT_ALLOC_LAST_M ICE_M(0x7FF, 12) #define VPINT_ALLOC_VALID_M BIT(31) +#define GLLAN_RCTL_0 0x002941F8 #define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4)) #define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4)) #define QRX_CTRL_MAX_INDEX 2047 @@ -320,6 +321,7 @@ #define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8)) #define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8)) #define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8)) +#define PF_VT_PFALLOC_HIF 0x0009DD80 #define VSIQF_HKEY_MAX_INDEX 12 #define VSIQF_HLUT_MAX_INDEX 15 #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index e750702bcdce..5bacad01f0c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2529,6 +2529,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) vsi->hw_base_vector = 0; ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi, false); + ice_dev_onetime_setup(&vsi->back->hw); ice_vsi_set_num_qs(vsi); /* Initialize VSI struct elements and create VSI in FW */ From 982b1219182ecd5a18a08caaf9be59a8373ac764 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:08 -0700 Subject: [PATCH 13/28] ice: Allocate VF interrupts and set queue map Allocate VF interrupts using VPINT_ALLOC_PCI. Multiple interrupts are specified as a range from "first" to "last". Also, according to the spec, the queue mapping for a VF needs to be set in both contig and scatter queue modes. So make this change as well. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 6 ++++++ drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 15 +++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 228afcad6fc3..5fdea6ec7675 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -157,6 +157,12 @@ #define VPINT_ALLOC_LAST_S 12 #define VPINT_ALLOC_LAST_M ICE_M(0x7FF, 12) #define VPINT_ALLOC_VALID_M BIT(31) +#define VPINT_ALLOC_PCI(_VF) (0x0009D000 + ((_VF) * 4)) +#define VPINT_ALLOC_PCI_FIRST_S 0 +#define VPINT_ALLOC_PCI_FIRST_M ICE_M(0x7FF, 0) +#define VPINT_ALLOC_PCI_LAST_S 12 +#define VPINT_ALLOC_PCI_LAST_M ICE_M(0x7FF, 12) +#define VPINT_ALLOC_PCI_VALID_M BIT(31) #define GLLAN_RCTL_0 0x002941F8 #define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4)) #define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index c25e486706f3..45f10f8f01dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -173,6 +173,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) vsi = pf->vsi[vf->lan_vsi_idx]; wr32(hw, VPINT_ALLOC(vf->vf_id), 0); + wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); first = vf->first_vector_idx; last = first + pf->num_vf_msix - 1; @@ -519,6 +520,10 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) VPINT_ALLOC_VALID_M); wr32(hw, VPINT_ALLOC(vf->vf_id), reg); + reg = (((first << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M) | + ((last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) | + VPINT_ALLOC_PCI_VALID_M); + wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg); /* map the interrupts to its functions */ for (v = first; v <= last; v++) { reg = (((abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) & @@ -528,10 +533,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) wr32(hw, GLINT_VECT2FUNC(v), reg); } + /* set regardless of mapping mode */ + wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M); + /* VF Tx queues allocation */ if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) { - wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), - VPLAN_TXQ_MAPENA_TX_ENA_M); /* set the VF PF Tx queue range * VFNUMQ value should be set to (number of queues - 1). A value * of 0 means 1 queue and a value of 255 means 256 queues @@ -546,10 +552,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) "Scattered mode for VF Tx queues is not yet implemented\n"); } + /* set regardless of mapping mode */ + wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M); + /* VF Rx queues allocation */ if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) { - wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), - VPLAN_RXQ_MAPENA_RX_ENA_M); /* set the VF PF Rx queue range * VFNUMQ value should be set to (number of queues - 1). A value * of 0 means 1 queue and a value of 255 means 256 queues From 4f4be03bdeb32761f89cec3f2bec5e69dd41bc82 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Thu, 18 Oct 2018 08:37:09 -0700 Subject: [PATCH 14/28] ice: Poll for link status change When the physical link goes up or down, the driver is supposed to receive a link status event (LSE). The driver currently has the code to handle LSEs but there is no firmware support for this feature yet. So this patch adds the ability for the driver to poll for link status changes. The polling itself is done in ice_watchdog_subtask. For namespace cleanliness, this patch also removes code that handles LSE. This code will be reintroduced once the feature is officially supported. Signed-off-by: Anirudh Venkataramanan Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_common.c | 29 +----- drivers/net/ethernet/intel/ice/ice_common.h | 6 -- drivers/net/ethernet/intel/ice/ice_main.c | 110 +++++--------------- 3 files changed, 25 insertions(+), 120 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 5a91a9087d1e..8cd6a2401fd9 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -235,7 +235,7 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi) * * Get Link Status (0x607). Returns the link status of the adapter. */ -enum ice_status +static enum ice_status ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, struct ice_link_status *link, struct ice_sq_cd *cd) { @@ -2004,33 +2004,6 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); } -/** - * ice_aq_set_event_mask - * @hw: pointer to the hw struct - * @port_num: port number of the physical function - * @mask: event mask to be set - * @cd: pointer to command details structure or NULL - * - * Set event mask (0x0613) - */ -enum ice_status -ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, - struct ice_sq_cd *cd) -{ - struct ice_aqc_set_event_mask *cmd; - struct ice_aq_desc desc; - - cmd = &desc.params.set_event_mask; - - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask); - - cmd->lport_num = port_num; - - cmd->event_mask = cpu_to_le16(mask); - - return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); -} - /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 876347e32b6f..cf760c24a6aa 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -86,12 +86,6 @@ enum ice_status ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, struct ice_sq_cd *cd); enum ice_status -ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, - struct ice_link_status *link, struct ice_sq_cd *cd); -enum ice_status -ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, - struct ice_sq_cd *cd); -enum ice_status ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids, u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num, struct ice_sq_cd *cmd_details); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0084b7290b2b..05993451147a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -456,35 +456,6 @@ static void ice_reset_subtask(struct ice_pf *pf) } } -/** - * ice_watchdog_subtask - periodic tasks not using event driven scheduling - * @pf: board private structure - */ -static void ice_watchdog_subtask(struct ice_pf *pf) -{ - int i; - - /* if interface is down do nothing */ - if (test_bit(__ICE_DOWN, pf->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) - return; - - /* make sure we don't do these things too often */ - if (time_before(jiffies, - pf->serv_tmr_prev + pf->serv_tmr_period)) - return; - - pf->serv_tmr_prev = jiffies; - - /* Update the stats for active netdevs so the network stack - * can look at updated numbers whenever it cares to - */ - ice_update_pf_stats(pf); - for (i = 0; i < pf->num_alloc_vsi; i++) - if (pf->vsi[i] && pf->vsi[i]->netdev) - ice_update_vsi_stats(pf->vsi[i]); -} - /** * ice_print_link_msg - print link up or down message * @vsi: the VSI whose link status is being queried @@ -554,36 +525,6 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) speed, fc); } -/** - * ice_init_link_events - enable/initialize link events - * @pi: pointer to the port_info instance - * - * Returns -EIO on failure, 0 on success - */ -static int ice_init_link_events(struct ice_port_info *pi) -{ - u16 mask; - - mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | - ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); - - if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to set link event mask for port %d\n", - pi->lport); - return -EIO; - } - - if (ice_aq_get_link_info(pi, true, NULL, NULL)) { - dev_dbg(ice_hw_to_dev(pi->hw), - "Failed to enable link events for port %d\n", - pi->lport); - return -EIO; - } - - return 0; -} - /** * ice_vsi_link_event - update the vsi's netdev * @vsi: the vsi on which the link event occurred @@ -671,27 +612,35 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi) } /** - * ice_handle_link_event - handle link event via ARQ - * @pf: pf that the link event is associated with - * - * Return -EINVAL if port_info is null - * Return status on succes + * ice_watchdog_subtask - periodic tasks not using event driven scheduling + * @pf: board private structure */ -static int ice_handle_link_event(struct ice_pf *pf) +static void ice_watchdog_subtask(struct ice_pf *pf) { - struct ice_port_info *port_info; - int status; + int i; - port_info = pf->hw.port_info; - if (!port_info) - return -EINVAL; + /* if interface is down do nothing */ + if (test_bit(__ICE_DOWN, pf->state) || + test_bit(__ICE_CFG_BUSY, pf->state)) + return; - status = ice_link_event(pf, port_info); - if (status) - dev_dbg(&pf->pdev->dev, - "Could not process link event, error %d\n", status); + /* make sure we don't do these things too often */ + if (time_before(jiffies, + pf->serv_tmr_prev + pf->serv_tmr_period)) + return; - return status; + pf->serv_tmr_prev = jiffies; + + if (ice_link_event(pf, pf->hw.port_info)) + dev_dbg(&pf->pdev->dev, "ice_link_event failed\n"); + + /* Update the stats for active netdevs so the network stack + * can look at updated numbers whenever it cares to + */ + ice_update_pf_stats(pf); + for (i = 0; i < pf->num_alloc_vsi; i++) + if (pf->vsi[i] && pf->vsi[i]->netdev) + ice_update_vsi_stats(pf->vsi[i]); } /** @@ -797,11 +746,6 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) opcode = le16_to_cpu(event.desc.opcode); switch (opcode) { - case ice_aqc_opc_get_link_status: - if (ice_handle_link_event(pf)) - dev_err(&pf->pdev->dev, - "Could not handle link event\n"); - break; case ice_mbx_opc_send_msg_to_pf: ice_vc_process_vf_msg(pf, &event); break; @@ -2207,12 +2151,6 @@ static int ice_probe(struct pci_dev *pdev, /* since everything is good, start the service timer */ mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period)); - err = ice_init_link_events(pf->hw.port_info); - if (err) { - dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err); - goto err_alloc_sw_unroll; - } - return 0; err_alloc_sw_unroll: From e72bde6b66299602087c8c2350d36a525e75d06e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 08:32:49 -0700 Subject: [PATCH 15/28] net: sched: Remove TCA_OPTIONS from policy Marco reported an error with hfsc: root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1 Error: Attribute failed policy validation. Apparently a few implementations pass TCA_OPTIONS as a binary instead of nested attribute, so drop TCA_OPTIONS from the policy. Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes") Reported-by: Marco Berizzi Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/sched/sch_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 022bca98bde6..ca3b0f46de53 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1320,7 +1320,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, - [TCA_OPTIONS] = { .type = NLA_NESTED }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, From 4ed591c8ab44e711e56b8e021ffaf4f407c045f5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 13:58:39 -0700 Subject: [PATCH 16/28] net/ipv6: Allow onlink routes to have a device mismatch if it is the default route The intent of ip6_route_check_nh_onlink is to make sure the gateway given for an onlink route is not actually on a connected route for a different interface (e.g., 2001:db8:1::/64 is on dev eth1 and then an onlink route has a via 2001:db8:1::1 dev eth2). If the gateway lookup hits the default route then it most likely will be a different interface than the onlink route which is ok. Update ip6_route_check_nh_onlink to disregard the device mismatch if the gateway lookup hits the default route. Turns out the existing onlink tests are passing because there is no default route or it is an unreachable default, so update the onlink tests to have a default route other than unreachable. Fixes: fc1e64e1092f6 ("net/ipv6: Add support for onlink flag") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ tools/testing/selftests/net/fib-onlink-tests.sh | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3226284e480..2a7423c39456 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2745,6 +2745,8 @@ static int ip6_route_check_nh_onlink(struct net *net, grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { if (!grt->dst.error && + /* ignore match if it is the default route */ + grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index 3991ad1a368d..864f865eee55 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -167,8 +167,8 @@ setup() # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} ip li set ${VRF} up - ip ro add table ${VRF_TABLE} unreachable default - ip -6 ro add table ${VRF_TABLE} unreachable default + ip ro add table ${VRF_TABLE} unreachable default metric 8192 + ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192 # create test interfaces ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} @@ -185,20 +185,20 @@ setup() for n in 1 3 5 7; do ip li set ${NETIFS[p${n}]} up ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done # move peer interfaces to namespace and add addresses for n in 2 4 6 8; do ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done - set +e + ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} + ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} - # let DAD complete - assume default of 1 probe - sleep 1 + set +e } cleanup() From 899ecaedd15599c22553d158f53b127cc1632dc2 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Wed, 24 Oct 2018 14:51:23 -0700 Subject: [PATCH 17/28] net: ethernet: cadence: fix socket buffer corruption problem Socket buffer is not re-created when headroom is 2 and tailroom is 1. Signed-off-by: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8f5bf9166c11..1d86b4d5645a 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1684,7 +1684,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) padlen = 0; /* No room for FCS, need to reallocate skb. */ else - padlen = ETH_FCS_LEN - tailroom; + padlen = ETH_FCS_LEN; } else { /* Add room for FCS. */ padlen += ETH_FCS_LEN; From 42d0f71c9b5fd48861d61cfc05c9e001f847c9d5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 25 Oct 2018 01:42:26 +0000 Subject: [PATCH 18/28] octeontx2-af: Use GFP_ATOMIC under spin lock The function nix_update_mce_list() is called from nix_update_bcast_mce_list(), and a spin lock is held here, so we should use GFP_ATOMIC instead. Fixes: 4b05528ebf0c ("octeontx2-af: Update bcast list upon NIXLF alloc/free") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a4eac3b9ee72..a5ab7eff2301 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1294,7 +1294,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list, return 0; /* Add a new one to the list, at the tail */ - mce = kzalloc(sizeof(*mce), GFP_KERNEL); + mce = kzalloc(sizeof(*mce), GFP_ATOMIC); if (!mce) return -ENOMEM; mce->idx = idx; From 649f0837a8cc2b39329f2de00fa0d04b029291c5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 25 Oct 2018 18:40:19 +0200 Subject: [PATCH 19/28] r8169: fix broken Wake-on-LAN from S5 (poweroff) It was reported that WoL from S5 is broken (WoL from S3 works) and the analysis showed that during system shutdown the network interface was brought down already when the actual kernel shutdown started. Therefore netif_running() returned false and as a consequence the PHY was suspended. Obviously WoL wasn't working then. To fix this the original patch needs to be effectively reverted. A side effect is that when normally bringing down the interface and WoL is enabled the PHY will remain powered on (like it was before the original patch). Fixes: fe87bef01f9b ("r8169: don't check WoL when powering down PHY and interface is down") Reported-by: Neil MacLeod Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 006b0aa8cec3..1fd01688d37b 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4161,10 +4161,15 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) static bool rtl_wol_pll_power_down(struct rtl8169_private *tp) { - if (!netif_running(tp->dev) || !__rtl8169_get_wol(tp)) + struct phy_device *phydev; + + if (!__rtl8169_get_wol(tp)) return false; - phy_speed_down(tp->dev->phydev, false); + /* phydev may not be attached to netdevice */ + phydev = mdiobus_get_phy(tp->mii_bus, 0); + + phy_speed_down(phydev, false); rtl_wol_suspend_quirk(tp); return true; From f802912d42525eddb2e504b0b852539f46c8fd9d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 25 Oct 2018 14:42:38 +0200 Subject: [PATCH 20/28] net: phy: genphy_10g_driver: Avoid NULL pointer dereference This driver got missed during the recent change of .features from a u32 to a pointer to a Linux bitmap. Change the initialisation from 0 to PHY_10GBIT_FEATURES so removing the danger of a NULL pointer dereference. Fixes: 719655a14971 ("net: phy: Replace phy driver features u32 with link_mode bitmap") Reported-by: Jose Abreu Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy-c45.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index e1225545362d..d7636ff03bc7 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -329,7 +329,7 @@ struct phy_driver genphy_10g_driver = { .name = "Generic 10G PHY", .soft_reset = gen10g_no_soft_reset, .config_init = gen10g_config_init, - .features = 0, + .features = PHY_10GBIT_FEATURES, .config_aneg = gen10g_config_aneg, .read_status = gen10g_read_status, .suspend = gen10g_suspend, From 55469bc6b5770ffdf0f87c38ce2573f8ddbaa33f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Oct 2018 06:42:12 -0700 Subject: [PATCH 21/28] drivers: net: remove inclusion when not needed Drivers using generic NAPI interface no longer need to include , since busy polling was moved to core networking stack long ago. See commit 79e7fff47b7b ("net: remove support for per driver ndo_busy_poll()") for reference. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 1 - drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 - drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 - drivers/net/ethernet/intel/iavf/iavf_txrx.c | 1 - drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 - drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 - drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 - drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 - 9 files changed, 9 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index d96a84a62d78..0cc911f928b1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -119,7 +119,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 5a727d4729da..686899d7e555 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include "bnx2x_cmn.h" #include "bnx2x_init.h" diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 740ea58ba938..aef3c89ee79c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2,7 +2,6 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include -#include #include #include #include "i40e.h" diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index edc349f49748..fb9bfad96daf 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -2,7 +2,6 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include -#include #include "iavf.h" #include "iavf_trace.h" diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7a7679e7be84..ec1b87cc4410 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -30,7 +30,6 @@ #include "ixgbe_ipsec.h" #include -#include /* common prefix used by pr_<> macros */ #undef pr_fmt diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fe49384eba48..b744cd49a785 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a1aeeb8094c3..5a6d0919533d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -31,7 +31,6 @@ * */ -#include #include #include #include diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 2f7fb8de6967..94224c22ecc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index b2d2ec8c11e2..5f384f73007d 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -70,7 +70,6 @@ #include #include #include -#include #include "myri10ge_mcp.h" #include "myri10ge_mcp_gen_header.h" From 863d4187d05aafd53f13fe1f358e9eb42bb0dbc1 Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Thu, 25 Oct 2018 13:09:38 -0400 Subject: [PATCH 22/28] lan743x: Remove SPI dependency from Microchip group. The SPI dependency does not apply to lan743x driver, and other drivers in the group already state their dependence on SPI. Signed-off-by: Bryan Whitehead Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 16bd3f44dbe8..cf1d49149cc8 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -5,7 +5,6 @@ config NET_VENDOR_MICROCHIP bool "Microchip devices" default y - depends on SPI ---help--- If you have a network (Ethernet) card belonging to this class, say Y. From bf4cc40e9343bbe6c7400ff6f4feb46fb9338087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 25 Oct 2018 21:18:25 +0200 Subject: [PATCH 23/28] net/{ipv4,ipv6}: Do not put target net if input nsid is invalid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleanup path will put the target net when netnsid is set. So we must reset netnsid if the input is invalid. Fixes: d7e38611b81e ("net/ipv4: Put target net when address dump fails due to bad attributes") Fixes: 242afaa6968c ("net/ipv6: Put target net when address dump fails due to bad attributes") Cc: David Ahern Signed-off-by: Bjørn Mork Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 1 + net/ipv6/addrconf.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9250b309c742..a34602ae27de 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1704,6 +1704,7 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { + fillargs->netnsid = -1; NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id"); return PTR_ERR(net); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7eb09c86fa13..63a808d5af15 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5058,6 +5058,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->netnsid = nla_get_s32(tb[i]); net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { + fillargs->netnsid = -1; NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id"); return PTR_ERR(net); } From ee1abcf689353f36d9322231b4320926096bdee0 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: [PATCH 24/28] ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a25cfdd47c89..659ecf4e4b3c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; From fb692ec4117f6fd25044cfb5720d6b79d400dc65 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 25 Oct 2018 13:25:28 +0200 Subject: [PATCH 25/28] net/smc: fix smc_buf_unuse to use the lgr pointer The pointer to the link group is unset in the smc connection structure right before the call to smc_buf_unuse. Provide the lgr pointer to smc_buf_unuse explicitly. And move the call to smc_lgr_schedule_free_work to the end of smc_conn_free. Fixes: a6920d1d130c ("net/smc: handle unregistered buffers") Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e871368500e3..18daebcef181 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -122,22 +122,17 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ } -/* Unregister connection and trigger lgr freeing if applicable +/* Unregister connection from lgr */ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - int reduced = 0; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { - reduced = 1; __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - if (!reduced || lgr->conns_num) - return; - smc_lgr_schedule_free_work(lgr); } /* Send delete link, either as client to request the initiation @@ -291,7 +286,8 @@ out: return rc; } -static void smc_buf_unuse(struct smc_connection *conn) +static void smc_buf_unuse(struct smc_connection *conn, + struct smc_link_group *lgr) { if (conn->sndbuf_desc) conn->sndbuf_desc->used = 0; @@ -301,8 +297,6 @@ static void smc_buf_unuse(struct smc_connection *conn) conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ - struct smc_link_group *lgr = conn->lgr; - write_lock_bh(&lgr->rmbs_lock); list_del(&conn->rmb_desc->list); write_unlock_bh(&lgr->rmbs_lock); @@ -315,16 +309,21 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - if (!conn->lgr) + struct smc_link_group *lgr = conn->lgr; + + if (!lgr) return; - if (conn->lgr->is_smcd) { + if (lgr->is_smcd) { smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); - smc_buf_unuse(conn); + smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + + if (!lgr->conns_num) + smc_lgr_schedule_free_work(lgr); } static void smc_link_clear(struct smc_link *lnk) From 5a2de63fd1a59c30c02526d427bc014b98adf508 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: [PATCH 26/28] bridge: do not add port to router list when receives query with source 0.0.0.0 Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 024139b51d3a..41cdafbf2ebe 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1422,7 +1422,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static void br_ip4_multicast_query(struct net_bridge *br, From f64bf6b8ae802e93231155b0d92a619d896cd0bd Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Fri, 26 Oct 2018 12:24:35 +0100 Subject: [PATCH 27/28] net: allow traceroute with a specified interface in a vrf Traceroute executed in a vrf succeeds if no device is given or if the vrf is given as the device, but fails if the interface is given as the device. This is for default UDP probes, it succeeds for TCP SYN or ICMP ECHO probes. As the skb bound dev is the interface and the sk dev is the vrf, sk lookup fails for ICMP_DEST_UNREACH and ICMP_TIME_EXCEEDED messages. The solution is for the secondary dev to be passed so that the interface is available for the device match to succeed, in the same way as is already done for non-error cases. Signed-off-by: Mike Manning Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7e048288fcab..ca3ed931f2a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -609,8 +609,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, 0, - udptable, NULL); + iph->saddr, uh->source, skb->dev->ifindex, + inet_sdif(skb), udptable, NULL); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 06d17ff3562f..d2d97d07ef27 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); From aab456dfa404f3a16d6f1780e62a6a8533c4d008 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Oct 2018 09:33:27 -0700 Subject: [PATCH 28/28] net/neigh: fix NULL deref in pneigh_dump_table() pneigh can have NULL device pointer, so we need to make neigh_master_filtered() and neigh_ifindex_filtered() more robust. syzbot report : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15867 Comm: syz-executor2 Not tainted 4.19.0+ #276 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:179 [inline] RIP: 0010:list_empty include/linux/list.h:203 [inline] RIP: 0010:netdev_master_upper_dev_get+0xa1/0x250 net/core/dev.c:6467 RSP: 0018:ffff8801bfaf7220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000001 RCX: ffffc90005e92000 RDX: 0000000000000016 RSI: ffffffff860b44d9 RDI: 0000000000000005 RBP: ffff8801bfaf72b0 R08: ffff8801c4c84080 R09: fffffbfff139a580 R10: fffffbfff139a580 R11: ffffffff89cd2c07 R12: 1ffff10037f5ee45 R13: 0000000000000000 R14: ffff8801bfaf7288 R15: 00000000000000b0 FS: 00007f65cc68d700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b33a21000 CR3: 00000001c6116000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: neigh_master_filtered net/core/neighbour.c:2367 [inline] pneigh_dump_table net/core/neighbour.c:2456 [inline] neigh_dump_info+0x7a9/0x1ce0 net/core/neighbour.c:2577 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] rtnetlink_rcv_msg+0x809/0xc20 net/core/rtnetlink.c:4898 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4953 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x5a5/0x760 net/netlink/af_netlink.c:1336 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 sock_write_iter+0x35e/0x5c0 net/socket.c:900 call_write_iter include/linux/fs.h:1808 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Fixes: 6f52f80e8530 ("net/neigh: Extend dump filter to proxy neighbor dumps") Signed-off-by: Eric Dumazet Cc: David Ahern Reported-by: syzbot Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ee605d9d8bd4..41954e42a2de 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2364,7 +2364,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) if (!master_idx) return false; - master = netdev_master_upper_dev_get(dev); + master = dev ? netdev_master_upper_dev_get(dev) : NULL; if (!master || master->ifindex != master_idx) return true; @@ -2373,7 +2373,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) { - if (filter_idx && dev->ifindex != filter_idx) + if (filter_idx && (!dev || dev->ifindex != filter_idx)) return true; return false;