From f4f1c23d6e41f5ee4973a6da65cba1e1c536ec29 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:50:57 -0700 Subject: [PATCH 1/9] netvsc: fix NAPI performance regression When using NAPI, the single stream performance declined signifcantly because the poll routine was updating host after every burst of packets. This excess signalling caused host throttling. This fix restores the old behavior. Host is only signalled after the ring has been emptied. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc.c | 41 +++++++++++++++------------------ 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 6b5f75217694..a33f2ee86044 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -723,6 +723,7 @@ struct net_device_context { /* Per channel data */ struct netvsc_channel { struct vmbus_channel *channel; + const struct vmpacket_descriptor *desc; struct napi_struct napi; struct multi_send_data msd; struct multi_recv_comp mrc; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 989b7cd99380..727762d0f13b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1173,7 +1173,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device, struct vmbus_channel *channel, struct netvsc_device *net_device, struct net_device *ndev, - u64 request_id, const struct vmpacket_descriptor *desc) { struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -1195,7 +1194,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device, default: netdev_err(ndev, "unhandled packet type %d, tid %llx\n", - desc->type, request_id); + desc->type, desc->trans_id); break; } @@ -1222,28 +1221,20 @@ int netvsc_poll(struct napi_struct *napi, int budget) u16 q_idx = channel->offermsg.offer.sub_channel_index; struct net_device *ndev = hv_get_drvdata(device); struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); - const struct vmpacket_descriptor *desc; int work_done = 0; - desc = hv_pkt_iter_first(channel); - while (desc) { - int count; + /* If starting a new interval */ + if (!nvchan->desc) + nvchan->desc = hv_pkt_iter_first(channel); - count = netvsc_process_raw_pkt(device, channel, net_device, - ndev, desc->trans_id, desc); - work_done += count; - desc = __hv_pkt_iter_next(channel, desc); - - /* If receive packet budget is exhausted, reschedule */ - if (work_done >= budget) { - work_done = budget; - break; - } + while (nvchan->desc && work_done < budget) { + work_done += netvsc_process_raw_pkt(device, channel, net_device, + ndev, nvchan->desc); + nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); } - hv_pkt_iter_close(channel); - /* If budget was not exhausted and - * not doing busy poll + /* If receive ring was exhausted + * and not doing busy poll * then re-enable host interrupts * and reschedule if ring is not empty. */ @@ -1253,7 +1244,9 @@ int netvsc_poll(struct napi_struct *napi, int budget) napi_reschedule(napi); netvsc_chk_recv_comp(net_device, channel, q_idx); - return work_done; + + /* Driver may overshoot since multiple packets per descriptor */ + return min(work_done, budget); } /* Call back when data is available in host ring buffer. @@ -1263,10 +1256,12 @@ void netvsc_channel_cb(void *context) { struct netvsc_channel *nvchan = context; - /* disable interupts from host */ - hv_begin_read(&nvchan->channel->inbound); + if (napi_schedule_prep(&nvchan->napi)) { + /* disable interupts from host */ + hv_begin_read(&nvchan->channel->inbound); - napi_schedule(&nvchan->napi); + __napi_schedule(&nvchan->napi); + } } /* From 163891d7d42935e7499daa0646a8eb3c44504300 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:50:58 -0700 Subject: [PATCH 2/9] netvsc: handle offline mtu and channel change If device is not up, then changing MTU (or number of channels) should not re-enable the device. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 191372486a87..b3a7f508434b 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -743,6 +743,7 @@ static int netvsc_set_channels(struct net_device *net, struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = net_device_ctx->nvdev; unsigned int count = channels->combined_count; + bool was_running; int ret; /* We do not support separate count for rx, tx, or other */ @@ -762,9 +763,12 @@ static int netvsc_set_channels(struct net_device *net, if (count > nvdev->max_chn) return -EINVAL; - ret = netvsc_close(net); - if (ret) - return ret; + was_running = netif_running(net); + if (was_running) { + ret = netvsc_close(net); + if (ret) + return ret; + } net_device_ctx->start_remove = true; rndis_filter_device_remove(dev, nvdev); @@ -775,9 +779,11 @@ static int netvsc_set_channels(struct net_device *net, else netvsc_set_queues(net, dev, nvdev->num_chn); - netvsc_open(net); net_device_ctx->start_remove = false; + if (was_running) + ret = netvsc_open(net); + /* We may have missed link change notifications */ schedule_delayed_work(&net_device_ctx->dwork, 0); @@ -845,14 +851,18 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = ndevctx->nvdev; struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; + bool was_running; int ret; if (ndevctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; - ret = netvsc_close(ndev); - if (ret) - goto out; + was_running = netif_running(ndev); + if (was_running) { + ret = netvsc_close(ndev); + if (ret) + return ret; + } memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; @@ -872,10 +882,11 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) rndis_filter_device_add(hdev, &device_info); -out: - netvsc_open(ndev); ndevctx->start_remove = false; + if (was_running) + ret = netvsc_open(ndev); + /* We may have missed link change notifications */ schedule_delayed_work(&ndevctx->dwork, 0); From 3071ada4916e26a8961c1b99f7766a73b9007bfc Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:50:59 -0700 Subject: [PATCH 3/9] netvsc: change max channel calculation The default number of maximum channels should be limited to the number of cpus available on the numa node of the primary channel. This also makes sure maximum channels <= num_online_cpus Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 +-- drivers/net/hyperv/rndis_filter.c | 25 ++++++++++--------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index b3a7f508434b..2f9de2e9f38e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1503,8 +1503,7 @@ static int netvsc_probe(struct hv_device *dev, /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT, - num_online_cpus()); + device_info.num_chn = VRSS_CHANNEL_DEFAULT; ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 382b9a62e3c4..d193d549cec6 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1181,33 +1181,28 @@ int rndis_filter_device_add(struct hv_device *dev, if (ret || rsscap.num_recv_que < 2) goto out; - net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que); - - num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn); - /* * We will limit the VRSS channels to the number CPUs in the NUMA node * the primary channel is currently bound to. + * + * This also guarantees that num_possible_rss_qs <= num_online_cpus */ node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu)); - num_possible_rss_qs = cpumask_weight(node_cpu_mask); + num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask), + rsscap.num_recv_que); + + net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs); /* We will use the given number of channels if available. */ - if (device_info->num_chn && device_info->num_chn < net_device->max_chn) - net_device->num_chn = device_info->num_chn; - else - net_device->num_chn = min(num_possible_rss_qs, num_rss_qs); - - num_rss_qs = net_device->num_chn - 1; + net_device->num_chn = min(net_device->max_chn, device_info->num_chn); for (i = 0; i < ITAB_NUM; i++) rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, net_device->num_chn); - net_device->num_sc_offered = num_rss_qs; - - if (net_device->num_chn == 1) - goto out; + num_rss_qs = net_device->num_chn - 1; + if (num_rss_qs == 0) + return 0; vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); From 545a8e79bd1cc8774877a26275171a2ec8881c9e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:00 -0700 Subject: [PATCH 4/9] netvsc: use RCU to protect inner device structure The netvsc driver has an internal structure (netvsc_device) which is created when device is opened and released when device is closed. And also opened/released when MTU or number of channels change. Since this is referenced in the receive and transmit path, it is safer to use RCU to protect/prevent use after free problems. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 ++- drivers/net/hyperv/netvsc.c | 16 ++++++--- drivers/net/hyperv/netvsc_drv.c | 61 +++++++++++++++++++++++---------- 3 files changed, 57 insertions(+), 24 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a33f2ee86044..0ade21f95d71 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -686,7 +686,7 @@ struct net_device_context { /* point back to our device context */ struct hv_device *device_ctx; /* netvsc_device */ - struct netvsc_device *nvdev; + struct netvsc_device __rcu *nvdev; /* reconfigure work */ struct delayed_work dwork; /* last reconfig time */ @@ -780,6 +780,8 @@ struct netvsc_device { atomic_t open_cnt; struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; + + struct rcu_head rcu; }; static inline struct netvsc_device * diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 727762d0f13b..ab9118d620ab 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -80,8 +80,10 @@ static struct netvsc_device *alloc_net_device(void) return net_device; } -static void free_netvsc_device(struct netvsc_device *nvdev) +static void free_netvsc_device(struct rcu_head *head) { + struct netvsc_device *nvdev + = container_of(head, struct netvsc_device, rcu); int i; for (i = 0; i < VRSS_CHANNEL_MAX; i++) @@ -90,6 +92,10 @@ static void free_netvsc_device(struct netvsc_device *nvdev) kfree(nvdev); } +static void free_netvsc_device_rcu(struct netvsc_device *nvdev) +{ + call_rcu(&nvdev->rcu, free_netvsc_device); +} static struct netvsc_device *get_outbound_net_device(struct hv_device *device) { @@ -551,7 +557,7 @@ void netvsc_device_remove(struct hv_device *device) netvsc_disconnect_vsp(device); - net_device_ctx->nvdev = NULL; + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); /* * At this point, no one should be accessing net_device @@ -566,7 +572,7 @@ void netvsc_device_remove(struct hv_device *device) napi_disable(&net_device->chan_table[i].napi); /* Release all resources */ - free_netvsc_device(net_device); + free_netvsc_device_rcu(net_device); } #define RING_AVAIL_PERCENT_HIWATER 20 @@ -1322,7 +1328,7 @@ int netvsc_device_add(struct hv_device *device, */ wmb(); - net_device_ctx->nvdev = net_device; + rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ ret = netvsc_connect_vsp(device); @@ -1341,7 +1347,7 @@ close: vmbus_close(device->channel); cleanup: - free_netvsc_device(net_device); + free_netvsc_device(&net_device->rcu); return ret; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2f9de2e9f38e..d8a70d07eeec 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -62,7 +62,7 @@ static void do_set_multicast(struct work_struct *w) container_of(w, struct net_device_context, work); struct hv_device *device_obj = ndevctx->device_ctx; struct net_device *ndev = hv_get_drvdata(device_obj); - struct netvsc_device *nvdev = ndevctx->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndevctx->nvdev); struct rndis_device *rdev; if (!nvdev) @@ -116,7 +116,7 @@ static int netvsc_open(struct net_device *net) static int netvsc_close(struct net_device *net) { struct net_device_context *net_device_ctx = netdev_priv(net); - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); int ret; u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20; struct vmbus_channel *chn; @@ -637,9 +637,9 @@ int netvsc_recv_callback(struct net_device *net, const struct ndis_pkt_8021q_info *vlan) { struct net_device_context *net_device_ctx = netdev_priv(net); - struct netvsc_device *net_device = net_device_ctx->nvdev; + struct netvsc_device *net_device; u16 q_idx = channel->offermsg.offer.sub_channel_index; - struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; + struct netvsc_channel *nvchan; struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; @@ -655,6 +655,11 @@ int netvsc_recv_callback(struct net_device *net, * interface in the guest. */ rcu_read_lock(); + net_device = rcu_dereference(net_device_ctx->nvdev); + if (unlikely(!net_device)) + goto drop; + + nvchan = &net_device->chan_table[q_idx]; vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); if (vf_netdev && (vf_netdev->flags & IFF_UP)) net = vf_netdev; @@ -663,6 +668,7 @@ int netvsc_recv_callback(struct net_device *net, skb = netvsc_alloc_recv_skb(net, &nvchan->napi, csum_info, vlan, data, len); if (unlikely(!skb)) { +drop: ++net->stats.rx_dropped; rcu_read_unlock(); return NVSP_STAT_FAIL; @@ -704,7 +710,7 @@ static void netvsc_get_channels(struct net_device *net, struct ethtool_channels *channel) { struct net_device_context *net_device_ctx = netdev_priv(net); - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); if (nvdev) { channel->max_combined = nvdev->max_chn; @@ -741,7 +747,7 @@ static int netvsc_set_channels(struct net_device *net, { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *dev = net_device_ctx->device_ctx; - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); unsigned int count = channels->combined_count; bool was_running; int ret; @@ -848,7 +854,7 @@ static int netvsc_set_link_ksettings(struct net_device *dev, static int netvsc_change_mtu(struct net_device *ndev, int mtu) { struct net_device_context *ndevctx = netdev_priv(ndev); - struct netvsc_device *nvdev = ndevctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev); struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; bool was_running; @@ -897,7 +903,7 @@ static void netvsc_get_stats64(struct net_device *net, struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = ndev_ctx->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndev_ctx->nvdev); int i; if (!nvdev) @@ -982,7 +988,10 @@ static const struct { static int netvsc_get_sset_count(struct net_device *dev, int string_set) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = ndc->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + + if (!nvdev) + return -ENODEV; switch (string_set) { case ETH_SS_STATS: @@ -996,13 +1005,16 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = ndc->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); const void *nds = &ndc->eth_stats; const struct netvsc_stats *qstats; unsigned int start; u64 packets, bytes; int i, j; + if (!nvdev) + return; + for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); @@ -1031,10 +1043,13 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = ndc->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); u8 *p = data; int i; + if (!nvdev) + return; + switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) @@ -1086,7 +1101,10 @@ netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rules) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *nvdev = ndc->nvdev; + struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev); + + if (!nvdev) + return -ENODEV; switch (info->cmd) { case ETHTOOL_GRXRINGS: @@ -1122,10 +1140,13 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *ndev = ndc->nvdev; + struct netvsc_device *ndev = rcu_dereference(ndc->nvdev); struct rndis_device *rndis_dev = ndev->extension; int i; + if (!ndev) + return -ENODEV; + if (hfunc) *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ @@ -1144,10 +1165,13 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct net_device_context *ndc = netdev_priv(dev); - struct netvsc_device *ndev = ndc->nvdev; + struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev); struct rndis_device *rndis_dev = ndev->extension; int i; + if (!ndev) + return -ENODEV; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -1224,7 +1248,7 @@ static void netvsc_link_change(struct work_struct *w) if (ndev_ctx->start_remove) goto out_unlock; - net_device = ndev_ctx->nvdev; + net_device = rtnl_dereference(ndev_ctx->nvdev); rdev = net_device->extension; next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT; @@ -1365,7 +1389,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); - netvsc_dev = net_device_ctx->nvdev; + netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; @@ -1391,7 +1415,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); - netvsc_dev = net_device_ctx->nvdev; + netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); netdev_info(ndev, "VF up: %s\n", vf_netdev->name); @@ -1425,7 +1449,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); - netvsc_dev = net_device_ctx->nvdev; + netvsc_dev = rtnl_dereference(net_device_ctx->nvdev); netdev_info(ndev, "VF down: %s\n", vf_netdev->name); netvsc_switch_datapath(ndev, false); @@ -1519,6 +1543,7 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; + /* RCU not necessary here, device not registered */ nvdev = net_device_ctx->nvdev; netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); From a0be450e19d397e9ff215e32ed31bc51339b460a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:01 -0700 Subject: [PATCH 5/9] netvsc: uses RCU instead of removal flag It is cleaner to use RCU protected pointer (nvdev_ctx->nvdev) to indicate device is in removed state, rather than having a separate boolean flag. By using the pointer the context can be checked by static checkers and dynamic lockdep. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 --- drivers/net/hyperv/netvsc.c | 4 ---- drivers/net/hyperv/netvsc_drv.c | 34 ++++++++++----------------------- 3 files changed, 10 insertions(+), 31 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 0ade21f95d71..907f55960ba8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -708,9 +708,6 @@ struct net_device_context { u32 speed; struct netvsc_ethtool_stats eth_stats; - /* the device is going away */ - bool start_remove; - /* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ab9118d620ab..1f17d948f9b0 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -605,7 +605,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -639,7 +638,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, wake_up(&net_device->wait_drain); if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - !net_device_ctx->start_remove && (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); @@ -1326,8 +1324,6 @@ int netvsc_device_add(struct hv_device *device, /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is * populated. */ - wmb(); - rcu_assign_pointer(net_device_ctx->nvdev, net_device); /* Connect with the NetVsp */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d8a70d07eeec..eb7ae79d47bb 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -760,7 +760,7 @@ static int netvsc_set_channels(struct net_device *net, if (count > net->num_tx_queues || count > net->num_rx_queues) return -EINVAL; - if (net_device_ctx->start_remove || !nvdev || nvdev->destroy) + if (!nvdev || nvdev->destroy) return -ENODEV; if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) @@ -776,7 +776,6 @@ static int netvsc_set_channels(struct net_device *net, return ret; } - net_device_ctx->start_remove = true; rndis_filter_device_remove(dev, nvdev); ret = netvsc_set_queues(net, dev, count); @@ -785,8 +784,6 @@ static int netvsc_set_channels(struct net_device *net, else netvsc_set_queues(net, dev, nvdev->num_chn); - net_device_ctx->start_remove = false; - if (was_running) ret = netvsc_open(net); @@ -860,7 +857,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) bool was_running; int ret; - if (ndevctx->start_remove || !nvdev || nvdev->destroy) + if (!nvdev || nvdev->destroy) return -ENODEV; was_running = netif_running(ndev); @@ -875,7 +872,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) device_info.num_chn = nvdev->num_chn; device_info.max_num_vrss_chns = nvdev->num_chn; - ndevctx->start_remove = true; rndis_filter_device_remove(hdev, nvdev); /* 'nvdev' has been freed in rndis_filter_device_remove() -> @@ -888,8 +884,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) rndis_filter_device_add(hdev, &device_info); - ndevctx->start_remove = false; - if (was_running) ret = netvsc_open(ndev); @@ -1245,10 +1239,10 @@ static void netvsc_link_change(struct work_struct *w) unsigned long flags, next_reconfig, delay; rtnl_lock(); - if (ndev_ctx->start_remove) + net_device = rtnl_dereference(ndev_ctx->nvdev); + if (!net_device) goto out_unlock; - net_device = rtnl_dereference(ndev_ctx->nvdev); rdev = net_device->extension; next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT; @@ -1509,8 +1503,6 @@ static int netvsc_probe(struct hv_device *dev, hv_set_drvdata(dev, net); - net_device_ctx->start_remove = false; - INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); @@ -1579,26 +1571,20 @@ static int netvsc_remove(struct hv_device *dev) ndev_ctx = netdev_priv(net); - /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels() - * removing the device. - */ - rtnl_lock(); - ndev_ctx->start_remove = true; - rtnl_unlock(); + netif_device_detach(net); cancel_delayed_work_sync(&ndev_ctx->dwork); cancel_work_sync(&ndev_ctx->work); - /* Stop outbound asap */ - netif_tx_disable(net); - - unregister_netdev(net); - /* * Call to the vsc driver to let it know that the device is being - * removed + * removed. Also blocks mtu and channel changes. */ + rtnl_lock(); rndis_filter_device_remove(dev, ndev_ctx->nvdev); + rtnl_unlock(); + + unregister_netdev(net); hv_set_drvdata(dev, NULL); From 43c7bd1ffcd1621c64cedf1be52156e2f95bba9b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:02 -0700 Subject: [PATCH 6/9] netvsc: use refcount_t for keeping track of sub channels Rather than a lock and variable, use a refcount_t to keep track of the number of sub channels. Don't need to wait for subchannels on device removal since wait was already done in device_add. Also fix the error handling; don't wait forever in case of an error on request to create sub channels. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 +-- drivers/net/hyperv/rndis_filter.c | 41 ++++++++----------------------- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 907f55960ba8..4747ad48b3cc 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -761,8 +761,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; - spinlock_t sc_lock; /* Protects num_sc_offered variable */ - u32 num_sc_offered; + + refcount_t sc_offered; /* Holds rndis device info */ void *extension; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index d193d549cec6..3bd5447277ad 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -997,7 +997,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev); u16 chn_index = new_sc->offermsg.offer.sub_channel_index; struct netvsc_channel *nvchan; - unsigned long flags; int ret; if (chn_index >= nvscdev->num_chn) @@ -1019,10 +1018,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) napi_enable(&nvchan->napi); - spin_lock_irqsave(&nvscdev->sc_lock, flags); - nvscdev->num_sc_offered--; - spin_unlock_irqrestore(&nvscdev->sc_lock, flags); - if (nvscdev->num_sc_offered == 0) + if (refcount_dec_and_test(&nvscdev->sc_offered)) complete(&nvscdev->channel_init_wait); } @@ -1039,12 +1035,9 @@ int rndis_filter_device_add(struct hv_device *dev, struct ndis_recv_scale_cap rsscap; u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); unsigned int gso_max_size = GSO_MAX_SIZE; - u32 mtu, size; - u32 num_rss_qs; - u32 sc_delta; + u32 mtu, size, num_rss_qs; const struct cpumask *node_cpu_mask; u32 num_possible_rss_qs; - unsigned long flags; int i, ret; rndis_device = get_rndis_device(); @@ -1067,7 +1060,7 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->max_chn = 1; net_device->num_chn = 1; - spin_lock_init(&net_device->sc_lock); + refcount_set(&net_device->sc_offered, 0); net_device->extension = rndis_device; rndis_device->ndev = net; @@ -1204,6 +1197,7 @@ int rndis_filter_device_add(struct hv_device *dev, if (num_rss_qs == 0) return 0; + refcount_set(&net_device->sc_offered, num_rss_qs); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); init_packet = &net_device->channel_init_pkt; @@ -1219,32 +1213,23 @@ int rndis_filter_device_add(struct hv_device *dev, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret) goto out; - wait_for_completion(&net_device->channel_init_wait); - if (init_packet->msg.v5_msg.subchn_comp.status != - NVSP_STAT_SUCCESS) { + if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { ret = -ENODEV; goto out; } + wait_for_completion(&net_device->channel_init_wait); + net_device->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels; - ret = rndis_filter_set_rss_param(rndis_device, netvsc_hash_key, - net_device->num_chn); - - /* - * Set the number of sub-channels to be received. - */ - spin_lock_irqsave(&net_device->sc_lock, flags); - sc_delta = num_rss_qs - (net_device->num_chn - 1); - net_device->num_sc_offered -= sc_delta; - spin_unlock_irqrestore(&net_device->sc_lock, flags); - + /* ignore failues from setting rss parameters, still have channels */ + rndis_filter_set_rss_param(rndis_device, netvsc_hash_key, + net_device->num_chn); out: if (ret) { net_device->max_chn = 1; net_device->num_chn = 1; - net_device->num_sc_offered = 0; } return 0; /* return 0 because primary channel can be used alone */ @@ -1259,12 +1244,6 @@ void rndis_filter_device_remove(struct hv_device *dev, { struct rndis_device *rndis_dev = net_dev->extension; - /* If not all subchannel offers are complete, wait for them until - * completion to avoid race. - */ - if (net_dev->num_sc_offered > 0) - wait_for_completion(&net_dev->channel_init_wait); - /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); From 00ecfb3b34b69dd702dee1bd6de6fc100be384db Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:03 -0700 Subject: [PATCH 7/9] netvsc: remove unnecessary lock on shutdown The channel inbound lock was not being used at all by the netvsc device, but the spin_lock was helpful by providing necessary barrier before waiting. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 3bd5447277ad..cd7b83707e04 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -926,8 +926,6 @@ static void rndis_filter_halt_device(struct rndis_device *dev) struct rndis_halt_request *halt; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); struct netvsc_device *nvdev = net_device_ctx->nvdev; - struct hv_device *hdev = net_device_ctx->device_ctx; - ulong flags; /* Attempt to do a rndis device halt */ request = get_rndis_request(dev, RNDIS_MSG_HALT, @@ -945,9 +943,10 @@ static void rndis_filter_halt_device(struct rndis_device *dev) dev->state = RNDIS_DEV_UNINITIALIZED; cleanup: - spin_lock_irqsave(&hdev->channel->inbound_lock, flags); nvdev->destroy = true; - spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags); + + /* Force flag to be ordered before waiting */ + wmb(); /* Wait for all send completions */ wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev)); From ebc1dcf6008e562a8f88a6b1f4a4705f4d4c4fdd Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:04 -0700 Subject: [PATCH 8/9] netvsc: eliminate unnecessary skb == NULL checks Since there already is a special case goto for control messages (skb == NULL) in netvsc_send, there is no need for later checks in same code path. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1f17d948f9b0..e998e2f7a619 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -706,8 +706,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, packet->page_buf_cnt; /* Add padding */ - if (skb && skb->xmit_more && remain && - !packet->cp_partial) { + if (skb->xmit_more && remain && !packet->cp_partial) { padding = net_device->pkt_align - remain; rndis_msg->msg_len += padding; packet->total_data_buflen += padding; @@ -865,9 +864,7 @@ int netvsc_send(struct hv_device *device, if (msdp->pkt) msd_len = msdp->pkt->total_data_buflen; - try_batch = (skb != NULL) && msd_len > 0 && msdp->count < - net_device->max_pkt; - + try_batch = msd_len > 0 && msdp->count < net_device->max_pkt; if (try_batch && msd_len + pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = msdp->pkt->send_buf_index; @@ -877,7 +874,7 @@ int netvsc_send(struct hv_device *device, section_index = msdp->pkt->send_buf_index; packet->cp_partial = true; - } else if ((skb != NULL) && pktlen + net_device->pkt_align < + } else if (pktlen + net_device->pkt_align < net_device->send_section_size) { section_index = netvsc_get_next_send_section(net_device); if (section_index != NETVSC_INVALID_INDEX) { From ce12b81061a0a2647ca90c04d131d90edd5c5063 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 22 Mar 2017 14:51:05 -0700 Subject: [PATCH 9/9] netvsc: fix and cleanup rndis_filter_set_packet_filter Fix warning from unused set_complete variable. And rearrange code to eliminate unnecessary goto's. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index cd7b83707e04..91b3bcfd9acb 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -819,16 +819,14 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) { struct rndis_request *request; struct rndis_set_request *set; - struct rndis_set_complete *set_complete; int ret; request = get_rndis_request(dev, RNDIS_MSG_SET, RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(u32)); - if (!request) { - ret = -ENOMEM; - goto cleanup; - } + if (!request) + return -ENOMEM; + /* Setup the rndis set */ set = &request->request_msg.msg.set_req; @@ -840,15 +838,11 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) &new_filter, sizeof(u32)); ret = rndis_filter_send_request(dev, request); - if (ret != 0) - goto cleanup; + if (ret == 0) + wait_for_completion(&request->wait_event); - wait_for_completion(&request->wait_event); + put_rndis_request(dev, request); - set_complete = &request->response_msg.msg.set_complete; -cleanup: - if (request) - put_rndis_request(dev, request); return ret; }