From 616d37a070bb33ea387d0e93343acd8336a30886 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 18 Jun 2019 22:12:05 -0700 Subject: [PATCH 01/36] rds: fix reordering with composite message notification The user notification for an RDS composite message (rdma + control) needs to be triggered once the full message is delivered, and such a fix was added as part of commit 941f8d55f6d61 ("RDS: RDMA: Fix the composite message user notification"). But rds_send_remove_from_sock is missing the data part notify check, and hence at times the user doesn't get the notification, which isn't desirable. One way is to fix rds_send_remove_from_sock to check for that case, but considering the ordering complexity with the completion handler, and since rdma + control messages are always dispatched back to back in the same send context, just delaying the signaled completion on the rdma work request also gets the desired behaviour, i.e. notifying the application only after the RDMA + control message send completes. So this patch updates the earlier fix with this approach. The fix of delaying the signaled completion of the rdma op till the control message send completes was done by Venkat Venkatsubra in a downstream kernel. Reviewed-and-tested-by: Zhu Yanjun Reviewed-by: Gerd Rausch Signed-off-by: Santosh Shilimkar --- net/rds/ib_send.c | 29 +++++++++++++---------------- net/rds/rdma.c | 10 ---------- net/rds/rds.h | 1 - net/rds/send.c | 4 +--- 4 files changed, 14 insertions(+), 30 deletions(-) diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 18f2341202f8..dfe6237dafe2 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -69,6 +69,16 @@ static void rds_ib_send_complete(struct rds_message *rm, complete(rm, notify_status); } +static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, + struct rm_data_op *op, + int wc_status) +{ + if (op->op_nents) + ib_dma_unmap_sg(ic->i_cm_id->device, + op->op_sg, op->op_nents, + DMA_TO_DEVICE); +} + static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, struct rm_rdma_op *op, int wc_status) @@ -129,21 +139,6 @@ static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic, rds_ib_stats_inc(s_ib_atomic_fadd); } -static void rds_ib_send_unmap_data(struct rds_ib_connection *ic, - struct rm_data_op *op, - int wc_status) -{ - struct rds_message *rm = container_of(op, struct rds_message, data); - - if (op->op_nents) - ib_dma_unmap_sg(ic->i_cm_id->device, - op->op_sg, op->op_nents, - DMA_TO_DEVICE); - - if (rm->rdma.op_active && rm->data.op_notify) - rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status); -} - /* * Unmap the resources associated with a struct send_work. * @@ -902,7 +897,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) send->s_queued = jiffies; send->s_op = NULL; - nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); + if (!op->op_notify) + nr_sig += rds_ib_set_wr_signal_state(ic, send, + op->op_notify); send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; send->s_rdma_wr.remote_addr = remote_addr; diff --git a/net/rds/rdma.c b/net/rds/rdma.c index b340ed4fc43a..916f5ec373d8 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -641,16 +641,6 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, } op->op_notifier->n_user_token = args->user_token; op->op_notifier->n_status = RDS_RDMA_SUCCESS; - - /* Enable rmda notification on data operation for composite * rds messages and make sure notification is enabled only * for the data operation which follows it so that application * gets notified only after full message gets delivered.
- */ - if (rm->data.op_sg) { - rm->rdma.op_notify = 0; - rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); - } } /* The cookie contains the R_Key of the remote memory region, and diff --git a/net/rds/rds.h b/net/rds/rds.h index 0d8f67cadd74..f0066d168499 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -476,7 +476,6 @@ struct rds_message { } rdma; struct rm_data_op { unsigned int op_active:1; - unsigned int op_notify:1; unsigned int op_nents; unsigned int op_count; unsigned int op_dmasg; diff --git a/net/rds/send.c b/net/rds/send.c index 166dd578c1cc..031b1e97a466 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -491,14 +491,12 @@ void rds_rdma_send_complete(struct rds_message *rm, int status) struct rm_rdma_op *ro; struct rds_notifier *notifier; unsigned long flags; - unsigned int notify = 0; spin_lock_irqsave(&rm->m_rs_lock, flags); - notify = rm->rdma.op_notify | rm->data.op_notify; ro = &rm->rdma; if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && - ro->op_active && notify && ro->op_notifier) { + ro->op_active && ro->op_notify && ro->op_notifier) { notifier = ro->op_notifier; rs = rm->m_rs; sock_hold(rds_rs_to_sk(rs)); From a55207884708bf1d5b8c87a3c504502de77a5416 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Fri, 28 Jun 2019 17:31:19 -0700 Subject: [PATCH 02/36] Revert "RDS: IB: split the mr registration and invalidation path" This reverts commit 56012459310a1dbcc55c2dbf5500a9f7571402cb. RDS kept spinning inside function "rds_ib_post_reg_frmr", waiting for "i_fastreg_wrs" to become incremented: while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { atomic_inc(&ibmr->ic->i_fastreg_wrs); cpu_relax(); } Looking at the original commit: commit 56012459310a ("RDS: IB: split the mr registration and invalidation path") In there, the "rds_ib_mr_cqe_handler" was changed in the following way: void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) if (frmr->fr_inv) { frmr->fr_state = FRMR_IS_FREE; frmr->fr_inv = false; atomic_inc(&ic->i_fastreg_wrs); } else { atomic_inc(&ic->i_fastunreg_wrs); } It looks like it's got it exactly backwards: Function "rds_ib_post_reg_frmr" keeps track of the outstanding requests via "i_fastreg_wrs". Function "rds_ib_post_inv" keeps track of the outstanding requests via "i_fastunreg_wrs" (post original commit). It also sets: frmr->fr_inv = true; However the completion handler "rds_ib_mr_cqe_handler" adjusts "i_fastreg_wrs" when "fr_inv" had been true, and adjusts "i_fastunreg_wrs" otherwise. The original commit was done in the name of performance: to remove the performance bottleneck No performance benefit could be observed with a fixed-up version of the original commit measured between two Oracle X7 servers, both equipped with Mellanox Connect-X5 HCAs. The prudent course of action is to revert this commit. 
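With the revert in place, both the registration and the invalidation path draw from the single "i_fastreg_wrs" counter again, and the completion handler replenishes that counter unconditionally, as the ib_frmr.c hunk below shows:

	if (frmr->fr_inv) {
		frmr->fr_state = FRMR_IS_FREE;
		frmr->fr_inv = false;
	}

	atomic_inc(&ic->i_fastreg_wrs);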
Signed-off-by: Gerd Rausch Signed-off-by: Santosh Shilimkar --- net/rds/ib.h | 4 +--- net/rds/ib_cm.c | 9 ++------- net/rds/ib_frmr.c | 11 +++++------ 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 67a715b076ca..66c03c7665b2 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -15,8 +15,7 @@ #define RDS_IB_DEFAULT_RECV_WR 1024 #define RDS_IB_DEFAULT_SEND_WR 256 -#define RDS_IB_DEFAULT_FR_WR 256 -#define RDS_IB_DEFAULT_FR_INV_WR 256 +#define RDS_IB_DEFAULT_FR_WR 512 #define RDS_IB_DEFAULT_RETRY_COUNT 1 @@ -157,7 +156,6 @@ struct rds_ib_connection { /* To control the number of wrs from fastreg */ atomic_t i_fastreg_wrs; - atomic_t i_fastunreg_wrs; /* interrupt handling */ struct tasklet_struct i_send_tasklet; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 66c6eb56072b..8891822eba4f 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -460,10 +460,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) * completion queue and send queue. This extra space is used for FRMR * registration and invalidation work requests */ - fr_queue_space = rds_ibdev->use_fastreg ? - (RDS_IB_DEFAULT_FR_WR + 1) + - (RDS_IB_DEFAULT_FR_INV_WR + 1) - : 0; + fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0); /* add the conn now so that connection establishment has the dev */ rds_ib_add_conn(rds_ibdev, conn); @@ -530,7 +527,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) attr.send_cq = ic->i_send_cq; attr.recv_cq = ic->i_recv_cq; atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); - atomic_set(&ic->i_fastunreg_wrs, RDS_IB_DEFAULT_FR_INV_WR); /* * XXX this can fail if max_*_wr is too large? Are we supposed @@ -1009,8 +1005,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) wait_event(rds_ib_ring_empty_wait, rds_ib_ring_empty(&ic->i_recv_ring) && (atomic_read(&ic->i_signaled_sends) == 0) && - (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR) && - (atomic_read(&ic->i_fastunreg_wrs) == RDS_IB_DEFAULT_FR_INV_WR)); + (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); tasklet_kill(&ic->i_send_tasklet); tasklet_kill(&ic->i_recv_tasklet); diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 688dcd68d4ea..32ae26ed58a0 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -239,8 +239,8 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) if (frmr->fr_state != FRMR_IS_INUSE) goto out; - while (atomic_dec_return(&ibmr->ic->i_fastunreg_wrs) <= 0) { - atomic_inc(&ibmr->ic->i_fastunreg_wrs); + while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { + atomic_inc(&ibmr->ic->i_fastreg_wrs); cpu_relax(); } @@ -257,7 +257,7 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr) if (unlikely(ret)) { frmr->fr_state = FRMR_IS_STALE; frmr->fr_inv = false; - atomic_inc(&ibmr->ic->i_fastunreg_wrs); + atomic_inc(&ibmr->ic->i_fastreg_wrs); pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); goto out; } @@ -285,10 +285,9 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) if (frmr->fr_inv) { frmr->fr_state = FRMR_IS_FREE; frmr->fr_inv = false; - atomic_inc(&ic->i_fastreg_wrs); - } else { - atomic_inc(&ic->i_fastunreg_wrs); } + + atomic_inc(&ic->i_fastreg_wrs); } void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed, From 8c6166cfc9cd48e93d9176561e50b63cef4330d5 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Thu, 27 Jun 2019 09:21:44 -0700 Subject: [PATCH 03/36] rds: Accept peer connection reject messages due to incompatible version Prior to commit d021fabf525ff ("rds: rdma: 
add consumer reject") function "rds_rdma_cm_event_handler_cmn" would always honor a rejected connection attempt by issuing a "rds_conn_drop". The commit mentioned above added a "break", eliminating the "fallthrough" case and made the "rds_conn_drop" rather conditional: Now it only happens if a "consumer defined" reject (i.e. "rdma_reject") carries an integer-value of "1" inside "private_data": if (!conn) break; err = (int *)rdma_consumer_reject_data(cm_id, event, &len); if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", &conn->c_laddr, &conn->c_faddr); conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; rds_conn_drop(conn); } rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); break; /* FALLTHROUGH */ A number of issues are worth mentioning here: #1) Previous versions of the RDS code simply rejected a connection by calling "rdma_reject(cm_id, NULL, 0);" So the value of the payload in "private_data" will not be "1", but "0". #2) Now the code has become dependent on host byte order and sizing. If one peer is big-endian, the other is little-endian, or there's a difference in sizeof(int) (e.g. ILP64 vs LP64), the *err check does not work as intended. #3) There is no check for "len" to see if the data behind *err is even valid. Luckily, it appears that the "rdma_reject(cm_id, NULL, 0)" will always carry 148 bytes of zeroized payload. But that should probably not be relied upon here. #4) With the added "break;", we might as well drop the misleading "/* FALLTHROUGH */" comment. This commit does _not_ address issue #2, as the sender would have to agree on a byte order as well. Here is the sequence of messages in this observed error-scenario: Host-A is pre-QoS changes (excluding the commit mentioned above) Host-B is post-QoS changes (including the commit mentioned above) #1 Host-B issues a connection request via function "rds_conn_path_transition" connection state transitions to "RDS_CONN_CONNECTING" #2 Host-A rejects the incompatible connection request (from #1) It does so by calling "rdma_reject(cm_id, NULL, 0);" #3 Host-B receives an "RDMA_CM_EVENT_REJECTED" event (from #2) But since the code is changed in the way described above, it won't drop the connection here, simply because "*err == 0". #4 Host-A issues a connection request #5 Host-B receives an "RDMA_CM_EVENT_CONNECT_REQUEST" event and ends up calling "rds_ib_cm_handle_connect". But since the state is already in "RDS_CONN_CONNECTING" (as of #1) it will end up issuing a "rdma_reject" without dropping the connection: if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { /* Wait and see - our connect may still be succeeding */ rds_ib_stats_inc(s_ib_connect_raced); } goto out; #6 Host-A receives an "RDMA_CM_EVENT_REJECTED" event (from #5), drops the connection and tries again (goto #4) until it gives up. 
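The handler now only trusts the consumer reject payload when enough reject data was actually supplied, and it treats both the legacy empty reject (*err == 0) and RDS_RDMA_REJ_INCOMPAT as an incompatible-version reject, as the hunk below shows:

	err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
	if (!err ||
	    (err && len >= sizeof(*err) &&
	     ((*err) <= RDS_RDMA_REJ_INCOMPAT))) {
		...
		rds_conn_drop(conn);
	}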
Tested-by: Zhu Yanjun Signed-off-by: Gerd Rausch Signed-off-by: Santosh Shilimkar --- net/rds/rdma_transport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 46bce8389066..9db455d02255 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -112,7 +112,9 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, if (!conn) break; err = (int *)rdma_consumer_reject_data(cm_id, event, &len); - if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { + if (!err || + (err && len >= sizeof(*err) && + ((*err) <= RDS_RDMA_REJ_INCOMPAT))) { pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", &conn->c_laddr, &conn->c_faddr); conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; @@ -122,7 +124,6 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); break; - /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: From fc640d4cbe268479ba448542e2b17045c908b2b3 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Fri, 28 Jun 2019 17:41:16 -0700 Subject: [PATCH 04/36] rds: Return proper "tos" value to user-space The proper "tos" value needs to be returned to user-space (sockopt RDS_INFO_CONNECTIONS). Fixes: 3eb450367d08 ("rds: add type of service(tos) infrastructure") Signed-off-by: Gerd Rausch Reviewed-by: Zhu Yanjun Signed-off-by: Santosh Shilimkar --- net/rds/connection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/connection.c b/net/rds/connection.c index 7ea134f9a825..ed7f2133acc2 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -736,6 +736,7 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) cinfo->next_rx_seq = cp->cp_next_rx_seq; cinfo->laddr = conn->c_laddr.s6_addr32[3]; cinfo->faddr = conn->c_faddr.s6_addr32[3]; + cinfo->tos = conn->c_tos; strncpy(cinfo->transport, conn->c_trans->t_name, sizeof(cinfo->transport)); cinfo->flags = 0; From dc205a8d34228809dedab94a85a866cbb255248f Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Fri, 28 Jun 2019 15:38:58 -0700 Subject: [PATCH 05/36] rds: avoid version downgrade to legitimate newer peer connections Connections with legitimate tos values can get into the usual connection race. It can result in a consumer reject. We don't want the tos value or protocol version to be demoted for such connections, otherwise the peers would end up with different tos values, which can result in no connection. For example, a peer-initiated connection with, say, tos 8 can get downgraded to tos 0 by the usual connection racing, which is not desirable.
The patch fixes the above issue, introduced by commit d021fabf525f ("rds: rdma: add consumer reject") Reported-by: Yanjun Zhu Tested-by: Yanjun Zhu Signed-off-by: Santosh Shilimkar --- net/rds/rdma_transport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9db455d02255..ff74c4bbb9fc 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -117,8 +117,10 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, ((*err) <= RDS_RDMA_REJ_INCOMPAT))) { pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", &conn->c_laddr, &conn->c_faddr); - conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; - conn->c_tos = 0; + + if (!conn->c_tos) + conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + rds_conn_drop(conn); } rdsdebug("Connection rejected: %s\n", From 9a64144d683a4395f57562d90247c61a0bf5105f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 17 Jun 2019 13:22:28 +0300 Subject: [PATCH 06/36] net/mlx5: E-Switch, Fix default encap mode Encap mode is related to switchdev mode only. Move the init of the encap mode to eswitch_offloads. Before this change, we reported that eswitch supports encap, even though the device was in non-SRIOV mode. Fixes: 7768d1971de67 ('net/mlx5: E-Switch, Add control for encapsulation') Signed-off-by: Maor Gottlieb Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 ----- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6a921e24cd5e..e9339e7d6a18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1882,11 +1882,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; - else - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 47b446d30f71..c2beadc41c40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1840,6 +1840,12 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, { int err; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; + err = esw_offloads_steering_init(esw, vf_nvports, total_nvports); if (err) return err; @@ -1901,6 +1907,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) esw_offloads_devcom_cleanup(esw); esw_offloads_unload_all_reps(esw, num_vfs); esw_offloads_steering_cleanup(esw); + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) From 914adbb1bcf89478ac138318d28b302704564d59 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Sun, 2 Jun 2019 06:19:03 +0000 Subject: [PATCH 07/36] net/mlx5e: Fix port tunnel GRE entropy control GRE entropy
calculation is a single bit per card, and not per port. Force disable GRE entropy calculation upon the first GRE encap rule, and release the force at the last GRE encap rule removal. This is done per port. Fixes: 97417f6182f8 ("net/mlx5e: Fix GRE key by controlling port tunnel entropy calculation") Signed-off-by: Eli Britstein Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/lib/port_tun.c | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c index be69c1d7941a..48b5c847b642 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -98,27 +98,12 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, */ if (entropy_flags.gre_calc_supported && reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { - /* Other applications may change the global FW entropy - * calculations settings. Check that the current entropy value - * is the negative of the updated value. - */ - if (entropy_flags.force_enabled && - enable == entropy_flags.gre_calc_enabled) { - mlx5_core_warn(tun_entropy->mdev, - "Unexpected GRE entropy calc setting - expected %d", - !entropy_flags.gre_calc_enabled); - return -EOPNOTSUPP; - } - err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable, - entropy_flags.force_supported); + if (!entropy_flags.force_supported) + return 0; + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, + enable, !enable); if (err) return err; - /* if we turn on the entropy we don't need to force it anymore */ - if (entropy_flags.force_supported && enable) { - err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0); - if (err) - return err; - } } else if (entropy_flags.calc_supported) { /* Other applications may change the global FW entropy * calculations settings. Check that the current entropy value From db849faa9bef993a1379dc510623f750a72fa7ce Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 3 May 2019 13:14:59 -0700 Subject: [PATCH 08/36] net/mlx5e: Rx, Fix checksum calculation for new hardware CQE checksum full mode in new HW, provides a full checksum of rx frame. Covering bytes starting from eth protocol up to last byte in the received frame (frame_size - ETH_HLEN), as expected by the stack. Fixing up skb->csum by the driver is not required in such case. This fix is to avoid wrong checksum calculation in drivers which already support the new hardware with the new checksum mode. 
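In the RX path this is keyed off a new RQ state bit that is set when the device reports cqe_checksum_full; once CHECKSUM_COMPLETE is taken straight from the CQE, the VLAN/padding fixups are skipped entirely, as the en_rx.c hunk below shows:

	skb->ip_summed = CHECKSUM_COMPLETE;
	skb->csum = csum_unfold((__force __sum16)cqe->check_sum);

	if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
		return; /* CQE csum covers all received bytes */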
Fixes: 85327a9c4150 ("net/mlx5: Update the list of the PCI supported devices") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 7 ++++++- include/linux/mlx5/mlx5_ifc.h | 3 ++- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc6797e24571..cc227a7aa79f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -294,6 +294,7 @@ enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_AM, MLX5E_RQ_STATE_NO_CSUM_COMPLETE, + MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ }; struct mlx5e_cq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a8e8350b38aa..98d75271fc73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -855,6 +855,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; + if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); + if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 13133e7f088e..8a5f9411cac6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -873,8 +873,14 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; + stats->csum_complete++; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state)) + return; /* CQE csum covers all received bytes */ + + /* csum might need some fixups ...*/ if (network_depth > ETH_HLEN) /* CQE csum is calculated from the IP header and does * not cover VLAN headers (if present). This will add @@ -885,7 +891,6 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum); mlx5e_skb_padding_csum(skb, network_depth, proto, stats); - stats->csum_complete++; return; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5e74305e2e57..7e42efa143a0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -749,7 +749,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; - u8 reserved_at_23[0xd]; + u8 cqe_checksum_full[0x1]; + u8 reserved_at_24[0xc]; u8 max_vxlan_udp_ports[0x8]; u8 reserved_at_38[0x6]; u8 max_geneve_opt_len[0x1]; From 39825350ae2a52f8513741b36e42118bd80dd689 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 17 Jun 2019 12:01:45 +0300 Subject: [PATCH 09/36] net/mlx5e: Fix return value from timeout recover function Fix timeout recover function to return a meaningful return value. When an interrupt was not sent by the FW, return IO error instead of 'true'. 
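The recover routine now reports failure with an error code rather than a boolean, as the hunk below shows:

	eqe_count = mlx5_eq_poll_irq_disabled(eq);
	if (!eqe_count) {
		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
		return -EIO;
	}
	...
	return 0;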
Fixes: c7981bea48fb ("net/mlx5e: Fix return status of TX reporter timeout recover") Signed-off-by: Aya Levin Acked-by: Jiri Pirko Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 476dd97f7f2f..a778c15e5324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -142,22 +142,20 @@ static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) { struct mlx5_eq_comp *eq = sq->cq.mcq.eq; u32 eqe_count; - int ret; netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", eq->core.eqn, eq->core.cons_index, eq->core.irqn); eqe_count = mlx5_eq_poll_irq_disabled(eq); - ret = eqe_count ? false : true; if (!eqe_count) { clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - return ret; + return -EIO; } netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); sq->channel->stats->eq_rearm++; - return ret; + return 0; } int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) From 99d31cbd8953c6929da978bf049ab0f0b4e503d9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 30 Jun 2019 11:11:26 +0300 Subject: [PATCH 10/36] net/mlx5e: Fix error flow in tx reporter diagnose Fix tx reporter's diagnose callback. Propagate error when failing to gather diagnostics information or failing to print diagnostic data per queue. Fixes: de8650a82071 ("net/mlx5e: Add tx reporter support") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index a778c15e5324..f3d98748b211 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -262,13 +262,13 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); if (err) - break; + goto unlock; err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, state, netif_xmit_stopped(sq->txq)); if (err) - break; + goto unlock; } err = devlink_fmsg_arr_pair_nest_end(fmsg); if (err) From ef1ce7d7b67b46661091c7ccc0396186b7a247ef Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 7 Jul 2019 16:57:06 +0300 Subject: [PATCH 11/36] net/mlx5e: IPoIB, Add error path in mlx5_rdma_setup_rn Check return value from mlx5e_attach_netdev, add error path on failure. 
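The setup path now checks the return value and unwinds what was already initialized, as the hunk below shows:

	err = mlx5e_attach_netdev(epriv);
	if (err)
		goto detach;
	...
detach:
	prof->cleanup(epriv);
	if (ipriv->sub_interface)
		return err;
	mlx5e_destroy_mdev_resources(mdev);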
Fixes: 48935bbb7ae8 ("net/mlx5e: IPoIB, Add netdevice profile skeleton") Signed-off-by: Aya Levin Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 9ca492b430d8..603d294757b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -698,7 +698,9 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, prof->init(mdev, netdev, prof, ipriv); - mlx5e_attach_netdev(epriv); + err = mlx5e_attach_netdev(epriv); + if (err) + goto detach; netif_carrier_off(netdev); /* set rdma_netdev func pointers */ @@ -714,6 +716,11 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, return 0; +detach: + prof->cleanup(epriv); + if (ipriv->sub_interface) + return err; + mlx5e_destroy_mdev_resources(mdev); destroy_ht: mlx5i_pkey_qpn_ht_cleanup(netdev); return err; From 63f9ba1bf8b6550365dc17a65d544cd75e68bf48 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 5 Jul 2019 11:46:43 -0700 Subject: [PATCH 12/36] net: fib_rules: do not flow dissect local packets Rules matching on loopback iif do not need early flow dissection as the packet originates from the host. Stop counting such rules in fib_rule_requires_fldissect Signed-off-by: Petar Penkov Signed-off-by: David S. Miller --- include/net/fib_rules.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index eba8465e1d86..20dcadd8eed9 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -180,9 +180,9 @@ static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) { - return rule->ip_proto || + return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto || fib_rule_port_range_set(&rule->sport_range) || - fib_rule_port_range_set(&rule->dport_range); + fib_rule_port_range_set(&rule->dport_range)); } struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, From 937a944090cca2f19458fd037a8aff61c546f0cd Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Jul 2019 22:04:20 -0700 Subject: [PATCH 13/36] net: ethernet: mediatek: Fix misuses of GENMASK macro Arguments are supposed to be ordered high then low. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +- drivers/net/ethernet/mediatek/mtk_sgmii.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index c6be599ed94d..bab94f763e2c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -723,7 +723,7 @@ struct mtk_soc_data { #define MTK_MAX_DEVS 2 #define MTK_SGMII_PHYSPEED_AN BIT(31) -#define MTK_SGMII_PHYSPEED_MASK GENMASK(0, 2) +#define MTK_SGMII_PHYSPEED_MASK GENMASK(2, 0) #define MTK_SGMII_PHYSPEED_1000 BIT(0) #define MTK_SGMII_PHYSPEED_2500 BIT(1) #define MTK_HAS_FLAGS(flags, _x) (((flags) & (_x)) == (_x)) diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c index 136f90ce5a65..ff509d42d818 100644 --- a/drivers/net/ethernet/mediatek/mtk_sgmii.c +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -82,7 +82,7 @@ int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id) return -EINVAL; regmap_read(ss->regmap[id], ss->ana_rgc3, &val); - val &= ~GENMASK(2, 3); + val &= ~GENMASK(3, 2); mode = ss->flags[id] & MTK_SGMII_PHYSPEED_MASK; val |= (mode == MTK_SGMII_PHYSPEED_1000) ? 0 : BIT(2); regmap_write(ss->regmap[id], ss->ana_rgc3, val); From aa4c0c9091b0bb4cb261bbe0718d17c2834c4690 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 9 Jul 2019 22:04:21 -0700 Subject: [PATCH 14/36] net: stmmac: Fix misuses of GENMASK macro Arguments are supposed to be ordered high then low. Fixes: 293e4365a1ad ("stmmac: change descriptor layout") Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/descs.h | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 10429b05f932..9f0b9a9e63b3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -123,7 +123,7 @@ #define ETDES1_BUFFER2_SIZE_SHIFT 16 /* Extended Receive descriptor definitions */ -#define ERDES4_IP_PAYLOAD_TYPE_MASK GENMASK(2, 6) +#define ERDES4_IP_PAYLOAD_TYPE_MASK GENMASK(6, 2) #define ERDES4_IP_HDR_ERR BIT(3) #define ERDES4_IP_PAYLOAD_ERR BIT(4) #define ERDES4_IP_CSUM_BYPASSED BIT(5) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 2856f3fe5266..4083019c547a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -192,7 +192,7 @@ static const struct emac_variant emac_variant_h6 = { /* Used in RX_CTL1*/ #define EMAC_RX_MD BIT(1) -#define EMAC_RX_TH_MASK GENMASK(4, 5) +#define EMAC_RX_TH_MASK GENMASK(5, 4) #define EMAC_RX_TH_32 0 #define EMAC_RX_TH_64 (0x1 << 4) #define EMAC_RX_TH_96 (0x2 << 4) @@ -203,7 +203,7 @@ static const struct emac_variant emac_variant_h6 = { /* Used in TX_CTL1*/ #define EMAC_TX_MD BIT(1) #define EMAC_TX_NEXT_FRM BIT(2) -#define EMAC_TX_TH_MASK GENMASK(8, 10) +#define EMAC_TX_TH_MASK GENMASK(10, 8) #define EMAC_TX_TH_64 0 #define EMAC_TX_TH_128 (0x1 << 8) #define EMAC_TX_TH_192 (0x2 << 8) From 311633b604063a8a5d3fbc74d0565b42df721f68 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 9 Jul 2019 23:24:54 -0700 Subject: [PATCH 15/36] hsr: switch ->dellink() to ->ndo_uninit() Switching from 
->priv_destructor to dellink() has an unexpected consequence: existing RCU readers, that is, hsr_port_get_hsr() callers, may still be able to read the port list. Instead of checking the return value of each hsr_port_get_hsr(), we can just move it to ->ndo_uninit() which is called after device unregister and synchronize_net(), and we still have RTNL lock there. Fixes: b9a1e627405d ("hsr: implement dellink to clean up resources") Fixes: edf070a0fb45 ("hsr: fix a NULL pointer deref in hsr_dev_xmit()") Reported-by: syzbot+097ef84cdc95843fbaa8@syzkaller.appspotmail.com Cc: Arvid Brodin Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/hsr/hsr_device.c | 18 ++++++++---------- net/hsr/hsr_device.h | 1 - net/hsr/hsr_netlink.c | 7 ------- 3 files changed, 8 insertions(+), 18 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index f0f9b493c47b..f509b495451a 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -227,13 +227,8 @@ static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct hsr_port *master; master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); - if (master) { - skb->dev = master->dev; - hsr_forward_skb(skb, master); - } else { - atomic_long_inc(&dev->tx_dropped); - dev_kfree_skb_any(skb); - } + skb->dev = master->dev; + hsr_forward_skb(skb, master); return NETDEV_TX_OK; } @@ -348,7 +343,11 @@ static void hsr_announce(struct timer_list *t) rcu_read_unlock(); } -void hsr_dev_destroy(struct net_device *hsr_dev) +/* This has to be called after all the readers are gone. + * Otherwise we would have to check the return value of + * hsr_port_get_hsr(). + */ +static void hsr_dev_destroy(struct net_device *hsr_dev) { struct hsr_priv *hsr; struct hsr_port *port; @@ -364,8 +363,6 @@ void hsr_dev_destroy(struct net_device *hsr_dev) del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); - synchronize_rcu(); - hsr_del_self_node(&hsr->self_node_db); hsr_del_nodes(&hsr->node_db); } @@ -376,6 +373,7 @@ static const struct net_device_ops hsr_device_ops = { .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, .ndo_fix_features = hsr_fix_features, + .ndo_uninit = hsr_dev_destroy, }; static struct device_type hsr_type = { diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h index d0fa6b0696d2..6d7759c4f5f9 100644 --- a/net/hsr/hsr_device.h +++ b/net/hsr/hsr_device.h @@ -14,7 +14,6 @@ void hsr_dev_setup(struct net_device *dev); int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], unsigned char multicast_spec, u8 protocol_version); -void hsr_dev_destroy(struct net_device *hsr_dev); void hsr_check_carrier_and_operstate(struct hsr_priv *hsr); bool is_hsr_master(struct net_device *dev); int hsr_get_max_mtu(struct hsr_priv *hsr); diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 160edd24de4e..8f8337f893ba 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -69,12 +69,6 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec, hsr_version); } -static void hsr_dellink(struct net_device *hsr_dev, struct list_head *head) -{ - hsr_dev_destroy(hsr_dev); - unregister_netdevice_queue(hsr_dev, head); -} - static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct hsr_priv *hsr; @@ -119,7 +113,6 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, - .dellink = hsr_dellink, .fill_info = hsr_fill_info, }; From 
416e8126a2672f6e91e9e81c6f5c07cf46808b13 Mon Sep 17 00:00:00 2001 From: yangxingwu Date: Wed, 10 Jul 2019 21:14:10 +0800 Subject: [PATCH 16/36] ipv6: Use ipv6_authlen for len The length of AH header is computed manually as (hp->hdrlen+2)<<2. However, in include/linux/ipv6.h, a macro named ipv6_authlen is already defined for exactly the same job. This commit replaces the manual computation code with the macro. Signed-off-by: yangxingwu Signed-off-by: David S. Miller --- net/ipv6/ah6.c | 4 ++-- net/ipv6/exthdrs_core.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/netfilter/ip6t_ah.c | 2 +- net/ipv6/netfilter/ip6t_ipv6header.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 25e1172fd1c3..95835e8d99aa 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -464,7 +464,7 @@ static void ah6_input_done(struct crypto_async_request *base, int err) struct ah_data *ahp = x->data; struct ip_auth_hdr *ah = ip_auth_hdr(skb); int hdr_len = skb_network_header_len(skb); - int ah_hlen = (ah->hdrlen + 2) << 2; + int ah_hlen = ipv6_authlen(ah); if (err) goto out; @@ -546,7 +546,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ahash = ahp->ahash; nexthdr = ah->nexthdr; - ah_hlen = (ah->hdrlen + 2) << 2; + ah_hlen = ipv6_authlen(ah); if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 11a43ee4dd45..b358f1a4dd08 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -266,7 +266,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } else if (nexthdr == NEXTHDR_AUTH) { if (flags && (*flags & IP6_FH_F_AUTH) && (target < 0)) break; - hdrlen = (hp->hdrlen + 2) << 2; + hdrlen = ipv6_authlen(hp); } else hdrlen = ipv6_optlen(hp); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b80fde1bc005..3134fbb65d7f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -416,7 +416,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) break; optlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { - optlen = (hdr->hdrlen + 2) << 2; + optlen = ipv6_authlen(hdr); } else { optlen = ipv6_optlen(hdr); } diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 0228ff3636bb..4e15a14435e4 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -55,7 +55,7 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - hdrlen = (ah->hdrlen + 2) << 2; + hdrlen = ipv6_authlen(ah); pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); pr_debug("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index fd439f88377f..0fc6326ef499 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -71,7 +71,7 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (nexthdr == NEXTHDR_FRAGMENT) hdrlen = 8; else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen + 2) << 2; + hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 398e1df41406..0f82c150543b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -414,7 +414,7 @@ find_prev_fhdr(struct 
sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) BUG(); if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hdr.hdrlen+2)<<2; + hdrlen = ipv6_authlen(&hdr); else hdrlen = ipv6_optlen(&hdr); diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 549c51156d5d..f53bd8f01219 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -155,7 +155,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, } - hdrlen = (hp->hdrlen+2)<<2; + hdrlen = ipv6_authlen(hp); break; case IPPROTO_ESP: if (logflags & NF_LOG_IPOPT) { From 052e0690f1f62f76493ba996d73847c7ca9fd132 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jul 2019 06:40:09 -0700 Subject: [PATCH 17/36] ipv6: tcp: fix flowlabels reflection for RST packets In 323a53c41292 ("ipv6: tcp: enable flowlabel reflection in some RST packets") and 50a8accf1062 ("ipv6: tcp: send consistent flowlabel in TIME_WAIT state") we took care of IPv6 flowlabel reflections for two cases. This patch takes care of the remaining case, when the RST packet is sent on behalf of a 'full' socket. In Marek use case, this was a socket in TCP_CLOSE state. Signed-off-by: Eric Dumazet Reported-by: Marek Majkowski Tested-by: Marek Majkowski Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d56a9019a0fe..5da069e91cac 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -984,8 +984,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) { oif = sk->sk_bound_dev_if; - if (sk_fullsock(sk)) + if (sk_fullsock(sk)) { + const struct ipv6_pinfo *np = tcp_inet6_sk(sk); + trace_tcp_send_reset(sk, skb); + if (np->repflow) + label = ip6_flowlabel(ipv6h); + } if (sk->sk_state == TCP_TIME_WAIT) label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); } else { From 8975a3abc3030bc8cdc3c94b988bcf819a14ed41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jul 2019 06:40:10 -0700 Subject: [PATCH 18/36] ipv6: fix potential crash in ip6_datagram_dst_update() Willem forgot to change one of the calls to fl6_sock_lookup(), which can now return an error or NULL. 
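The remaining caller in ip6_datagram_dst_update() therefore has to use an IS_ERR() check instead of a NULL check, as the datagram.c hunk below shows:

	flowlabel = fl6_sock_lookup(sk, np->flow_label);
	if (IS_ERR(flowlabel))
		return -EINVAL;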
syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 31763 Comm: syz-executor.0 Not tainted 5.2.0-rc6+ #63 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:ip6_datagram_dst_update+0x559/0xc30 net/ipv6/datagram.c:83 Code: 00 00 e8 ea 29 3f fb 4d 85 f6 0f 84 96 04 00 00 e8 dc 29 3f fb 49 8d 7e 20 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 16 06 00 00 4d 8b 6e 20 e8 b4 29 3f fb 4c 89 ee RSP: 0018:ffff88809ba97ae0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff8880a81254b0 RCX: ffffc90008118000 RDX: 0000000000000003 RSI: ffffffff86319a84 RDI: 000000000000001e RBP: ffff88809ba97c10 R08: ffff888065e9e700 R09: ffffed1015d26c80 R10: ffffed1015d26c7f R11: ffff8880ae9363fb R12: ffff8880a8124f40 R13: 0000000000000001 R14: fffffffffffffffe R15: ffff88809ba97b40 FS: 00007f38e606a700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000202c0140 CR3: 00000000a026a000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ip6_datagram_connect+0x5e9/0x1390 net/ipv6/datagram.c:246 ip6_datagram_connect+0x30/0x50 net/ipv6/datagram.c:269 ip6_datagram_connect_v6_only+0x69/0x90 net/ipv6/datagram.c:281 inet_dgram_connect+0x14a/0x2d0 net/ipv4/af_inet.c:571 __sys_connect+0x264/0x330 net/socket.c:1824 __do_sys_connect net/socket.c:1835 [inline] __se_sys_connect net/socket.c:1832 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1832 do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4597c9 Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f38e6069c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00000000004597c9 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000003 RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f38e606a6d4 R13: 00000000004bfd07 R14: 00000000004d1838 R15: 00000000ffffffff Modules linked in: RIP: 0010:ip6_datagram_dst_update+0x559/0xc30 net/ipv6/datagram.c:83 Code: 00 00 e8 ea 29 3f fb 4d 85 f6 0f 84 96 04 00 00 e8 dc 29 3f fb 49 8d 7e 20 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 16 06 00 00 4d 8b 6e 20 e8 b4 29 3f fb 4c 89 ee Fixes: 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") Signed-off-by: Eric Dumazet Acked-by: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. 
Miller --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9d78c907b918..9ab897ded4df 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -74,7 +74,7 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); - if (!flowlabel) + if (IS_ERR(flowlabel)) return -EINVAL; } ip6_datagram_flow_key_init(&fl6, sk); From d44e3fa5d7e6e9573c69f6f9f4f7f3200b0c9eee Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 10 Jul 2019 06:40:11 -0700 Subject: [PATCH 19/36] ipv6: fix static key imbalance in fl_create() fl_create() should call static_branch_deferred_inc() only in case of success. Also we should not call fl_free() in error path, as this could cause a static key imbalance. jump label: negative count! WARNING: CPU: 0 PID: 15907 at kernel/jump_label.c:221 static_key_slow_try_dec kernel/jump_label.c:221 [inline] WARNING: CPU: 0 PID: 15907 at kernel/jump_label.c:221 static_key_slow_try_dec+0x1ab/0x1d0 kernel/jump_label.c:206 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 15907 Comm: syz-executor.2 Not tainted 5.2.0-rc6+ #62 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 panic+0x2cb/0x744 kernel/panic.c:219 __warn.cold+0x20/0x4d kernel/panic.c:576 report_bug+0x263/0x2b0 lib/bug.c:186 fixup_bug arch/x86/kernel/traps.c:179 [inline] fixup_bug arch/x86/kernel/traps.c:174 [inline] do_error_trap+0x11b/0x200 arch/x86/kernel/traps.c:272 do_invalid_op+0x37/0x50 arch/x86/kernel/traps.c:291 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:986 RIP: 0010:static_key_slow_try_dec kernel/jump_label.c:221 [inline] RIP: 0010:static_key_slow_try_dec+0x1ab/0x1d0 kernel/jump_label.c:206 Code: c0 e8 e9 3e e5 ff 83 fb 01 0f 85 32 ff ff ff e8 5b 3d e5 ff 45 31 ff eb a0 e8 51 3d e5 ff 48 c7 c7 40 99 92 87 e8 13 75 b7 ff <0f> 0b eb 8b 4c 89 e7 e8 a9 c0 1e 00 e9 de fe ff ff e8 bf 6d b7 ff RSP: 0018:ffff88805f9c7450 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: 0000000000000000 RDX: 000000000000e3e1 RSI: ffffffff815adb06 RDI: ffffed100bf38e7c RBP: ffff88805f9c74e0 R08: ffff88806acf0700 R09: ffffed1015d060a9 R10: ffffed1015d060a8 R11: ffff8880ae830547 R12: ffffffff89832ce0 R13: ffff88805f9c74b8 R14: 1ffff1100bf38e8b R15: 00000000ffffff01 __static_key_slow_dec_deferred+0x65/0x110 kernel/jump_label.c:272 fl_free+0xa9/0xe0 net/ipv6/ip6_flowlabel.c:121 fl_create+0x6af/0x9f0 net/ipv6/ip6_flowlabel.c:457 ipv6_flowlabel_opt+0x80e/0x2730 net/ipv6/ip6_flowlabel.c:624 do_ipv6_setsockopt.isra.0+0x2119/0x4100 net/ipv6/ipv6_sockglue.c:825 ipv6_setsockopt+0xf6/0x170 net/ipv6/ipv6_sockglue.c:944 tcp_setsockopt net/ipv4/tcp.c:3131 [inline] tcp_setsockopt+0x8f/0xe0 net/ipv4/tcp.c:3125 sock_common_setsockopt+0x94/0xd0 net/core/sock.c:3130 __sys_setsockopt+0x253/0x4b0 net/socket.c:2080 __do_sys_setsockopt net/socket.c:2096 [inline] __se_sys_setsockopt net/socket.c:2093 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:2093 do_syscall_64+0xfd/0x680 arch/x86/entry/common.c:301 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4597c9 Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2670556c78 
EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004597c9 RDX: 0000000000000020 RSI: 0000000000000029 RDI: 0000000000000003 RBP: 000000000075bfc8 R08: 000000000000fdf7 R09: 0000000000000000 R10: 0000000020000000 R11: 0000000000000246 R12: 00007f26705576d4 R13: 00000000004cec00 R14: 00000000004dd520 R15: 00000000ffffffff Kernel Offset: disabled Rebooting in 86400 seconds.. Fixes: 59c820b2317f ("ipv6: elide flowlabel check if no exclusive leases exist") Signed-off-by: Eric Dumazet Acked-by: Willem de Bruijn Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index ad284b1fd308..d64b83e85642 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -435,8 +435,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, } fl->dst = freq->flr_dst; atomic_set(&fl->users, 1); - if (fl_shared_exclusive(fl) || fl->opt) - static_branch_deferred_inc(&ipv6_flowlabel_exclusive); switch (fl->share) { case IPV6_FL_S_EXCL: case IPV6_FL_S_ANY: @@ -451,10 +449,15 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -EINVAL; goto done; } + if (fl_shared_exclusive(fl) || fl->opt) + static_branch_deferred_inc(&ipv6_flowlabel_exclusive); return fl; done: - fl_free(fl); + if (fl) { + kfree(fl->opt); + kfree(fl); + } *err_p = err; return NULL; } From c93dfec10f1d693a897bfd0d6e3a58a5ea7edc91 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 11 Jul 2019 19:39:55 +0000 Subject: [PATCH 20/36] net/mlx5e: Fix compilation error in TLS code In the cited patch below, the Kconfig flags combination of: CONFIG_MLX5_FPGA is not set CONFIG_MLX5_TLS=y CONFIG_MLX5_EN_TLS=y leads to the compilation error: ./include/linux/mlx5/device.h:61:39: error: invalid application of sizeof to incomplete type struct mlx5_ifc_tls_flow_bits. Fix it. Fixes: 90687e1a9a50 ("net/mlx5: Kconfig, Better organize compilation flags") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed CC: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index 879321b21616..d787bc0a4155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -81,7 +81,6 @@ mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, struct tls_crypto_info *crypto_info) { return false; } #endif -#ifdef CONFIG_MLX5_FPGA_TLS enum { MLX5_ACCEL_TLS_TX = BIT(0), MLX5_ACCEL_TLS_RX = BIT(1), @@ -103,6 +102,7 @@ struct mlx5_ifc_tls_flow_bits { u8 reserved_at_2[0x1e]; }; +#ifdef CONFIG_MLX5_FPGA_TLS int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn, u32 *p_swid, From 2f1f5a7731df239a0d1965bc6b75ac37dcb803f3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jul 2019 19:39:57 +0000 Subject: [PATCH 21/36] net/mlx5e: Fix unused variable warning when CONFIG_MLX5_ESWITCH is off In mlx5e_setup_tc "priv" variable is not being used if CONFIG_MLX5_ESWITCH is off, one way to fix this is to actually use it. mlx5e_setup_tc_mqprio also needs the "priv" variable and it extracts it on its own. 
We can simply pass priv to mlx5e_setup_tc_mqprio instead of netdev and avoid extracting the priv var, which will also resolve the compiler warning. Fixes: 4e95bc268b91 ("net: flow_offload: add flow_block_cb_setup_simple()") Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Tariq Toukan CC: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6d0ae87c8ded..9163d6904741 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3390,10 +3390,9 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } -static int mlx5e_setup_tc_mqprio(struct net_device *netdev, +static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, struct tc_mqprio_qopt *mqprio) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; u8 tc = mqprio->num_tc; int err = 0; @@ -3475,7 +3474,7 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, priv, priv, true); #endif case TC_SETUP_QDISC_MQPRIO: - return mlx5e_setup_tc_mqprio(dev, type_data); + return mlx5e_setup_tc_mqprio(priv, type_data); default: return -EOPNOTSUPP; } From 9446d17e0e21889046568e5f970e9c706acb01f5 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 11 Jul 2019 19:39:59 +0000 Subject: [PATCH 22/36] net/mlx5: E-Switch, Reduce ingress acl modify metadata stack usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compiler warning: In function ‘esw_vport_add_ingress_acl_modify_metadata’: the frame size of 1084 bytes is larger than 1024 bytes [-Wframe-larger-than=] Since the structure is never written to, we can statically allocate it to avoid the stack usage. Fixes: 7445cfb1169c ("net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs") Signed-off-by: Saeed Mahameed Reviewed-by: Jianbo Liu Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8ed4497929b9..5f78e76019c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1785,8 +1785,8 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec spec = {}; int err = 0; MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); From 9db7e618fca34d0a7d61c149d726fd90644ecb1e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 9 Jul 2019 23:06:15 -0700 Subject: [PATCH 23/36] net/mlx5e: Convert single case statement switch statements into if statements During the review of commit 1ff2f0fa450e ("net/mlx5e: Return in default case statement in tx_post_resync_params"), Leon and Nick pointed out that the switch statements can be converted to single if statements that return early so that the code is easier to follow. 
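For example, fill_static_params_ctx() now bails out early on an unsupported cipher type and then continues with a plain assignment, as the hunk below shows:

	if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
		return;

	info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
	EXTRACT_INFO_FIELDS;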
Suggested-by: Leon Romanovsky Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- .../mellanox/mlx5/core/en_accel/ktls_tx.c | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 5c08891806f0..ea032f54197e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -25,23 +25,17 @@ static void fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) { struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + struct tls12_crypto_info_aes_gcm_128 *info; char *initial_rn, *gcm_iv; u16 salt_sz, rec_seq_sz; char *salt, *rec_seq; u8 tls_version; - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 *info = - (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; - - EXTRACT_INFO_FIELDS; - break; - } - default: - WARN_ON(1); + if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) return; - } + + info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + EXTRACT_INFO_FIELDS; gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); @@ -234,24 +228,18 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, u64 rcd_sn) { struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + struct tls12_crypto_info_aes_gcm_128 *info; __be64 rn_be = cpu_to_be64(rcd_sn); bool skip_static_post; u16 rec_seq_sz; char *rec_seq; - switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - struct tls12_crypto_info_aes_gcm_128 *info = - (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; - - rec_seq = info->rec_seq; - rec_seq_sz = sizeof(info->rec_seq); - break; - } - default: - WARN_ON(1); + if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)) return; - } + + info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + rec_seq = info->rec_seq; + rec_seq_sz = sizeof(info->rec_seq); skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); if (!skip_static_post) From 6b660c4177aaebdc73df7a3378f0e8b110aa4b51 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 6 Jul 2019 01:08:09 +0900 Subject: [PATCH 24/36] net: openvswitch: do not update max_headroom if new headroom is equal to old headroom When a vport is deleted, the maximum headroom size would be changed. If the vport which has the largest headroom is deleted, the new max_headroom would be set. But, if the new headroom size is equal to the old headroom size, updating routine is unnecessary. Signed-off-by: Taehee Yoo Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. 
Miller --- net/openvswitch/datapath.c | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 33b388103741..892287d06c17 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1958,10 +1958,9 @@ static struct vport *lookup_vport(struct net *net, } -/* Called with ovs_mutex */ -static void update_headroom(struct datapath *dp) +static unsigned int ovs_get_max_headroom(struct datapath *dp) { - unsigned dev_headroom, max_headroom = 0; + unsigned int dev_headroom, max_headroom = 0; struct net_device *dev; struct vport *vport; int i; @@ -1975,10 +1974,19 @@ static void update_headroom(struct datapath *dp) } } - dp->max_headroom = max_headroom; + return max_headroom; +} + +/* Called with ovs_mutex */ +static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) +{ + struct vport *vport; + int i; + + dp->max_headroom = new_headroom; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) - netdev_set_rx_headroom(vport->dev, max_headroom); + netdev_set_rx_headroom(vport->dev, new_headroom); } static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1989,6 +1997,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct vport *vport; struct datapath *dp; + unsigned int new_headroom; u32 port_no; int err; @@ -2050,8 +2059,10 @@ restart: info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW); - if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) - update_headroom(dp); + new_headroom = netdev_get_fwd_headroom(vport->dev); + + if (new_headroom > dp->max_headroom) + ovs_update_headroom(dp, new_headroom); else netdev_set_rx_headroom(vport->dev, dp->max_headroom); @@ -2122,11 +2133,12 @@ exit_unlock_free: static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) { - bool must_update_headroom = false; + bool update_headroom = false; struct nlattr **a = info->attrs; struct sk_buff *reply; struct datapath *dp; struct vport *vport; + unsigned int new_headroom; int err; reply = ovs_vport_cmd_alloc_info(); @@ -2152,12 +2164,17 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) /* the vport deletion may trigger dp headroom update */ dp = vport->dp; if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) - must_update_headroom = true; + update_headroom = true; + netdev_reset_rx_headroom(vport->dev); ovs_dp_detach_port(vport); - if (must_update_headroom) - update_headroom(dp); + if (update_headroom) { + new_headroom = ovs_get_max_headroom(dp); + + if (new_headroom < dp->max_headroom) + ovs_update_headroom(dp, new_headroom); + } ovs_unlock(); ovs_notify(&dp_vport_genl_family, reply, info); From c653f61a7f3ef05db173fe4132424db17edeee6b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jul 2019 10:00:33 +0200 Subject: [PATCH 25/36] davinci_cpdma: don't cast dma_addr_t to pointer dma_addr_t may be 64-bit wide on 32-bit architectures, so it is not valid to cast between it and a pointer: drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_submit_si': drivers/net/ethernet/ti/davinci_cpdma.c:1047:12: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_idle_submit_mapped': drivers/net/ethernet/ti/davinci_cpdma.c:1114:12: error: cast to pointer from integer of different size 
[-Werror=int-to-pointer-cast] drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_submit_mapped': drivers/net/ethernet/ti/davinci_cpdma.c:1164:12: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] Solve this by using two separate members in 'struct submit_info'. Since this avoids the use of the 'flag' member, the structure does not even grow in typical configurations. Fixes: 6670acacd59e ("net: ethernet: ti: davinci_cpdma: add dma mapped submit") Signed-off-by: Arnd Bergmann Reviewed-by: Ivan Khoronzhuk Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 0ca2a1a254de..a65edd2770e6 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -138,8 +138,8 @@ struct submit_info { struct cpdma_chan *chan; int directed; void *token; - void *data; - int flags; + void *data_virt; + dma_addr_t data_dma; int len; }; @@ -1043,12 +1043,12 @@ static int cpdma_chan_submit_si(struct submit_info *si) mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP; cpdma_desc_to_port(chan, mode, si->directed); - if (si->flags & CPDMA_DMA_EXT_MAP) { - buffer = (dma_addr_t)si->data; + if (si->data_dma) { + buffer = si->data_dma; dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir); swlen |= CPDMA_DMA_EXT_MAP; } else { - buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir); + buffer = dma_map_single(ctlr->dev, si->data_virt, len, chan->dir); ret = dma_mapping_error(ctlr->dev, buffer); if (ret) { cpdma_desc_free(ctlr->pool, desc, 1); @@ -1086,10 +1086,10 @@ int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, si.chan = chan; si.token = token; - si.data = data; + si.data_virt = data; + si.data_dma = 0; si.len = len; si.directed = directed; - si.flags = 0; spin_lock_irqsave(&chan->lock, flags); if (chan->state == CPDMA_STATE_TEARDOWN) { @@ -1111,10 +1111,10 @@ int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, si.chan = chan; si.token = token; - si.data = (void *)data; + si.data_virt = NULL; + si.data_dma = data; si.len = len; si.directed = directed; - si.flags = CPDMA_DMA_EXT_MAP; spin_lock_irqsave(&chan->lock, flags); if (chan->state == CPDMA_STATE_TEARDOWN) { @@ -1136,10 +1136,10 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, si.chan = chan; si.token = token; - si.data = data; + si.data_virt = data; + si.data_dma = 0; si.len = len; si.directed = directed; - si.flags = 0; spin_lock_irqsave(&chan->lock, flags); if (chan->state != CPDMA_STATE_ACTIVE) { @@ -1161,10 +1161,10 @@ int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, si.chan = chan; si.token = token; - si.data = (void *)data; + si.data_virt = NULL; + si.data_dma = data; si.len = len; si.directed = directed; - si.flags = CPDMA_DMA_EXT_MAP; spin_lock_irqsave(&chan->lock, flags); if (chan->state != CPDMA_STATE_ACTIVE) { From c1a970d06f8cf390354a4a426976ed7f960b71f1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 10 Jul 2019 20:12:29 +0300 Subject: [PATCH 26/36] net: sched: Fix NULL-pointer dereference in tc_indr_block_ing_cmd() After recent refactoring of block offlads infrastructure, indr_dev->block pointer is dereferenced before it is verified to be non-NULL. 
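The unguarded read is of block->index, so when no tc block has been bound yet (indr_dev->block == NULL) the lookup faults at a small offset from address zero, which is consistent with the KASAN report below. A minimal sketch of the guard the patch introduces (illustrative only, not the full cls_api code):

    /* Unsafe while indr_dev->block may still be NULL: */
    bo.block_shared = tcf_block_shared(indr_dev->block);   /* reads block->index */

    /* NULL-tolerant variant added by the patch: */
    static inline bool tcf_block_non_null_shared(struct tcf_block *block)
    {
            return block && block->index;
    }

The trace that follows shows this path being reached from the netdevice notifier while a vxlan device is being registered.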
Example stack trace where this behavior leads to NULL-pointer dereference error when creating vxlan dev on system with mlx5 NIC with offloads enabled: [ 1157.852938] ================================================================== [ 1157.866877] BUG: KASAN: null-ptr-deref in tc_indr_block_ing_cmd.isra.41+0x9c/0x160 [ 1157.880877] Read of size 4 at addr 0000000000000090 by task ip/3829 [ 1157.901637] CPU: 22 PID: 3829 Comm: ip Not tainted 5.2.0-rc6+ #488 [ 1157.914438] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 1157.929031] Call Trace: [ 1157.938318] dump_stack+0x9a/0xeb [ 1157.948362] ? tc_indr_block_ing_cmd.isra.41+0x9c/0x160 [ 1157.960262] ? tc_indr_block_ing_cmd.isra.41+0x9c/0x160 [ 1157.972082] __kasan_report+0x176/0x192 [ 1157.982513] ? tc_indr_block_ing_cmd.isra.41+0x9c/0x160 [ 1157.994348] kasan_report+0xe/0x20 [ 1158.004324] tc_indr_block_ing_cmd.isra.41+0x9c/0x160 [ 1158.015950] ? tcf_block_setup+0x430/0x430 [ 1158.026558] ? kasan_unpoison_shadow+0x30/0x40 [ 1158.037464] __tc_indr_block_cb_register+0x5f5/0xf20 [ 1158.049288] ? mlx5e_rep_indr_tc_block_unbind+0xa0/0xa0 [mlx5_core] [ 1158.062344] ? tc_indr_block_dev_put.part.47+0x5c0/0x5c0 [ 1158.074498] ? rdma_roce_rescan_device+0x20/0x20 [ib_core] [ 1158.086580] ? br_device_event+0x98/0x480 [bridge] [ 1158.097870] ? strcmp+0x30/0x50 [ 1158.107578] mlx5e_nic_rep_netdevice_event+0xdd/0x180 [mlx5_core] [ 1158.120212] notifier_call_chain+0x6d/0xa0 [ 1158.130753] register_netdevice+0x6fc/0x7e0 [ 1158.141322] ? netdev_change_features+0xa0/0xa0 [ 1158.152218] ? vxlan_config_apply+0x210/0x310 [vxlan] [ 1158.163593] __vxlan_dev_create+0x2ad/0x520 [vxlan] [ 1158.174770] ? vxlan_changelink+0x490/0x490 [vxlan] [ 1158.185870] ? rcu_read_unlock+0x60/0x60 [vxlan] [ 1158.196798] vxlan_newlink+0x99/0xf0 [vxlan] [ 1158.207303] ? __vxlan_dev_create+0x520/0x520 [vxlan] [ 1158.218601] ? rtnl_create_link+0x3d0/0x450 [ 1158.228900] __rtnl_newlink+0x8a7/0xb00 [ 1158.238701] ? stack_access_ok+0x35/0x80 [ 1158.248450] ? rtnl_link_unregister+0x1a0/0x1a0 [ 1158.258735] ? find_held_lock+0x6d/0xd0 [ 1158.268379] ? is_bpf_text_address+0x67/0xf0 [ 1158.278330] ? lock_acquire+0xc1/0x1f0 [ 1158.287686] ? is_bpf_text_address+0x5/0xf0 [ 1158.297449] ? is_bpf_text_address+0x86/0xf0 [ 1158.307310] ? kernel_text_address+0xec/0x100 [ 1158.317155] ? arch_stack_walk+0x92/0xe0 [ 1158.326497] ? __kernel_text_address+0xe/0x30 [ 1158.336213] ? unwind_get_return_address+0x2f/0x50 [ 1158.346267] ? create_prof_cpu_mask+0x20/0x20 [ 1158.355936] ? arch_stack_walk+0x92/0xe0 [ 1158.365117] ? stack_trace_save+0x8a/0xb0 [ 1158.374272] ? stack_trace_consume_entry+0x80/0x80 [ 1158.384226] ? match_held_lock+0x33/0x210 [ 1158.393216] ? kasan_unpoison_shadow+0x30/0x40 [ 1158.402593] rtnl_newlink+0x53/0x80 [ 1158.410925] rtnetlink_rcv_msg+0x3a5/0x600 [ 1158.419777] ? validate_linkmsg+0x400/0x400 [ 1158.428620] ? find_held_lock+0x6d/0xd0 [ 1158.437117] ? match_held_lock+0x1b/0x210 [ 1158.445760] ? validate_linkmsg+0x400/0x400 [ 1158.454642] netlink_rcv_skb+0xc7/0x1f0 [ 1158.463150] ? netlink_ack+0x470/0x470 [ 1158.471538] ? netlink_deliver_tap+0x1f3/0x5a0 [ 1158.480607] netlink_unicast+0x2ae/0x350 [ 1158.489099] ? netlink_attachskb+0x340/0x340 [ 1158.497935] ? _copy_from_iter_full+0xde/0x3b0 [ 1158.506945] ? __virt_addr_valid+0xb6/0xf0 [ 1158.515578] ? __check_object_size+0x159/0x240 [ 1158.524515] netlink_sendmsg+0x4d3/0x630 [ 1158.532879] ? netlink_unicast+0x350/0x350 [ 1158.541400] ? 
netlink_unicast+0x350/0x350 [ 1158.549805] sock_sendmsg+0x94/0xa0 [ 1158.557561] ___sys_sendmsg+0x49d/0x570 [ 1158.565625] ? copy_msghdr_from_user+0x210/0x210 [ 1158.574457] ? __fput+0x1e2/0x330 [ 1158.581948] ? __kasan_slab_free+0x130/0x180 [ 1158.590407] ? kmem_cache_free+0xb6/0x2d0 [ 1158.598574] ? mark_lock+0xc7/0x790 [ 1158.606177] ? task_work_run+0xcf/0x100 [ 1158.614165] ? exit_to_usermode_loop+0x102/0x110 [ 1158.622954] ? __lock_acquire+0x963/0x1ee0 [ 1158.631199] ? lockdep_hardirqs_on+0x260/0x260 [ 1158.639777] ? match_held_lock+0x1b/0x210 [ 1158.647918] ? lockdep_hardirqs_on+0x260/0x260 [ 1158.656501] ? match_held_lock+0x1b/0x210 [ 1158.664643] ? __fget_light+0xa6/0xe0 [ 1158.672423] ? __sys_sendmsg+0xd2/0x150 [ 1158.680334] __sys_sendmsg+0xd2/0x150 [ 1158.688063] ? __ia32_sys_shutdown+0x30/0x30 [ 1158.696435] ? lock_downgrade+0x2e0/0x2e0 [ 1158.704541] ? mark_held_locks+0x1a/0x90 [ 1158.712611] ? mark_held_locks+0x1a/0x90 [ 1158.720619] ? do_syscall_64+0x1e/0x2c0 [ 1158.728530] do_syscall_64+0x78/0x2c0 [ 1158.736254] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1158.745414] RIP: 0033:0x7f62d505cb87 [ 1158.753070] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00[87/1817] 48 89 f3 48 [ 1158.780924] RSP: 002b:00007fffd9832268 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 1158.793204] RAX: ffffffffffffffda RBX: 000000005d26048f RCX: 00007f62d505cb87 [ 1158.805111] RDX: 0000000000000000 RSI: 00007fffd98322d0 RDI: 0000000000000003 [ 1158.817055] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006 [ 1158.828987] R10: 00007f62d50ce260 R11: 0000000000000246 R12: 0000000000000001 [ 1158.840909] R13: 000000000067e540 R14: 0000000000000000 R15: 000000000067ed20 [ 1158.852873] ================================================================== Introduce new function tcf_block_non_null_shared() that verifies block pointer before dereferencing it to obtain index. Use the function in tc_indr_block_ing_cmd() to prevent NULL pointer dereference. Fixes: 955bcb6ea0df ("drivers: net: use flow block API") Signed-off-by: Vlad Buslov Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 10 ++++++++++ net/sched/cls_api.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b03d466182db..841faadceb6e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -60,6 +60,11 @@ static inline bool tcf_block_shared(struct tcf_block *block) return block->index; } +static inline bool tcf_block_non_null_shared(struct tcf_block *block) +{ + return block && block->index; +} + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { WARN_ON(tcf_block_shared(block)); @@ -84,6 +89,11 @@ static inline bool tcf_block_shared(struct tcf_block *block) return false; } +static inline bool tcf_block_non_null_shared(struct tcf_block *block) +{ + return false; +} + static inline int tcf_block_get(struct tcf_block **p_block, struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 638c1bc1ea1b..278014e26aec 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -684,7 +684,7 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, .command = command, .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_shared(indr_dev->block), + .block_shared = tcf_block_non_null_shared(indr_dev->block), }; INIT_LIST_HEAD(&bo.cb_list); From 54638c6eaf445ecf901128599cfeb4620be47d2f Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 10 Jul 2019 21:03:24 +0300 Subject: [PATCH 27/36] net: phy: make exported variables non-static The variables phy_basic_ports_array, phy_fibre_port_array and phy_all_ports_features_array are declared static and marked EXPORT_SYMBOL_GPL(), which is at best an odd combination. Because the variables were decided to be a part of API, this commit removes the static attributes and adds the declarations to the header. Fixes: 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode") Signed-off-by: Denis Efremov Signed-off-by: David S. 
Miller --- drivers/net/phy/phy_device.c | 6 +++--- include/linux/phy.h | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 53878908adf4..6b5cb87f3866 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -56,19 +56,19 @@ EXPORT_SYMBOL_GPL(phy_10gbit_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_fec_features); -static const int phy_basic_ports_array[] = { +const int phy_basic_ports_array[3] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, }; EXPORT_SYMBOL_GPL(phy_basic_ports_array); -static const int phy_fibre_port_array[] = { +const int phy_fibre_port_array[1] = { ETHTOOL_LINK_MODE_FIBRE_BIT, }; EXPORT_SYMBOL_GPL(phy_fibre_port_array); -static const int phy_all_ports_features_array[] = { +const int phy_all_ports_features_array[7] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, diff --git a/include/linux/phy.h b/include/linux/phy.h index 1739c6dc470e..462b90b73f93 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -55,6 +55,9 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_ini #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) +extern const int phy_basic_ports_array[3]; +extern const int phy_fibre_port_array[1]; +extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; extern const int phy_basic_t1_features_array[2]; extern const int phy_gbit_features_array[2]; From 3929502b957ed05575e74134a817f26c42d13e2c Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 10 Jul 2019 21:25:54 +0300 Subject: [PATCH 28/36] net/mlx5e: Provide cb_list pointer when setting up tc block on rep Recent refactoring of tc block offloads infrastructure introduced new flow_block_cb_setup_simple() method intended to be used as unified way for all drivers to register offload callbacks. However, commit that actually extended all users (drivers) with block cb list and provided it to flow_block infra missed mlx5 en_rep. 
This leads to following NULL-pointer dereference when creating Qdisc: [ 278.385175] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 278.393233] #PF: supervisor read access in kernel mode [ 278.399446] #PF: error_code(0x0000) - not-present page [ 278.405847] PGD 8000000850e73067 P4D 8000000850e73067 PUD 8620cd067 PMD 0 [ 278.414141] Oops: 0000 [#1] SMP PTI [ 278.419019] CPU: 7 PID: 3369 Comm: tc Not tainted 5.2.0-rc6+ #492 [ 278.426580] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 278.435853] RIP: 0010:flow_block_cb_setup_simple+0xc4/0x190 [ 278.442953] Code: 10 48 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 49 89 00 48 05 00 01 00 00 49 89 40 08 31 c0 c3 b8 a1 ff ff ff c3 f3 c3 <48> 8b 06 48 39 c6 75 0a eb 1a 48 8b 00 48 39 c6 74 12 48 3b 50 28 [ 278.464829] RSP: 0018:ffffaf07c3f97990 EFLAGS: 00010246 [ 278.471648] RAX: 0000000000000000 RBX: ffff9b43ed4c7680 RCX: ffff9b43d5f80840 [ 278.480408] RDX: ffffffffc0491650 RSI: 0000000000000000 RDI: ffffaf07c3f97998 [ 278.489110] RBP: ffff9b43ddff9000 R08: ffff9b43d5f80840 R09: 0000000000000001 [ 278.497838] R10: 0000000000000009 R11: 00000000000003ad R12: ffffaf07c3f97c08 [ 278.506595] R13: ffff9b43d5f80000 R14: ffff9b43ed4c7680 R15: ffff9b43dfa20b40 [ 278.515374] FS: 00007f796be1b400(0000) GS:ffff9b43ef840000(0000) knlGS:0000000000000000 [ 278.525099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 278.532453] CR2: 0000000000000000 CR3: 0000000840398002 CR4: 00000000001606e0 [ 278.541197] Call Trace: [ 278.545252] tcf_block_offload_cmd.isra.52+0x7e/0xb0 [ 278.551871] tcf_block_get_ext+0x365/0x3e0 [ 278.557569] qdisc_create+0x15c/0x4e0 [ 278.562859] ? kmem_cache_alloc_trace+0x1a2/0x1c0 [ 278.569235] tc_modify_qdisc+0x1c8/0x780 [ 278.574761] rtnetlink_rcv_msg+0x291/0x340 [ 278.580518] ? _cond_resched+0x15/0x40 [ 278.585856] ? rtnl_calcit.isra.29+0x120/0x120 [ 278.591868] netlink_rcv_skb+0x4a/0x110 [ 278.597198] netlink_unicast+0x1a0/0x250 [ 278.602601] netlink_sendmsg+0x2c1/0x3c0 [ 278.608022] sock_sendmsg+0x5b/0x60 [ 278.612969] ___sys_sendmsg+0x289/0x310 [ 278.618231] ? do_wp_page+0x99/0x730 [ 278.623216] ? page_add_new_anon_rmap+0xbe/0x140 [ 278.629298] ? __handle_mm_fault+0xc84/0x1360 [ 278.635113] ? 
__sys_sendmsg+0x5e/0xa0 [ 278.640285] __sys_sendmsg+0x5e/0xa0 [ 278.645239] do_syscall_64+0x5b/0x1b0 [ 278.650274] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 278.656697] RIP: 0033:0x7f796abdeb87 [ 278.661628] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 278.683248] RSP: 002b:00007ffde213ba48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 278.692245] RAX: ffffffffffffffda RBX: 000000005d261e6f RCX: 00007f796abdeb87 [ 278.700862] RDX: 0000000000000000 RSI: 00007ffde213bab0 RDI: 0000000000000003 [ 278.709527] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006 [ 278.718167] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 278.726743] R13: 000000000067b580 R14: 0000000000000000 R15: 0000000000000000 [ 278.735302] Modules linked in: dummy vxlan ip6_udp_tunnel udp_tunnel sch_ingress nfsv3 nfs_acl nfs lockd grace fscache bridge stp llc sunrpc mlx5_ib ib_uverbs intel_rapl ib_core sb_edac x86_pkg_temp_ thermal intel_powerclamp coretemp kvm_intel kvm mlx5_core irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel igb ghash_clmulni_intel ses mei_me enclosure mlxfw ipmi_ssif intel_cstate iTCO_wdt ptp mei pps_core iTCO_vendor_support pcspkr joydev intel_uncore i2c_i801 ipmi_si lpc_ich intel_rapl_perf ioatdma wmi dca pcc_cpufreq ipmi_devintf ipmi_msghandler acpi_power_meter acpi_pad ast i2c_algo_bit drm_k ms_helper ttm drm mpt3sas raid_class scsi_transport_sas [ 278.802263] CR2: 0000000000000000 [ 278.807170] ---[ end trace b1f0a442a279e66f ]--- Extend en_rep with new static mlx5e_rep_block_cb_list list and pass it to flow_block_cb_setup_simple() function instead of hardcoded NULL pointer. Fixes: 955bcb6ea0df ("drivers: net: use flow block API") Signed-off-by: Vlad Buslov Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 10ef90a7bddd..7245d287633d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1175,6 +1175,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, } } +static LIST_HEAD(mlx5e_rep_block_cb_list); + static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -1182,7 +1184,8 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: - return flow_block_cb_setup_simple(type_data, NULL, + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_cb_list, mlx5e_rep_setup_tc_cb, priv, priv, true); default: From fd262a6d8a5d8d504e6a0b0183a05deda6bef7a3 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Wed, 10 Jul 2019 19:30:29 +0100 Subject: [PATCH 29/36] nfp: flower: fix ethernet check on match fields NFP firmware does not explicitly match on an ethernet type field. Rather, each rule has a bitmask of match fields that can be used to infer the ethernet type. Currently, if a flower rule contains an unknown ethernet type, a check is carried out for matches on other fields of the packet. If matches on layer 3 or 4 are found, then the offload is rejected as firmware will not be able to extract these fields from a packet with an ethernet type it does not currently understand. 
However, if a rule contains an unknown ethernet type without any L3 (or above) matches then this will effectively be offloaded as a rule with a wildcarded ethertype. This can lead to misclassifications on the firmware. Fix this issue by rejecting all flower rules that specify a match on an unknown ethernet type. Further ensure correct offloads by moving the 'L3 and above' check to any rule that does not specify an ethernet type and rejecting rules with further matches. This means that we can still offload rules with a wildcarded ethertype if they only match on L2 fields but will prevent rules which match on further fields that we cannot be sure if the firmware will be able to extract. Fixes: af9d842c1354 ("nfp: extend flower add flow offload") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/offload.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 7e725fa60347..885f96887150 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -368,15 +368,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, break; default: - /* Other ethtype - we need check the masks for the - * remainder of the key to ensure we can offload. - */ - if (nfp_flower_check_higher_than_mac(flow)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: non IPv4/IPv6 offload with L3/L4 matches not supported"); - return -EOPNOTSUPP; - } - break; + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported"); + return -EOPNOTSUPP; } + } else if (nfp_flower_check_higher_than_mac(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType"); + return -EOPNOTSUPP; } if (basic.mask && basic.mask->ip_proto) { From 103b7c25f5a2509ddccec830df924a78354232c4 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Wed, 10 Jul 2019 19:30:30 +0100 Subject: [PATCH 30/36] nfp: flower: ensure ip protocol is specified for L4 matches Flower rules on the NFP firmware are able to match on an IP protocol field. When parsing rules in the driver, unknown IP protocols are only rejected when further matches are to be carried out on layer 4 fields, as the firmware will not be able to extract such fields from packets. L4 protocol dissectors such as FLOW_DISSECTOR_KEY_PORTS are only parsed if an IP protocol is specified. This leaves a loophole whereby a rule that attempts to match on transport layer information such as port numbers but does not explicitly give an IP protocol type can be incorrectly offloaded (in this case with wildcard port numbers matches). Fix this by rejecting the offload of flows that attempt to match on L4 information, not only when matching on an unknown IP protocol type, but also when the protocol is wildcarded. Fixes: 2a04784594f6 ("nfp: flower: check L4 matches on unknown IP protocols") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/flower/offload.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 885f96887150..faa8ba012a37 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -386,18 +386,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, key_layer |= NFP_FLOWER_LAYER_TP; key_size += sizeof(struct nfp_flower_tp_ports); break; - default: - /* Other ip proto - we need check the masks for the - * remainder of the key to ensure we can offload. - */ - if (nfp_flower_check_higher_than_l3(flow)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unknown IP protocol with L4 matches not supported"); - return -EOPNOTSUPP; - } - break; } } + if (!(key_layer & NFP_FLOWER_LAYER_TP) && + nfp_flower_check_higher_than_l3(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type"); + return -EOPNOTSUPP; + } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { struct flow_match_tcp tcp; u32 tcp_flags; From 100c4043b808739d808bb5c9f6868d3808f062ea Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Thu, 11 Jul 2019 12:29:00 -0400 Subject: [PATCH 31/36] tc-tests: updated skbedit tests - Added mask upper bound test case - Added mask validation test case - Added mask replacement case Signed-off-by: Roman Mashak Signed-off-by: David S. Miller --- .../tc-testing/tc-tests/actions/skbedit.json | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index 45e7e89928a5..bf5ebf59c2d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -69,6 +69,123 @@ "matchCount": "0", "teardown": [] }, + { + "id": "d4cd", + "name": "Add skbedit action with valid mark and mask", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1/0xaabb", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1/0xaabb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "baa7", + "name": "Add skbedit action with valid mark and 32-bit maximum mask", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1/0xffffffff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1/0xffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, + { + "id": "62a5", + "name": "Add skbedit action with valid mark and mask exceeding 32-bit maximum", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1/0xaabbccddeeff112233", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1/0xaabbccddeeff112233", + "matchCount": "0", + "teardown": [] + }, + { + 
"id": "bc15", + "name": "Add skbedit action with valid mark and mask with invalid format", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action skbedit mark 1/-1234", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1/-1234", + "matchCount": "0", + "teardown": [] + }, + { + "id": "57c2", + "name": "Replace skbedit action with new mask", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit mark 1/0x11223344 index 1" + ], + "cmdUnderTest": "$TC actions replace action skbedit mark 1/0xaabb index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit mark 1/0xaabb", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] + }, { "id": "081d", "name": "Add skbedit action with priority", From d12cffe9329fd278555d0f9bb89af1259a2fd933 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 12 Jul 2019 10:41:15 +1200 Subject: [PATCH 32/36] tipc: ensure head->lock is initialised tipc_named_node_up() creates a skb list. It passes the list to tipc_node_xmit() which has some code paths that can call skb_queue_purge() which relies on the list->lock being initialised. The spin_lock is only needed if the messages end up on the receive path but when the list is created in tipc_named_node_up() we don't necessarily know if it is going to end up there. Once all the skb list users are updated in tipc it will then be possible to update them to use the unlocked variants of the skb list functions and initialise the lock when we know the message will follow the receive path. Signed-off-by: Chris Packham Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 61219f0b9677..44abc8e9c990 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; - __skb_queue_head_init(&head); + skb_queue_head_init(&head); read_lock_bh(&nt->cluster_scope_lock); named_distribute(net, &head, dnode, &nt->cluster_scope); From 752c2ea2d8e7c23b0f64e2e7d4337f3604d44c9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Jul 2019 11:06:33 +0200 Subject: [PATCH 33/36] cxgb4: reduce kernel stack usage in cudbg_collect_mem_region() The cudbg_collect_mem_region() and cudbg_read_fw_mem() both use several hundred kilobytes of kernel stack space. One gets inlined into the other, which causes the stack usage to be combined beyond the warning limit when building with clang: drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c:1057:12: error: stack frame size of 1244 bytes in function 'cudbg_collect_mem_region' [-Werror,-Wframe-larger-than=] Restructuring cudbg_collect_mem_region() lets clang do the same optimization that gcc does and reuse the stack slots as it can see that the large variables are never used together. A better fix might be to avoid using cudbg_meminfo on the stack altogether, but that requires a larger rewrite. Fixes: a1c69520f785 ("cxgb4: collect MC memory dump") Signed-off-by: Arnd Bergmann Signed-off-by: David S. 
Miller --- .../net/ethernet/chelsio/cxgb4/cudbg_lib.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index a76529a7662d..c2e92786608b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1054,14 +1054,12 @@ static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init, } } -static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, - struct cudbg_buffer *dbg_buff, - struct cudbg_error *cudbg_err, - u8 mem_type) +static unsigned long cudbg_mem_region_size(struct cudbg_init *pdbg_init, + struct cudbg_error *cudbg_err, + u8 mem_type) { struct adapter *padap = pdbg_init->adap; struct cudbg_meminfo mem_info; - unsigned long size; u8 mc_idx; int rc; @@ -1075,7 +1073,16 @@ static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, if (rc) return rc; - size = mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base; + return mem_info.avail[mc_idx].limit - mem_info.avail[mc_idx].base; +} + +static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, + u8 mem_type) +{ + unsigned long size = cudbg_mem_region_size(pdbg_init, cudbg_err, mem_type); + return cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, size, cudbg_err); } From 56170ba3bd90986e44f96ada9573dfe583621fca Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao Date: Fri, 12 Jul 2019 21:16:24 +0800 Subject: [PATCH 34/36] net: hisilicon: Use devm_platform_ioremap_resource Use devm_platform_ioremap_resource instead of devm_ioremap_resource. Make the code simpler. Signed-off-by: Jiangfeng Xiao Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 7 ++----- drivers/net/ethernet/hisilicon/hisi_femac.c | 7 ++----- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 7 ++----- drivers/net/ethernet/hisilicon/hns_mdio.c | 4 +--- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 625635771b83..d60452845539 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -899,7 +899,6 @@ static int hip04_mac_probe(struct platform_device *pdev) struct of_phandle_args arg; struct net_device *ndev; struct hip04_priv *priv; - struct resource *res; int irq; int ret; @@ -912,16 +911,14 @@ static int hip04_mac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); SET_NETDEV_DEV(ndev, &pdev->dev); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->base = devm_ioremap_resource(d, res); + priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { ret = PTR_ERR(priv->base); goto init_fail; } #if defined(CONFIG_HI13X1_GMAC) - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - priv->sysctrl_base = devm_ioremap_resource(d, res); + priv->sysctrl_base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(priv->sysctrl_base)) { ret = PTR_ERR(priv->sysctrl_base); goto init_fail; diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index d2e019d89a6f..689f18e3100f 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -781,7 +781,6 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device_node *node = dev->of_node; - 
struct resource *res; struct net_device *ndev; struct hisi_femac_priv *priv; struct phy_device *phy; @@ -799,15 +798,13 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) priv->dev = dev; priv->ndev = ndev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->port_base = devm_ioremap_resource(dev, res); + priv->port_base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->port_base)) { ret = PTR_ERR(priv->port_base); goto out_free_netdev; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - priv->glb_base = devm_ioremap_resource(dev, res); + priv->glb_base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(priv->glb_base)) { ret = PTR_ERR(priv->glb_base); goto out_free_netdev; diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 89ef764e1c4b..349970557c52 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1097,7 +1097,6 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) const struct of_device_id *of_id = NULL; struct net_device *ndev; struct hix5hd2_priv *priv; - struct resource *res; struct mii_bus *bus; const char *mac_addr; int ret; @@ -1119,15 +1118,13 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) } priv->hw_cap = (unsigned long)of_id->data; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->base = devm_ioremap_resource(dev, res); + priv->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(priv->base)) { ret = PTR_ERR(priv->base); goto out_free_netdev; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - priv->ctrl_base = devm_ioremap_resource(dev, res); + priv->ctrl_base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(priv->ctrl_base)) { ret = PTR_ERR(priv->ctrl_base); goto out_free_netdev; diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 918cab1c61cd..3e863a71c513 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -417,7 +417,6 @@ static int hns_mdio_probe(struct platform_device *pdev) { struct hns_mdio_device *mdio_dev; struct mii_bus *new_bus; - struct resource *res; int ret = -ENODEV; if (!pdev) { @@ -442,8 +441,7 @@ static int hns_mdio_probe(struct platform_device *pdev) new_bus->priv = mdio_dev; new_bus->parent = &pdev->dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mdio_dev->vbase = devm_ioremap_resource(&pdev->dev, res); + mdio_dev->vbase = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(mdio_dev->vbase)) { ret = PTR_ERR(mdio_dev->vbase); return ret; From f32ae8a5f131a92cd8179adf55cd8405c60e0c20 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 12 Jul 2019 17:33:36 +0200 Subject: [PATCH 35/36] net: dsa: qca8k: replace legacy gpio include This patch replaces the legacy bulk gpio.h include with the proper gpio/consumer.h variant. This was caught by the kbuild test robot that was running into an error because of this. For more information why linux/gpio.h is bad can be found in: commit 56a46b6144e7 ("gpio: Clarify that is legacy") Reported-by: kbuild test robot Link: https://www.spinics.net/lists/netdev/msg584447.html Fixes: a653f2f538f9 ("net: dsa: qca8k: introduce reset via gpio feature") Signed-off-by: Christian Lamparter Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/dsa/qca8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 27709f866c23..232e8cc96f6d 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include "qca8k.h" From 25a09ce79639a8775244808c17282c491cff89cf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 12 Jul 2019 16:39:31 -0700 Subject: [PATCH 36/36] ppp: mppe: Revert "ppp: mppe: Add softdep to arc4" Commit 0e5a610b5ca5 ("ppp: mppe: switch to RC4 library interface"), which was merged through the crypto tree for v5.3, changed ppp_mppe.c to use the new arc4_crypt() library function rather than access RC4 through the dynamic crypto_skcipher API. Meanwhile commit aad1dcc4f011 ("ppp: mppe: Add softdep to arc4") was merged through the net tree and added a module soft-dependency on "arc4". The latter commit no longer makes sense because the code now uses the "libarc4" module rather than "arc4", and also due to the direct use of arc4_crypt(), no module soft-dependency is required. So revert the latter commit. Cc: Takashi Iwai Cc: Ard Biesheuvel Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_mppe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ppp/ppp_mppe.c b/drivers/net/ppp/ppp_mppe.c index bd3c80b0bc77..de3b57d09d0c 100644 --- a/drivers/net/ppp/ppp_mppe.c +++ b/drivers/net/ppp/ppp_mppe.c @@ -64,7 +64,6 @@ MODULE_AUTHOR("Frank Cusack "); MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE)); -MODULE_SOFTDEP("pre: arc4"); MODULE_VERSION("1.0.2"); #define SHA1_PAD_SIZE 40
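Because ppp_mppe.c now calls the arc4 library functions directly, module loading resolves those symbols and pulls in libarc4 as an ordinary hard dependency, which is why the MODULE_SOFTDEP hint is no longer needed. A rough sketch of the library-style usage (assumed from the 5.3 <crypto/arc4.h> interface; the variable names are placeholders, not code copied from ppp_mppe.c):

    #include <crypto/arc4.h>

    struct arc4_ctx ctx;

    /* Direct calls create a symbol dependency on libarc4, so modprobe
     * loads it automatically - no soft-dependency declaration required.
     */
    arc4_setkey(&ctx, session_key, keylen);
    arc4_crypt(&ctx, buf, buf, len);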