From dc1435c00fcd102c9803cd6120701ba5547138d5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Fri, 17 May 2019 15:43:10 +0300 Subject: [PATCH 01/11] RDMA/srp: Rename SRP sysfs name after IB device rename trigger SRP logic used device name and port index as symlink to relevant kobject. If the IB device is renamed then the prior name will be re-used by the next device plugged in and sysfs will panic as SRP will try to re-use the same name. mlx5_ib: Mellanox Connect-IB Infiniband driver v5.0-0 sysfs: cannot create duplicate filename '/class/infiniband_srp/srp-mlx5_0-1' CPU: 3 PID: 1107 Comm: modprobe Not tainted 5.1.0-for-upstream-perf-2019-05-12_15-09-52-87 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0x5a/0x73 sysfs_warn_dup+0x58/0x70 sysfs_do_create_link_sd.isra.2+0xa3/0xb0 device_add+0x33f/0x660 srp_add_one+0x301/0x4f0 [ib_srp] add_client_context+0x99/0xe0 [ib_core] enable_device_and_get+0xd1/0x1b0 [ib_core] ib_register_device+0x533/0x710 [ib_core] ? mutex_lock+0xe/0x30 __mlx5_ib_add+0x23/0x70 [mlx5_ib] mlx5_add_device+0x4e/0xd0 [mlx5_core] mlx5_register_interface+0x85/0xc0 [mlx5_core] ? 0xffffffffa0791000 do_one_initcall+0x4b/0x1cb ? kmem_cache_alloc_trace+0xc6/0x1d0 ? do_init_module+0x22/0x21f do_init_module+0x5a/0x21f load_module+0x17f2/0x1ca0 ? m_show+0x1c0/0x1c0 __do_sys_finit_module+0x94/0xe0 do_syscall_64+0x48/0x120 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f157cce10d9 The module load/unload sequence was used to trigger such kernel panic: sudo modprobe ib_srp sudo modprobe -r mlx5_ib sudo modprobe -r mlx5_core sudo modprobe mlx5_core Have SRP track the name of the core device so that it can't have a name collision. Fixes: d21943dd19b5 ("RDMA/core: Implement IB device rename function") Signed-off-by: Leon Romanovsky Reviewed-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 35 +++++++++++++++++++++-------- drivers/infiniband/ulp/srp/ib_srp.c | 18 ++++++++++++++- include/rdma/ib_verbs.h | 1 + 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 78dc07c6ac4b..cd6b679badfe 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -409,27 +409,44 @@ static int rename_compat_devs(struct ib_device *device) int ib_device_rename(struct ib_device *ibdev, const char *name) { + unsigned long index; + void *client_data; int ret; down_write(&devices_rwsem); if (!strcmp(name, dev_name(&ibdev->dev))) { - ret = 0; - goto out; + up_write(&devices_rwsem); + return 0; } if (__ib_device_get_by_name(name)) { - ret = -EEXIST; - goto out; + up_write(&devices_rwsem); + return -EEXIST; } ret = device_rename(&ibdev->dev, name); - if (ret) - goto out; + if (ret) { + up_write(&devices_rwsem); + return ret; + } + strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); -out: - up_write(&devices_rwsem); - return ret; + + downgrade_write(&devices_rwsem); + down_read(&ibdev->client_data_rwsem); + xan_for_each_marked(&ibdev->client_data, index, client_data, + CLIENT_DATA_REGISTERED) { + struct ib_client *client = xa_load(&clients, index); + + if (!client || !client->rename) + continue; + + client->rename(ibdev, client_data); + } + up_read(&ibdev->client_data_rwsem); + up_read(&devices_rwsem); + return 0; } static int alloc_name(struct ib_device *ibdev, const char *name) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index be9ddcad8f28..4305da2c9037 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -148,6 +148,7 @@ MODULE_PARM_DESC(ch_count, static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device, void *client_data); +static void srp_rename_dev(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, const char *opname); @@ -162,7 +163,8 @@ static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", .add = srp_add_one, - .remove = srp_remove_one + .remove = srp_remove_one, + .rename = srp_rename_dev }; static struct ib_sa_client srp_sa_client; @@ -4112,6 +4114,20 @@ free_host: return NULL; } +static void srp_rename_dev(struct ib_device *device, void *client_data) +{ + struct srp_device *srp_dev = client_data; + struct srp_host *host, *tmp_host; + + list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { + char name[IB_DEVICE_NAME_MAX + 8]; + + snprintf(name, sizeof(name), "srp-%s-%d", + dev_name(&device->dev), host->port); + device_rename(&host->dev, name); + } +} + static void srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0742095355f2..54873085f2da 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2698,6 +2698,7 @@ struct ib_client { const char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *, void *client_data); + void (*rename)(struct ib_device *dev, void *client_data); /* Returns the net_dev belonging to this ib_client and matching the * given parameters. From 619122be3d40c835eb5fad9e326780909926495d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 20 May 2019 09:43:53 +0300 Subject: [PATCH 02/11] RDMA/hns: Fix PD memory leak for internal allocation free_pd is allocated internally by the driver hence needs to be freed internally too or it leaks. Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Signed-off-by: Leon Romanovsky Acked-by: Lijun Ou Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4c5d0f160c10..e068a02122f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -899,6 +899,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); + kfree(&free_mr->mr_free_pd->ibpd); } static int hns_roce_db_init(struct hns_roce_dev *hr_dev) From 6875cb175ca7e0c24aa4460ac2b3257fdf662832 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 21 May 2019 17:55:22 +0000 Subject: [PATCH 03/11] RDMA/core: Clear out the udata before error unwind The core code should not pass a udata to the driver destroy function that contains the input from the create command. Otherwise the driver will attempt to interpret the create udata as destroy udata, and at least in the case of EFA, will leak resources. Zero this stuff out before invoking destroy. Reported-by: Leon Romanovsky Fixes: c4367a26357b ("IB: Pass uverbs_attr_bundle down ib_x destroy path") Reviewed-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.h | 2 ++ drivers/infiniband/core/uverbs_cmd.c | 21 ++++++++++++++----- drivers/infiniband/core/uverbs_std_types_mr.c | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 5445323629b5..e63fbda25e1d 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -110,6 +110,8 @@ int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile); void release_ufile_idr_uobject(struct ib_uverbs_file *ufile); +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs); + /* * This is the runtime description of the uverbs API, used by the syscall * machinery to validate and dispatch calls. diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 5a3a1780ceea..a9b32ebb9beb 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -174,6 +174,17 @@ static int uverbs_request_finish(struct uverbs_req_iter *iter) return 0; } +/* + * When calling a destroy function during an error unwind we need to pass in + * the udata that is sanitized of all user arguments. Ie from the driver + * perspective it looks like no udata was passed. + */ +struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs) +{ + attrs->driver_udata = (struct ib_udata){}; + return &attrs->driver_udata; +} + static struct ib_uverbs_completion_event_file * _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs) { @@ -441,7 +452,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dealloc_pd_user(pd, &attrs->driver_udata); + ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); pd = NULL; err_alloc: kfree(pd); @@ -644,7 +655,7 @@ err_copy: } err_dealloc_xrcd: - ib_dealloc_xrcd(xrcd, &attrs->driver_udata); + ib_dealloc_xrcd(xrcd, uverbs_get_cleared_udata(attrs)); err: uobj_alloc_abort(&obj->uobject, attrs); @@ -767,7 +778,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - ib_dereg_mr_user(mr, &attrs->driver_udata); + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); err_put: uobj_put_obj_read(pd); @@ -2964,7 +2975,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_wq(wq, &attrs->driver_udata); + ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: uobj_put_obj_read(cq); err_put_pd: @@ -3464,7 +3475,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_copy: - ib_destroy_srq_user(srq, &attrs->driver_udata); + ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); err_free: kfree(srq); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 610d3b9f7654..997f7a3a558a 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -148,7 +148,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( return 0; err_dereg: - ib_dereg_mr_user(mr, &attrs->driver_udata); + ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); return ret; } From 6876aaedc8a11ed182aba1942dac44e9940bfe6c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 22 May 2019 11:06:43 +0300 Subject: [PATCH 04/11] RDMA/uverbs: Pass udata on uverbs error unwind When destroy_* is called as a result of uverbs create cleanup flow a cleared udata should be passed instead of NULL to indicate that it is called under user flow. Fixes: c4367a26357b ("IB: Pass uverbs_attr_bundle down ib_x destroy path") Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 9 +++++---- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a9b32ebb9beb..63fe14c7c68f 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1053,7 +1053,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, return obj; err_cb: - ib_destroy_cq(cq); + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); err_file: if (ev_file) @@ -1489,7 +1489,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_cb: - ib_destroy_qp(qp); + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_put: if (!IS_ERR(xrcd_uobj)) @@ -1622,7 +1622,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(&obj->uevent.uobject, attrs); err_destroy: - ib_destroy_qp(qp); + ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); err_xrcd: uobj_put_read(xrcd_uobj); err_put: @@ -2464,7 +2464,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) return uobj_alloc_commit(uobj, attrs); err_copy: - rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE); + rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, + uverbs_get_cleared_udata(attrs)); err_put: uobj_put_obj_read(pd); diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index db5c46a1bb2d..07ea4e3c4566 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -135,7 +135,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return 0; err_cq: - ib_destroy_cq(cq); + ib_destroy_cq_user(cq, uverbs_get_cleared_udata(attrs)); err_event_file: if (ev_file) From 46bdf370f671c90573e8b683d9a14bb335057a36 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 23 May 2019 10:12:51 +0300 Subject: [PATCH 05/11] RDMA/core: Fix panic when port_data isn't initialized This happens if assign_name() returns failure when called from ib_register_device(), that will lead to the following panic in every time that someone touches the port_data's data members. BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 19 PID: 1994 Comm: systemd-udevd Not tainted 5.1.0-rc5+ #1 Hardware name: HP ProLiant DL360p Gen8, BIOS P71 12/20/2013 RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40 Code: 85 ff 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 9c 58 66 66 90 66 90 48 89 c3 fa 66 66 90 66 66 90 31 c0 ba 01 00 00 00 0f b1 17 0f 94 c2 84 d2 74 05 48 89 d8 5b c3 89 c6 e8 b4 85 8a RSP: 0018:ffffa8d7079a7c08 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000202 RCX: ffffa8d7079a7bf8 RDX: 0000000000000001 RSI: ffff93607c990000 RDI: 00000000000000c0 RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffc08c4dd8 R10: 0000000000000000 R11: 0000000000000001 R12: 00000000000000c0 R13: ffff93607c990000 R14: ffffffffc05a9740 R15: ffffa8d7079a7e98 FS: 00007f1c6ee438c0(0000) GS:ffff93609f6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000c0 CR3: 0000000819fca002 CR4: 00000000000606e0 Call Trace: free_netdevs+0x4d/0xe0 [ib_core] ib_dealloc_device+0x51/0xb0 [ib_core] __mlx5_ib_add+0x5e/0x70 [mlx5_ib] mlx5_add_device+0x57/0xe0 [mlx5_core] mlx5_register_interface+0x85/0xc0 [mlx5_core] ? 0xffffffffc0474000 do_one_initcall+0x4e/0x1d4 ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x15f/0x1c0 do_init_module+0x5a/0x218 load_module+0x186b/0x1e40 ? m_show+0x1c0/0x1c0 __do_sys_finit_module+0x94/0xe0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 8ceb1357b337 ("RDMA/device: Consolidate ib_device per_port data into one place") Signed-off-by: Kamal Heib Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index cd6b679badfe..29f7b15c81d9 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -491,14 +491,15 @@ static void ib_device_release(struct device *device) free_netdevs(dev); WARN_ON(refcount_read(&dev->refcount)); - ib_cache_release_one(dev); - ib_security_release_port_pkey_list(dev); - xa_destroy(&dev->compat_devs); - xa_destroy(&dev->client_data); - if (dev->port_data) + if (dev->port_data) { + ib_cache_release_one(dev); + ib_security_release_port_pkey_list(dev); kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu, pdata[0]), rcu_head); + } + xa_destroy(&dev->compat_devs); + xa_destroy(&dev->client_data); kfree_rcu(dev, rcu_head); } @@ -1952,6 +1953,9 @@ static void free_netdevs(struct ib_device *ib_dev) unsigned long flags; unsigned int port; + if (!ib_dev->port_data) + return; + rdma_for_each_port (ib_dev, port) { struct ib_port_data *pdata = &ib_dev->port_data[port]; struct net_device *ndev; From 2abae62a26a265129b364d8c1ef3be55e2c01309 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 24 May 2019 11:44:38 -0400 Subject: [PATCH 06/11] IB/rdmavt: Fix alloc_qpn() WARN_ON() The qpn allocation logic has a WARN_ON() that intends to detect the use of an index that will introduce bits in the lower order bits of the QOS bits in the QPN. Unfortunately, it has the following bugs: - it misfires when wrapping QPN allocation for non-QOS - it doesn't correctly detect low order QOS bits (despite the comment) The WARN_ON() should not be applied to non-QOS (qos_shift == 1). Additionally, it SHOULD test the qpn bits per the table below: 2 data VLs: [qp7, qp6, qp5, qp4, qp3, qp2, qp1] ^ [ 0, 0, 0, 0, 0, 0, sc0], qp bit 1 always 0* 3-4 data VLs: [qp7, qp6, qp5, qp4, qp3, qp2, qp1] ^ [ 0, 0, 0, 0, 0, sc1, sc0], qp bits [21] always 0 5-8 data VLs: [qp7, qp6, qp5, qp4, qp3, qp2, qp1] ^ [ 0, 0, 0, 0, sc2, sc1, sc0] qp bits [321] always 0 Fix by qualifying the warning for qos_shift > 1 and producing the correct mask to insure the above bits are zero without generating a superfluous warning. Fixes: 501edc42446e ("IB/rdmavt: Correct warning during QPN allocation") Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 31a2e65e4906..c5a50614a6c6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -594,7 +594,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, offset = qpt->incr | ((offset & 1) ^ 1); } /* there can be no set bits in low-order QoS bits */ - WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1)); + WARN_ON(rdi->dparms.qos_shift > 1 && + offset & ((BIT(rdi->dparms.qos_shift - 1) - 1) << 1)); qpn = mk_qpn(qpt, map, offset); } From 6d517353c70bb0818b691ca003afdcb5ee5ea44e Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 24 May 2019 11:44:45 -0400 Subject: [PATCH 07/11] IB/hfi1: Insure freeze_work work_struct is canceled on shutdown By code inspection, the freeze_work is never canceled. Fix by adding a cancel_work_sync in the shutdown path to insure it is no longer running. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Reviewed-by: Michael J. Ruhl Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 310105d4e3de..4221a99ee7f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9850,6 +9850,7 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) /* disable the port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + cancel_work_sync(&ppd->freeze_work); } static inline int init_cpu_counters(struct hfi1_devdata *dd) From 35164f5259a47ea756fa1deb3e463ac2a4f10dc9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 24 May 2019 11:44:51 -0400 Subject: [PATCH 08/11] IB/{qib, hfi1, rdmavt}: Correct ibv_devinfo max_mr value The command 'ibv_devinfo -v' reports 0 for max_mr. Fix by assigning the query values after the mr lkey_table has been built rather than early on in the driver. Fixes: 7b1e2099adc8 ("IB/rdmavt: Move memory registration into rdmavt") Reviewed-by: Josh Collier Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/verbs.c | 2 -- drivers/infiniband/hw/qib/qib_verbs.c | 2 -- drivers/infiniband/sw/rdmavt/mr.c | 2 ++ 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 1eb4105b2d22..a2b26a635baf 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1356,8 +1356,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd) rdi->dparms.props.max_cq = hfi1_max_cqs; rdi->dparms.props.max_ah = hfi1_max_ahs; rdi->dparms.props.max_cqe = hfi1_max_cqes; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_pd = hfi1_max_pds; rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 5ff32d32c61c..2c4e569ce438 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1459,8 +1459,6 @@ static void qib_fill_device_attr(struct qib_devdata *dd) rdi->dparms.props.max_cq = ib_qib_max_cqs; rdi->dparms.props.max_cqe = ib_qib_max_cqes; rdi->dparms.props.max_ah = ib_qib_max_ahs; - rdi->dparms.props.max_mr = rdi->lkey_table.max; - rdi->dparms.props.max_fmr = rdi->lkey_table.max; rdi->dparms.props.max_map_per_fmr = 32767; rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; rdi->dparms.props.max_qp_init_rd_atom = 255; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 54f3f9c27552..f48240f66b8f 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -96,6 +96,8 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) for (i = 0; i < rdi->lkey_table.max; i++) RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); + rdi->dparms.props.max_mr = rdi->lkey_table.max; + rdi->dparms.props.max_fmr = rdi->lkey_table.max; return 0; } From 97736f36dbebf2cda2799db3b54717ba5b388255 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Fri, 24 May 2019 11:45:04 -0400 Subject: [PATCH 09/11] IB/hfi1: Validate page aligned for a given virtual address User applications can register memory regions for TID buffers that are not aligned on page boundaries. Hfi1 is expected to pin those pages in memory and cache the pages with mmu_rb. The rb tree will fail to insert pages that are not aligned correctly. Validate whether a given virtual address is page aligned before pinning. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0cd71ce7cc71..3592a9ec155e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -324,6 +324,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + if (!PAGE_ALIGNED(tinfo->vaddr)) + return -EINVAL; + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) return -ENOMEM; From 37eb86c4507abcb14fc346863e83aa8751aa4675 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 20 May 2019 13:19:02 +0200 Subject: [PATCH 10/11] mlx5: avoid 64-bit division Commit 25c13324d03d ("IB/mlx5: Add steering SW ICM device memory type") breaks i386 build by introducing three 64-bit divisions. As the divisor is MLX5_SW_ICM_BLOCK_SIZE() which is always a power of 2, we can replace the division with bit operations. Fixes: 25c13324d03d ("IB/mlx5: Add steering SW ICM device memory type") Signed-off-by: Michal Kubecek Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/cmd.c | 9 +++++++-- drivers/infiniband/hw/mlx5/main.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index e3ec79b8f7f5..6c8645033102 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -190,12 +190,12 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id) { struct mlx5_core_dev *dev = dm->dev; - u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; unsigned long *block_map; u64 icm_start_addr; u32 log_icm_size; + u32 num_blocks; u32 max_blocks; u64 block_idx; void *sw_icm; @@ -224,6 +224,8 @@ int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, return -EINVAL; } + num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); spin_lock(&dm->lock); block_idx = bitmap_find_next_zero_area(block_map, @@ -266,13 +268,16 @@ int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, u16 uid, phys_addr_t addr, u32 obj_id) { struct mlx5_core_dev *dev = dm->dev; - u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev)); u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; unsigned long *block_map; + u32 num_blocks; u64 start_idx; int err; + num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> + MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); + switch (type) { case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: start_idx = diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index abac70ad5c7c..340290b883fe 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2344,7 +2344,7 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, /* Allocation size must a multiple of the basic block size * and a power of 2. */ - act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); act_size = roundup_pow_of_two(act_size); dm->size = act_size; From 4f240dfec6bcc852b124ea7c419fb590949fbd4c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 28 May 2019 15:46:13 +0300 Subject: [PATCH 11/11] RDMA/efa: Remove MAYEXEC flag check from mmap flow MAYEXEC test was mistakenly added, remove it. Checking MAYEXEC in the driver prevents it from working with userspace that uses things like EXEC STACK. (ie some Fortran and other runtimes) Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Reported-by: Jason Gunthorpe Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 6d6886c9009f..0fea5d63fdbe 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1728,7 +1728,6 @@ int efa_mmap(struct ib_ucontext *ibucontext, ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); return -EPERM; } - vma->vm_flags &= ~VM_MAYEXEC; return __efa_mmap(dev, ucontext, vma, key, length); }