Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Final set of -rc fixes for 4.6.

  I've collected up a number of patches that are all pretty small, with
  the exception of only a couple.  The hfi1 driver has a number of
  important patches, and it is what really drives the line count of this
  pull request up.  These are all small and I've got this kernel built
  and running in the test lab (I have most of the hardware; I think nes
  is the only thing in this patch set that I can't say I've personally
  tested and have up and running).

  Summary:

   - A number of collected fixes for oopses, memory corruptions,
     deadlocks, etc.  All of these fixes are small (many only 5-10
     lines), obvious, and tested.

   - Fix for the security issue related to the use of write for
     bi-directional communications"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  RDMA/nes: don't leak skb if carrier down
  IB/security: Restrict use of the write() interface
  IB/hfi1: Use kernel default llseek for ui device
  IB/hfi1: Don't attempt to free resources if initialization failed
  IB/hfi1: Fix missing lock/unlock in verbs drain callback
  IB/rdmavt: Fix send scheduling
  IB/hfi1: Prevent unpinning of wrong pages
  IB/hfi1: Fix deadlock caused by locking with wrong scope
  IB/hfi1: Prevent NULL pointer dereferences in caching code
  MAINTAINERS: Update iser/isert maintainer contact info
  IB/mlx5: Expose correct max_sge_rd limit
  RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4 chips
  iw_cxgb4: handle draining an idle qp
  iw_cxgb3: initialize ibdev.iwcm->ifname for port mapping
  iw_cxgb4: initialize ibdev.iwcm->ifname for port mapping
  IB/core: Don't drain non-existent rq queue-pair
  IB/core: Fix oops in ib_cache_gid_set_default_gid
Linus Torvalds 2016-04-29 17:07:54 -07:00
commit 925d96a0c9
23 changed files with 173 additions and 102 deletions

MAINTAINERS

@ -6027,7 +6027,7 @@ F: include/scsi/*iscsi*
ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
M: Or Gerlitz <ogerlitz@mellanox.com>
M: Sagi Grimberg <sagig@mellanox.com>
M: Sagi Grimberg <sagi@grimberg.me>
M: Roi Dayan <roid@mellanox.com>
L: linux-rdma@vger.kernel.org
S: Supported
@ -6037,7 +6037,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
F: drivers/infiniband/ulp/iser/
ISCSI EXTENSIONS FOR RDMA (ISER) TARGET
M: Sagi Grimberg <sagig@mellanox.com>
M: Sagi Grimberg <sagi@grimberg.me>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master
L: linux-rdma@vger.kernel.org
L: target-devel@vger.kernel.org

drivers/infiniband/core/cache.c

@ -691,7 +691,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
NULL);
/* Coudn't find default GID location */
WARN_ON(ix < 0);
if (WARN_ON(ix < 0))
goto release;
zattr_type.gid_type = gid_type;

drivers/infiniband/core/ucm.c

@ -48,6 +48,7 @@
#include <asm/uaccess.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_user_cm.h>
#include <rdma/ib_marshall.h>
@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
struct ib_ucm_cmd_hdr hdr;
ssize_t result;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (len < sizeof(hdr))
return -EINVAL;

drivers/infiniband/core/ucma.c

@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
struct rdma_ucm_cmd_hdr hdr;
ssize_t ret;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (len < sizeof(hdr))
return -EINVAL;

drivers/infiniband/core/uverbs_main.c

@ -48,6 +48,8 @@
#include <asm/uaccess.h>
#include <rdma/ib.h>
#include "uverbs.h"
MODULE_AUTHOR("Roland Dreier");
@ -709,6 +711,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
int srcu_key;
ssize_t ret;
if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
return -EACCES;
if (count < sizeof hdr)
return -EINVAL;

drivers/infiniband/core/verbs.c

@ -1860,6 +1860,7 @@ EXPORT_SYMBOL(ib_drain_rq);
void ib_drain_qp(struct ib_qp *qp)
{
ib_drain_sq(qp);
ib_drain_rq(qp);
if (!qp->srq)
ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
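
Note: a QP created with an SRQ has no receive queue of its own, so draining
one would post drain work to a non-existent RQ; hence the new !qp->srq guard
("IB/core: Don't drain non-existent rq queue-pair" in the shortlog above).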

drivers/infiniband/hw/cxgb3/iwch_provider.c

@ -1390,6 +1390,8 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)

drivers/infiniband/hw/cxgb4/cq.c

@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_va) {
if (user && !cq->bar2_pa) {
pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;

drivers/infiniband/hw/cxgb4/provider.c

@ -580,6 +580,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)

drivers/infiniband/hw/cxgb4/qp.c

@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
if (pbar2_pa)
*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
if (is_t4(rdev->lldi.adapter_type))
return NULL;
return rdev->bar2_kva + bar2_qoffset;
}
@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
/*
* User mode must have bar2 access.
*/
if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) {
if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
goto free_dma;
@ -1895,13 +1899,27 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
void c4iw_drain_sq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
wait_for_completion(&qp->sq_drained);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_sq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
if (need_to_wait)
wait_for_completion(&qp->sq_drained);
}
void c4iw_drain_rq(struct ib_qp *ibqp)
{
struct c4iw_qp *qp = to_c4iw_qp(ibqp);
unsigned long flag;
bool need_to_wait;
wait_for_completion(&qp->rq_drained);
spin_lock_irqsave(&qp->lock, flag);
need_to_wait = !t4_rq_empty(&qp->wq);
spin_unlock_irqrestore(&qp->lock, flag);
if (need_to_wait)
wait_for_completion(&qp->rq_drained);
}
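
Note: an idle QP generates no completion to signal sq_drained/rq_drained, so
an unconditional wait_for_completion() would block forever; the drain hooks
now sample queue emptiness under the QP lock and wait only when work is
actually outstanding ("iw_cxgb4: handle draining an idle qp").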

drivers/infiniband/hw/mlx5/main.c

@ -530,7 +530,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);

drivers/infiniband/hw/nes/nes_nic.c

@ -500,9 +500,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
* skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
*/
if (!netif_carrier_ok(netdev))
return NETDEV_TX_OK;
if (netif_queue_stopped(netdev))
return NETDEV_TX_BUSY;

drivers/infiniband/hw/qib/qib_file_ops.c

@ -45,6 +45,8 @@
#include <linux/export.h>
#include <linux/uio.h>
#include <rdma/ib.h>
#include "qib.h"
#include "qib_common.h"
#include "qib_user_sdma.h"
@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ssize_t ret = 0;
void *dest;
if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
return -EACCES;
if (count < sizeof(cmd.type)) {
ret = -EINVAL;
goto bail;

drivers/infiniband/sw/rdmavt/qp.c

@ -1637,9 +1637,9 @@ bail:
spin_unlock_irqrestore(&qp->s_hlock, flags);
if (nreq) {
if (call_send)
rdi->driver_f.schedule_send_no_lock(qp);
else
rdi->driver_f.do_send(qp);
else
rdi->driver_f.schedule_send_no_lock(qp);
}
return err;
}

drivers/staging/rdma/hfi1/TODO

@ -3,4 +3,4 @@ July, 2015
- Remove unneeded file entries in sysfs
- Remove software processing of IB protocol and place in library for use
by qib, ipath (if still present), hfi1, and eventually soft-roce
- Replace incorrect uAPI

drivers/staging/rdma/hfi1/file_ops.c

@ -49,6 +49,8 @@
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <rdma/ib.h>
#include "hfi.h"
#include "pio.h"
#include "device.h"
@ -190,6 +192,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
int uctxt_required = 1;
int must_be_root = 0;
/* FIXME: This interface cannot continue out of staging */
if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
return -EACCES;
if (count < sizeof(cmd)) {
ret = -EINVAL;
goto bail;
@ -791,15 +797,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
dd->rcd[uctxt->ctxt] = NULL;
hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
uctxt->rcvwait_to = 0;
uctxt->piowait_to = 0;
uctxt->rcvnowait = 0;
uctxt->pionowait = 0;
uctxt->event_flags = 0;
hfi1_user_exp_rcv_free(fdata);
hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd);
@ -1127,27 +1134,13 @@ bail:
static int user_init(struct file *fp)
{
int ret;
unsigned int rcvctrl_ops = 0;
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
/* make sure that the context has already been setup */
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags)) {
ret = -EFAULT;
goto done;
}
/*
* Subctxts don't need to initialize anything since master
* has done it.
*/
if (fd->subctxt) {
ret = wait_event_interruptible(uctxt->wait, !test_bit(
HFI1_CTXT_MASTER_UNINIT,
&uctxt->event_flags));
goto expected;
}
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
@ -1202,19 +1195,7 @@ static int user_init(struct file *fp)
wake_up(&uctxt->wait);
}
expected:
/*
* Expected receive has to be setup for all processes (including
* shared contexts). However, it has to be done after the master
* context has been fully configured as it depends on the
* eager/expected split of the RcvArray entries.
* Setting it up here ensures that the subcontexts will be waiting
* (due to the above wait_event_interruptible() until the master
* is setup.
*/
ret = hfi1_user_exp_rcv_init(fp);
done:
return ret;
return 0;
}
static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
@ -1261,7 +1242,7 @@ static int setup_ctxt(struct file *fp)
int ret = 0;
/*
* Context should be set up only once (including allocation and
* Context should be set up only once, including allocation and
* programming of eager buffers. This is done if context sharing
* is not requested or by the master process.
*/
@ -1282,8 +1263,27 @@ static int setup_ctxt(struct file *fp)
if (ret)
goto done;
}
} else {
ret = wait_event_interruptible(uctxt->wait, !test_bit(
HFI1_CTXT_MASTER_UNINIT,
&uctxt->event_flags));
if (ret)
goto done;
}
ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
if (ret)
goto done;
/*
* Expected receive has to be setup for all processes (including
* shared contexts). However, it has to be done after the master
* context has been fully configured as it depends on the
* eager/expected split of the RcvArray entries.
* Setting it up here ensures that the subcontexts will be waiting
* (due to the above wait_event_interruptible() until the master
* is setup.
*/
ret = hfi1_user_exp_rcv_init(fp);
if (ret)
goto done;
@ -1565,29 +1565,8 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
{
struct hfi1_devdata *dd = filp->private_data;
switch (whence) {
case SEEK_SET:
break;
case SEEK_CUR:
offset += filp->f_pos;
break;
case SEEK_END:
offset = ((dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE) -
offset;
break;
default:
return -EINVAL;
}
if (offset < 0)
return -EINVAL;
if (offset >= (dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE)
return -EINVAL;
filp->f_pos = offset;
return filp->f_pos;
return fixed_size_llseek(filp, offset, whence,
(dd->kregend - dd->kregbase) + DC8051_DATA_MEM_SIZE);
}
/* NOTE: assumes unsigned long is 8 bytes */
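
Note: fixed_size_llseek() (fs/read_write.c) performs the same
SEEK_SET/SEEK_CUR/SEEK_END arithmetic and bounds checking as the removed
open-coded version, against the fixed size passed as its last argument.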

drivers/staging/rdma/hfi1/mmu_rb.c

@ -71,6 +71,7 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *,
unsigned long, unsigned long);
static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
struct mm_struct *,
unsigned long, unsigned long);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
@ -137,7 +138,7 @@ void hfi1_mmu_rb_unregister(struct rb_root *root)
rbnode = rb_entry(node, struct mmu_rb_node, node);
rb_erase(node, root);
if (handler->ops->remove)
handler->ops->remove(root, rbnode, false);
handler->ops->remove(root, rbnode, NULL);
}
}
@ -176,7 +177,7 @@ unlock:
return ret;
}
/* Caller must host handler lock */
/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr,
unsigned long len)
@ -200,15 +201,21 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
/* Caller must *not* hold handler lock. */
static void __mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *node, bool arg)
struct mmu_rb_node *node, struct mm_struct *mm)
{
unsigned long flags;
/* Validity of handler and node pointers has been checked by caller. */
hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
node->len);
spin_lock_irqsave(&handler->lock, flags);
__mmu_int_rb_remove(node, handler->root);
spin_unlock_irqrestore(&handler->lock, flags);
if (handler->ops->remove)
handler->ops->remove(handler->root, node, arg);
handler->ops->remove(handler->root, node, mm);
}
struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
@ -231,14 +238,11 @@ struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
{
struct mmu_rb_handler *handler = find_mmu_handler(root);
unsigned long flags;
if (!handler || !node)
return;
spin_lock_irqsave(&handler->lock, flags);
__mmu_rb_remove(handler, node, false);
spin_unlock_irqrestore(&handler->lock, flags);
__mmu_rb_remove(handler, node, NULL);
}
static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
@ -260,7 +264,7 @@ unlock:
static inline void mmu_notifier_page(struct mmu_notifier *mn,
struct mm_struct *mm, unsigned long addr)
{
mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
}
static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
@ -268,25 +272,31 @@ static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
mmu_notifier_mem_invalidate(mn, start, end);
mmu_notifier_mem_invalidate(mn, mm, start, end);
}
static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
struct rb_root *root = handler->root;
struct mmu_rb_node *node;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
node = __mmu_int_rb_iter_next(node, start, end - 1)) {
for (node = __mmu_int_rb_iter_first(root, start, end - 1);
node; node = ptr) {
/* Guard against node removal. */
ptr = __mmu_int_rb_iter_next(node, start, end - 1);
hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
node->addr, node->len);
if (handler->ops->invalidate(root, node))
__mmu_rb_remove(handler, node, true);
if (handler->ops->invalidate(root, node)) {
spin_unlock_irqrestore(&handler->lock, flags);
__mmu_rb_remove(handler, node, mm);
spin_lock_irqsave(&handler->lock, flags);
}
}
spin_unlock_irqrestore(&handler->lock, flags);
}
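
Note: the remove callback may sleep (it can end up unpinning user pages), so
it can no longer be invoked under the handler spinlock; the iterator is
advanced to the next node ("ptr") before the lock is dropped, so removal of
the current node cannot corrupt the walk.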

drivers/staging/rdma/hfi1/mmu_rb.h

@ -59,7 +59,8 @@ struct mmu_rb_node {
struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
int (*insert)(struct rb_root *, struct mmu_rb_node *);
void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
void (*remove)(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
};

drivers/staging/rdma/hfi1/qp.c

@ -519,10 +519,12 @@ static void iowait_sdma_drained(struct iowait *wait)
* do the flush work until that QP's
* sdma work has finished.
*/
spin_lock(&qp->s_lock);
if (qp->s_flags & RVT_S_WAIT_DMA) {
qp->s_flags &= ~RVT_S_WAIT_DMA;
hfi1_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
}
/**
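
Note: qp->s_flags must be tested and cleared under qp->s_lock; the added
lock/unlock pair is the "IB/hfi1: Fix missing lock/unlock in verbs drain
callback" change from the shortlog.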

drivers/staging/rdma/hfi1/user_exp_rcv.c

@ -87,7 +87,8 @@ static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
static int set_rcvarray_entry(struct file *, unsigned long, u32,
struct tid_group *, struct page **, unsigned);
static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
struct tid_pageset *, unsigned, u16, struct page **,
@ -254,6 +255,8 @@ int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_group *grp, *gptr;
if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
return 0;
/*
* The notifier would have been removed when the process'es mm
* was freed.
@ -899,7 +902,7 @@ static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
if (HFI1_CAP_IS_USET(TID_UNMAP))
mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
mmu_rb_remove(&fd->tid_rb_root, &node->mmu, NULL);
else
hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
@ -965,7 +968,7 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
continue;
if (HFI1_CAP_IS_USET(TID_UNMAP))
mmu_rb_remove(&fd->tid_rb_root,
&node->mmu, false);
&node->mmu, NULL);
else
hfi1_mmu_rb_remove(&fd->tid_rb_root,
&node->mmu);
@ -1032,7 +1035,7 @@ static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
}
static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
bool notifier)
struct mm_struct *mm)
{
struct hfi1_filedata *fdata =
container_of(root, struct hfi1_filedata, tid_rb_root);

drivers/staging/rdma/hfi1/user_sdma.c

@ -278,7 +278,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
static void user_sdma_free_request(struct user_sdma_request *, bool);
static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
unsigned);
static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *,
@ -299,7 +300,8 @@ static int defer_packet_queue(
static void activate_packet_queue(struct iowait *, int);
static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *,
struct mm_struct *);
static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
static struct mmu_rb_ops sdma_rb_ops = {
@ -1063,8 +1065,10 @@ static int pin_vector_pages(struct user_sdma_request *req,
rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
(unsigned long)iovec->iov.iov_base,
iovec->iov.iov_len);
if (rb_node)
if (rb_node && !IS_ERR(rb_node))
node = container_of(rb_node, struct sdma_mmu_node, rb);
else
rb_node = NULL;
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@ -1107,7 +1111,8 @@ retry:
goto bail;
}
if (pinned != npages) {
unpin_vector_pages(current->mm, pages, pinned);
unpin_vector_pages(current->mm, pages, node->npages,
pinned);
ret = -EFAULT;
goto bail;
}
@ -1147,9 +1152,9 @@ bail:
}
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
unsigned npages)
unsigned start, unsigned npages)
{
hfi1_release_user_pages(mm, pages, npages, 0);
hfi1_release_user_pages(mm, pages + start, npages, 0);
kfree(pages);
}
@ -1502,7 +1507,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
&req->pq->sdma_rb_root,
(unsigned long)req->iovs[i].iov.iov_base,
req->iovs[i].iov.iov_len);
if (!mnode)
if (!mnode || IS_ERR(mnode))
continue;
node = container_of(mnode, struct sdma_mmu_node, rb);
@ -1547,7 +1552,7 @@ static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
}
static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
bool notifier)
struct mm_struct *mm)
{
struct sdma_mmu_node *node =
container_of(mnode, struct sdma_mmu_node, rb);
@ -1557,14 +1562,20 @@ static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
node->pq->n_locked -= node->npages;
spin_unlock(&node->pq->evict_lock);
unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
/*
* If mm is set, we are being called by the MMU notifier and we
* should not pass a mm_struct to unpin_vector_page(). This is to
* prevent a deadlock when hfi1_release_user_pages() attempts to
* take the mmap_sem, which the MMU notifier has already taken.
*/
unpin_vector_pages(mm ? NULL : current->mm, node->pages, 0,
node->npages);
/*
* If called by the MMU notifier, we have to adjust the pinned
* page count ourselves.
*/
if (notifier)
current->mm->pinned_vm -= node->npages;
if (mm)
mm->pinned_vm -= node->npages;
kfree(node);
}

include/linux/mlx5/device.h

@ -392,6 +392,17 @@ enum {
MLX5_CAP_OFF_CMDIF_CSUM = 46,
};
enum {
/*
* Max wqe size for rdma read is 512 bytes, so this
* limits our max_sge_rd as the wqe needs to fit:
* - ctrl segment (16 bytes)
* - rdma segment (16 bytes)
* - scatter elements (16 bytes each)
*/
MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16
};
struct mlx5_inbox_hdr {
__be16 opcode;
u8 rsvd[4];
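
Note: working the comment's arithmetic, (512 - 16 - 16) / 16 = 30, so
max_sge_rd is now capped at 30 scatter elements instead of inheriting
max_sge.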

include/rdma/ib.h

@ -34,6 +34,7 @@
#define _RDMA_IB_H
#include <linux/types.h>
#include <linux/sched.h>
struct ib_addr {
union {
@ -86,4 +87,19 @@ struct sockaddr_ib {
__u64 sib_scope_id;
};
/*
* The IB interfaces that use write() as bi-directional ioctl() are
* fundamentally unsafe, since there are lots of ways to trigger "write()"
* calls from various contexts with elevated privileges. That includes the
* traditional suid executable error message writes, but also various kernel
* interfaces that can write to file descriptors.
*
* This function provides protection for the legacy API by restricting the
* calling context.
*/
static inline bool ib_safe_file_access(struct file *filp)
{
return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
}
#endif /* _RDMA_IB_H */