Merge branch 'mr_fix' into git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma for-next

Update mlx4 to support user MR creation against read-only memory;
previously it required the memory to be writable.

Based on rdma for-rc due to dependencies.

* mr_fix: (2 commits)
  IB/mlx4: Mark user MR as writable if actual virtual memory is writable
  IB/core: Make testing MR flags for writability a static inline function
Jason Gunthorpe 2018-05-28 11:44:35 -06:00
commit 0394808d9e
31 changed files with 174 additions and 117 deletions
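
For a sense of what the mlx4 change means for a consumer, here is a hypothetical userspace sketch (not part of this series; it assumes an already-created ibv_pd and uses the standard libibverbs API): an application can now register an MR over a read-only mapping as long as it requests no write access flags.

/*
 * Hypothetical userspace sketch, not part of this series: register an MR
 * over a read-only mapping. With the mlx4 fix, the driver no longer has to
 * force a writable pin of the pages when only read access is requested.
 */
#include <stddef.h>
#include <sys/mman.h>
#include <infiniband/verbs.h>

static struct ibv_mr *reg_read_only_buffer(struct ibv_pd *pd, size_t len)
{
    /* A mapping the process can only read. */
    void *buf = mmap(NULL, len, PROT_READ,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (buf == MAP_FAILED)
        return NULL;

    /* Only read access is requested, so no write flags are passed. */
    return ibv_reg_mr(pd, buf, len, IBV_ACCESS_REMOTE_READ);
}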


@@ -5391,7 +5391,6 @@ S: Maintained
 F: drivers/iommu/exynos-iommu.c
 
 EZchip NPS platform support
-M: Elad Kanfi <eladkan@mellanox.com>
 M: Vineet Gupta <vgupta@synopsys.com>
 S: Supported
 F: arch/arc/plat-eznps
@@ -9012,7 +9011,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
 F: drivers/net/ethernet/mellanox/mlx5/core/en_*
 
 MELLANOX ETHERNET INNOVA DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
 R: Boris Pismenny <borisp@mellanox.com>
 L: netdev@vger.kernel.org
 S: Supported
@@ -9022,7 +9020,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
 F: include/linux/mlx5/mlx5_ifc_fpga.h
 
 MELLANOX ETHERNET INNOVA IPSEC DRIVER
-M: Ilan Tayari <ilant@mellanox.com>
 R: Boris Pismenny <borisp@mellanox.com>
 L: netdev@vger.kernel.org
 S: Supported
@@ -9078,7 +9075,6 @@ F: include/uapi/rdma/mlx4-abi.h
 
 MELLANOX MLX5 core VPI driver
 M: Saeed Mahameed <saeedm@mellanox.com>
-M: Matan Barak <matanb@mellanox.com>
 M: Leon Romanovsky <leonro@mellanox.com>
 L: netdev@vger.kernel.org
 L: linux-rdma@vger.kernel.org
@@ -9089,7 +9085,6 @@ F: drivers/net/ethernet/mellanox/mlx5/core/
 F: include/linux/mlx5/
 
 MELLANOX MLX5 IB driver
-M: Matan Barak <matanb@mellanox.com>
 M: Leon Romanovsky <leonro@mellanox.com>
 L: linux-rdma@vger.kernel.org
 W: http://www.mellanox.com
@@ -9821,7 +9816,6 @@ F: net/netfilter/xt_CONNSECMARK.c
 F: net/netfilter/xt_SECMARK.c
 
 NETWORKING [TLS]
-M: Ilya Lesokhin <ilyal@mellanox.com>
 M: Aviad Yehezkel <aviadye@mellanox.com>
 M: Dave Watson <davejwatson@fb.com>
 L: netdev@vger.kernel.org


@@ -117,20 +117,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 umem->length = size;
 umem->address = addr;
 umem->page_shift = PAGE_SHIFT;
-umem->pid = get_task_pid(current, PIDTYPE_PID);
-/*
- * We ask for writable memory if any of the following
- * access flags are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
- */
-umem->writable = !!(access &
-(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
-IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+umem->writable = ib_access_writable(access);
 
 if (access & IB_ACCESS_ON_DEMAND) {
-put_pid(umem->pid);
 ret = ib_umem_odp_get(context, umem, access);
 if (ret) {
 kfree(umem);
@@ -146,7 +135,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 page_list = (struct page **) __get_free_page(GFP_KERNEL);
 if (!page_list) {
-put_pid(umem->pid);
 kfree(umem);
 return ERR_PTR(-ENOMEM);
 }
@@ -229,7 +217,6 @@ out:
 if (ret < 0) {
 if (need_release)
 __ib_umem_release(context->device, umem, 0);
-put_pid(umem->pid);
 kfree(umem);
 } else
 current->mm->pinned_vm = locked;
@@ -272,8 +259,7 @@ void ib_umem_release(struct ib_umem *umem)
 __ib_umem_release(umem->context->device, umem, 1);
 
-task = get_pid_task(umem->pid, PIDTYPE_PID);
-put_pid(umem->pid);
+task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
 if (!task)
 goto out;
 mm = get_task_mm(task);


@@ -489,10 +489,10 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
 err_dereg_mem:
 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
 mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
-err_free_wr_wait:
-c4iw_put_wr_wait(mhp->wr_waitp);
 err_free_skb:
 kfree_skb(mhp->dereg_skb);
+err_free_wr_wait:
+c4iw_put_wr_wait(mhp->wr_waitp);
 err_free_mhp:
 kfree(mhp);
 return ERR_PTR(ret);


@@ -5945,6 +5945,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 u64 status;
 u32 sw_index;
 int i = 0;
+unsigned long irq_flags;
 
 sw_index = dd->hw_to_sw[hw_context];
 if (sw_index >= dd->num_send_contexts) {
@@ -5954,10 +5955,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 return;
 }
 
 sci = &dd->send_contexts[sw_index];
+spin_lock_irqsave(&dd->sc_lock, irq_flags);
 sc = sci->sc;
 if (!sc) {
 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
 sw_index, hw_context);
+spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
 return;
 }
@@ -5979,6 +5982,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 */
 if (sc->type != SC_USER)
 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
 
 /*
 * Update the counters for the corresponding status bits.


@@ -377,6 +377,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 hr_cq->set_ci_db = hr_cq->db.db_record;
 *hr_cq->set_ci_db = 0;
+hr_cq->db_en = 1;
 }
 
 /* Init mmt table and write buff address to mtt table */


@@ -722,6 +722,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 free_mr->mr_free_pd = to_hr_pd(pd);
 free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
 free_mr->mr_free_pd->ibpd.uobject = NULL;
+free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
 atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
 
 attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
@@ -1036,7 +1037,7 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
 do {
 ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
-if (ret < 0) {
+if (ret < 0 && hr_qp) {
 dev_err(dev,
 "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
 hr_qp->qpn, ret, hr_mr->key, ne);


@@ -142,8 +142,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 unsigned long flags;
 unsigned int ind;
 void *wqe = NULL;
-u32 tmp_len = 0;
 bool loopback;
+u32 tmp_len;
 int ret = 0;
 u8 *smac;
 int nreq;
@@ -189,6 +189,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 owner_bit =
 ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+tmp_len = 0;
 
 /* Corresponding to the QP type, wqe process separately */
 if (ibqp->qp_type == IB_QPT_GSI) {
@@ -547,17 +548,21 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 }
 
 if (i < hr_qp->rq.max_gs) {
-dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-dseg[i].addr = 0;
+dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+dseg->addr = 0;
 }
 
 /* rq support inline data */
+if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
 sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
-hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
+hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
+(u32)wr->num_sge;
 for (i = 0; i < wr->num_sge; i++) {
-sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+sge_list[i].addr =
+(void *)(u64)wr->sg_list[i].addr;
 sge_list[i].len = wr->sg_list[i].length;
 }
+}
 
 hr_qp->rq.wrid[ind] = wr->wr_id;
@@ -613,6 +618,8 @@ static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
 dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
 ring->desc_num * sizeof(struct hns_roce_cmq_desc),
 DMA_BIDIRECTIONAL);
+
+ring->desc_dma_addr = 0;
 kfree(ring->desc);
 }
@@ -1081,6 +1088,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 if (ret) {
 dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
 ret);
+return ret;
 }
 
 /* Get pf resource owned by every pf */
@@ -1373,6 +1381,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
 mr->type == MR_TYPE_MR ? 0 : 1);
+roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
+1);
 mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
 
 mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
@@ -2171,6 +2181,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 struct hns_roce_v2_qp_context *context,
 struct hns_roce_v2_qp_context *qpc_mask)
 {
+struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 
 /*
@@ -2283,7 +2294,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
 qpc_mask->rq_db_record_addr = 0;
 
-roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
+roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
+(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
 
 roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -4707,6 +4719,8 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
 {0, }
 };
 
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
 static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
 struct hnae3_handle *handle)
 {


@@ -198,7 +198,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 memset(props, 0, sizeof(*props));
 
-props->sys_image_guid = cpu_to_be32(hr_dev->sys_image_guid);
+props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
 props->max_mr_size = (u64)(~(0ULL));
 props->page_size_cap = hr_dev->caps.page_size_cap;
 props->vendor_id = hr_dev->vendor_id;


@@ -660,6 +660,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 goto err_rq_sge_list;
 }
 *hr_qp->rdb.db_record = 0;
+hr_qp->rdb_en = 1;
 }
 
 /* Allocate QP buf */
@@ -955,7 +956,14 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 }
 
 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+if (hr_dev->caps.min_wqes) {
+ret = -EPERM;
+dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
+new_state);
+} else {
 ret = 0;
+}
+
 goto out;
 }


@@ -207,6 +207,7 @@ struct i40iw_msix_vector {
 u32 irq;
 u32 cpu_affinity;
 u32 ceq_id;
+cpumask_t mask;
 };
 
 struct l2params_work {


@@ -2085,7 +2085,7 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
 if (netif_is_bond_slave(netdev))
 netdev = netdev_master_upper_dev_get(netdev);
 
-neigh = dst_neigh_lookup(dst, &dst_addr);
+neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
 
 rcu_read_lock();
 if (neigh) {


@@ -331,7 +331,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
 switch (info->ae_id) {
 case I40IW_AE_LLP_FIN_RECEIVED:
 if (qp->term_flags)
-continue;
+break;
 if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
 iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
 if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
@@ -360,7 +360,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
 break;
 case I40IW_AE_LLP_CONNECTION_RESET:
 if (atomic_read(&iwqp->close_timer_started))
-continue;
+break;
 i40iw_cm_disconn(iwqp);
 break;
 case I40IW_AE_QP_SUSPEND_COMPLETE:


@@ -687,7 +687,6 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
 struct i40iw_msix_vector *msix_vec)
 {
 enum i40iw_status_code status;
-cpumask_t mask;
 
 if (iwdev->msix_shared && !ceq_id) {
 tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,9 +696,9 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
 status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
 }
 
-cpumask_clear(&mask);
-cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
-irq_set_affinity_hint(msix_vec->irq, &mask);
+cpumask_clear(&msix_vec->mask);
+cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
 
 if (status) {
 i40iw_pr_err("ceq irq config fail\n");


@@ -394,6 +394,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
 list_for_each_entry(iwpbl, pbl_list, list) {
 if (iwpbl->user_base == va) {
+iwpbl->on_list = false;
 list_del(&iwpbl->list);
 return iwpbl;
 }
@@ -614,6 +615,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
 return ERR_PTR(-ENOMEM);
 
 iwqp = (struct i40iw_qp *)mem;
+iwqp->allocated_buffer = mem;
 qp = &iwqp->sc_qp;
 qp->back_qp = (void *)iwqp;
 qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
@@ -642,7 +644,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
 goto error;
 }
 
-iwqp->allocated_buffer = mem;
 iwqp->iwdev = iwdev;
 iwqp->iwpd = iwpd;
 iwqp->ibqp.qp_num = qp_num;
@@ -1898,6 +1899,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 goto error;
 spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
 list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+iwpbl->on_list = true;
 spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
 break;
 case IW_MEMREG_TYPE_CQ:
@@ -1908,6 +1910,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
 list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+iwpbl->on_list = true;
 spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
 break;
 case IW_MEMREG_TYPE_MEM:
@@ -2045,14 +2048,18 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr,
 switch (iwmr->type) {
 case IW_MEMREG_TYPE_CQ:
 spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-if (!list_empty(&ucontext->cq_reg_mem_list))
+if (iwpbl->on_list) {
+iwpbl->on_list = false;
 list_del(&iwpbl->list);
+}
 spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
 break;
 case IW_MEMREG_TYPE_QP:
 spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
-if (!list_empty(&ucontext->qp_reg_mem_list))
+if (iwpbl->on_list) {
+iwpbl->on_list = false;
 list_del(&iwpbl->list);
+}
 spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
 break;
 default:


@@ -78,6 +78,7 @@ struct i40iw_pbl {
 };
 
 bool pbl_allocated;
+bool on_list;
 u64 user_base;
 struct i40iw_pble_alloc pble_alloc;
 struct i40iw_mr *iwmr;


@@ -367,6 +367,40 @@ end:
 return block_shift;
 }
 
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+u64 length, u64 virt_addr,
+int access_flags)
+{
+/*
+ * Force registering the memory as writable if the underlying pages
+ * are writable. This is so rereg can change the access permissions
+ * from readable to writable without having to run through ib_umem_get
+ * again
+ */
+if (!ib_access_writable(access_flags)) {
+struct vm_area_struct *vma;
+
+down_read(&current->mm->mmap_sem);
+/*
+ * FIXME: Ideally this would iterate over all the vmas that
+ * cover the memory, but for now it requires a single vma to
+ * entirely cover the MR to support RO mappings.
+ */
+vma = find_vma(current->mm, start);
+if (vma && vma->vm_end >= start + length &&
+vma->vm_start <= start) {
+if (vma->vm_flags & VM_WRITE)
+access_flags |= IB_ACCESS_LOCAL_WRITE;
+} else {
+access_flags |= IB_ACCESS_LOCAL_WRITE;
+}
+
+up_read(&current->mm->mmap_sem);
+}
+
+return ib_umem_get(context, start, length, access_flags, 0);
+}
+
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 u64 virt_addr, int access_flags,
 struct ib_udata *udata)
@@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 if (!mr)
 return ERR_PTR(-ENOMEM);
 
-/* Force registering the memory as writable. */
-/* Used for memory re-registeration. HCA protects the access */
-mr->umem = ib_umem_get(pd->uobject->context, start, length,
-access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+virt_addr, access_flags);
 if (IS_ERR(mr->umem)) {
 err = PTR_ERR(mr->umem);
 goto err_free;
@@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 }
 
 if (flags & IB_MR_REREG_ACCESS) {
+if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
+return -EPERM;
+
 err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
 convert_access(mr_access_flags));
@@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
 ib_umem_release(mmr->umem);
-mmr->umem = ib_umem_get(mr->uobject->context, start, length,
-mr_access_flags |
-IB_ACCESS_LOCAL_WRITE,
-0);
+mmr->umem =
+mlx4_get_umem_mr(mr->uobject->context, start, length,
+virt_addr, mr_access_flags);
 if (IS_ERR(mmr->umem)) {
 err = PTR_ERR(mmr->umem);
 /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */


@@ -2419,7 +2419,7 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
 }
 
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
 bool inner)
 {
 if (inner) {


@@ -487,11 +487,6 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 return 1;
 }
 
-static int first_med_bfreg(void)
-{
-return 1;
-}
-
 enum {
 /* this is the first blue flame register in the array of bfregs assigned
 * to a processes. Since we do not use it for blue flame but rather
@@ -517,6 +512,12 @@ static int num_med_bfreg(struct mlx5_ib_dev *dev,
 return n >= 0 ? n : 0;
 }
 
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+struct mlx5_bfreg_info *bfregi)
+{
+return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
+}
+
 static int first_hi_bfreg(struct mlx5_ib_dev *dev,
 struct mlx5_bfreg_info *bfregi)
 {
@@ -544,10 +545,13 @@ static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
 static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
 struct mlx5_bfreg_info *bfregi)
 {
-int minidx = first_med_bfreg();
+int minidx = first_med_bfreg(dev, bfregi);
 int i;
 
-for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+if (minidx < 0)
+return minidx;
+
+for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
 if (bfregi->count[i] < bfregi->count[minidx])
 minidx = i;
 if (!bfregi->count[minidx])


@@ -401,49 +401,47 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
 struct qedr_dev *dev = get_qedr_dev(context->device);
-unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
-u64 unmapped_db = dev->db_phys_addr;
+unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
 unsigned long len = (vma->vm_end - vma->vm_start);
-int rc = 0;
-bool found;
+unsigned long dpi_start;
+
+dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
 
 DP_DEBUG(dev, QEDR_MSG_INIT,
-"qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
-vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
-if (vma->vm_start & (PAGE_SIZE - 1)) {
-DP_ERR(dev, "Vma_start not page aligned = %ld\n",
-vma->vm_start);
+"mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
+(void *)vma->vm_start, (void *)vma->vm_end,
+(void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
+
+if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
+DP_ERR(dev,
+"failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n",
+(void *)vma->vm_start, (void *)vma->vm_end);
 return -EINVAL;
 }
 
-found = qedr_search_mmap(ucontext, vm_page, len);
-if (!found) {
-DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+if (!qedr_search_mmap(ucontext, phys_addr, len)) {
+DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
 vma->vm_pgoff);
 return -EINVAL;
 }
 
-DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-
-if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
-dev->db_size))) {
-DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+if (phys_addr < dpi_start ||
+((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
+DP_ERR(dev,
+"failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
+(void *)phys_addr, (void *)dpi_start,
+ucontext->dpi_size);
+return -EINVAL;
+}
+
 if (vma->vm_flags & VM_READ) {
-DP_ERR(dev, "Trying to map doorbell bar for read\n");
-return -EPERM;
+DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
+return -EINVAL;
 }
 
 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-PAGE_SIZE, vma->vm_page_prot);
-} else {
-DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
-rc = remap_pfn_range(vma, vma->vm_start,
-vma->vm_pgoff, len, vma->vm_page_prot);
-}
-
-DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
-return rc;
+return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
+vma->vm_page_prot);
 }
 
 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,


@@ -761,7 +761,6 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 unsigned int mask;
 unsigned int length = 0;
 int i;
-int must_sched;
 
 while (wr) {
 mask = wr_opcode_mask(wr->opcode, qp);
@@ -791,14 +790,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 wr = wr->next;
 }
 
-/*
- * Must sched in case of GSI QP because ib_send_mad() hold irq lock,
- * and the requester call ip_local_out_sk() that takes spin_lock_bh.
- */
-must_sched = (qp_type(qp) == IB_QPT_GSI) ||
-(queue_count(qp->sq.queue) > 1);
-rxe_run_task(&qp->req.task, must_sched);
+rxe_run_task(&qp->req.task, 1);
 
 if (unlikely(qp->req.state == QP_STATE_ERROR))
 rxe_run_task(&qp->comp.task, 1);


@@ -1,6 +1,6 @@
 config INFINIBAND_SRPT
 tristate "InfiniBand SCSI RDMA Protocol target support"
-depends on INFINIBAND && INFINIBAND_ADDR_TRANS && TARGET_CORE
+depends on INFINIBAND_ADDR_TRANS && TARGET_CORE
 ---help---
 
 Support for the SCSI RDMA Protocol (SRP) Target driver. The


@@ -27,7 +27,7 @@ config NVME_FABRICS
 
 config NVME_RDMA
 tristate "NVM Express over Fabrics RDMA host driver"
-depends on INFINIBAND && INFINIBAND_ADDR_TRANS && BLOCK
+depends on INFINIBAND_ADDR_TRANS && BLOCK
 select NVME_CORE
 select NVME_FABRICS
 select SG_POOL


@@ -27,7 +27,7 @@ config NVME_TARGET_LOOP
 
 config NVME_TARGET_RDMA
 tristate "NVMe over Fabrics RDMA target support"
-depends on INFINIBAND && INFINIBAND_ADDR_TRANS
+depends on INFINIBAND_ADDR_TRANS
 depends on NVME_TARGET
 select SGL_ALLOC
 help


@@ -34,7 +34,7 @@ config LNET_SELFTEST
 
 config LNET_XPRT_IB
 tristate "LNET infiniband support"
-depends on LNET && PCI && INFINIBAND && INFINIBAND_ADDR_TRANS
+depends on LNET && PCI && INFINIBAND_ADDR_TRANS
 default LNET && INFINIBAND
 help
 This option allows the LNET users to use infiniband as an


@@ -197,7 +197,7 @@ config CIFS_SMB311
 
 config CIFS_SMB_DIRECT
 bool "SMB Direct support (Experimental)"
-depends on CIFS=m && INFINIBAND && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND=y && INFINIBAND_ADDR_TRANS=y
+depends on CIFS=m && INFINIBAND_ADDR_TRANS || CIFS=y && INFINIBAND_ADDR_TRANS=y
 help
 Enables SMB Direct experimental support for SMB 3.0, 3.02 and 3.1.1.
 SMB Direct allows transferring SMB packets over RDMA. If unsure,


@@ -48,7 +48,6 @@ struct ib_umem {
 int writable;
 int hugetlb;
 struct work_struct work;
-struct pid *pid;
 struct mm_struct *mm;
 unsigned long diff;
 struct ib_umem_odp *odp_data;


@@ -3766,6 +3766,20 @@ static inline int ib_check_mr_access(int flags)
 return 0;
 }
 
+static inline bool ib_access_writable(int access_flags)
+{
+/*
+ * We have writable memory backing the MR if any of the following
+ * access flags are set. "Local write" and "remote write" obviously
+ * require write access. "Remote atomic" can do things like fetch and
+ * add, which will modify memory, and "MW bind" can change permissions
+ * by binding a window.
+ */
+return access_flags &
+(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
 /**
 * ib_check_mr_status: lightweight check of MR status.
 *     This routine may provide status checks on a selected


@@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs
 static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
 u16 idx)
 {
-struct ib_uobject *uobj =
-uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject;
+const struct uverbs_attr *attr;
 
-if (IS_ERR(uobj))
-return uobj;
+attr = uverbs_attr_get(attrs_bundle, idx);
+if (IS_ERR(attr))
+return ERR_CAST(attr);
 
-return uobj->object;
+return attr->obj_attr.uobject->object;
 }
 
 static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,


@@ -32,7 +32,7 @@ config NET_9P_XEN
 
 config NET_9P_RDMA
-depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
+depends on INET && INFINIBAND_ADDR_TRANS
 tristate "9P RDMA Transport (Experimental)"
 help
 This builds support for an RDMA transport.


@@ -8,7 +8,7 @@ config RDS
 
 config RDS_RDMA
 tristate "RDS over Infiniband"
-depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
+depends on RDS && INFINIBAND_ADDR_TRANS
 ---help---
 Allow RDS to use Infiniband as a transport.
 This transport supports RDMA operations.


@@ -50,7 +50,7 @@ config SUNRPC_DEBUG
 
 config SUNRPC_XPRT_RDMA
 tristate "RPC-over-RDMA transport"
-depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
+depends on SUNRPC && INFINIBAND_ADDR_TRANS
 default SUNRPC && INFINIBAND
 select SG_POOL
 help