From 470a55358186d0bb93558a87d13159dfbc989351 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 22 Sep 2015 23:18:10 +0300 Subject: IB/core: Add support of checksum capability reporting for RC and RAW Two enum members IB_DEVICE_RC_IP_CSUM and IB_DEVICE_RAW_IP_CSUM are added to ib_device_cap_flags. Device should set these two flags if they support insertion of UDP and TCP checksum on outgoing IPv4 messages and can verify the validity of checksum for incoming IPv4 messages, for RC IPoIB and RAW over Ethernet respectively. They are similar to IB_DEVICE_UD_IP_CSUM. Signed-off-by: Bodong Wang Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7845fae6f2df..e1f65e204d37 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -137,6 +137,8 @@ enum ib_device_cap_flags { IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), + IB_DEVICE_RC_IP_CSUM = (1<<25), + IB_DEVICE_RAW_IP_CSUM = (1<<26), IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), IB_DEVICE_ON_DEMAND_PAGING = (1<<31), @@ -873,7 +875,6 @@ enum ib_qp_create_flags { IB_QP_CREATE_RESERVED_END = 1 << 31, }; - /* * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler * callback to destroy the passed in QP. -- cgit v1.2.3 From 070b399723206a4ed80410358f801a7df68d08e6 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 22 Sep 2015 23:18:11 +0300 Subject: IB/mlx4: Report checksum offload cap for RAW QP when query device Signed-off-by: Bodong Wang Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index efecdf0216d8..38be8dc2932e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -442,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; props->vendor_part_id = dev->dev->persist->pdev->device; -- cgit v1.2.3 From e622f2f4ad2142d2a613a57fb85f8cf737935ef5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Oct 2015 09:16:33 +0100 Subject: IB: split struct ib_send_wr This patch split up struct ib_send_wr so that all non-trivial verbs use their own structure which embedds struct ib_send_wr. This dramaticly shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche [srp, srpt] Reviewed-by: Chuck Lever [sunrpc] Tested-by: Haggai Eran Tested-by: Sagi Grimberg Tested-by: Steve Wise --- drivers/infiniband/core/agent.c | 2 +- drivers/infiniband/core/mad.c | 40 ++--- drivers/infiniband/core/mad_priv.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 143 ++++++++++------- drivers/infiniband/hw/cxgb3/iwch_qp.c | 36 ++--- drivers/infiniband/hw/cxgb4/qp.c | 46 +++--- drivers/infiniband/hw/mlx4/mad.c | 54 ++++--- drivers/infiniband/hw/mlx4/mr.c | 20 +-- drivers/infiniband/hw/mlx4/qp.c | 178 +++++++++++---------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 + drivers/infiniband/hw/mlx5/mr.c | 52 +++--- drivers/infiniband/hw/mlx5/qp.c | 114 ++++++------- drivers/infiniband/hw/mthca/mthca_qp.c | 84 +++++----- drivers/infiniband/hw/nes/nes_verbs.c | 41 ++--- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 53 +++--- drivers/infiniband/hw/qib/qib_keys.c | 23 +-- drivers/infiniband/hw/qib/qib_qp.c | 2 +- drivers/infiniband/hw/qib/qib_rc.c | 38 ++--- drivers/infiniband/hw/qib/qib_ruc.c | 20 +-- drivers/infiniband/hw/qib/qib_uc.c | 4 +- drivers/infiniband/hw/qib/qib_ud.c | 20 +-- drivers/infiniband/hw/qib/qib_verbs.c | 22 ++- drivers/infiniband/hw/qib/qib_verbs.h | 8 +- drivers/infiniband/ulp/ipoib/ipoib.h | 4 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 22 +-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 6 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 10 +- drivers/infiniband/ulp/iser/iser_memory.c | 72 ++++----- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 156 +++++++++--------- drivers/infiniband/ulp/isert/ib_isert.h | 6 +- drivers/infiniband/ulp/srp/ib_srp.c | 26 +-- drivers/infiniband/ulp/srpt/ib_srpt.c | 32 ++-- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 35 ++-- drivers/staging/rdma/amso1100/c2_qp.c | 8 +- drivers/staging/rdma/ehca/ehca_reqs.c | 53 +++--- drivers/staging/rdma/hfi1/keys.c | 22 +-- drivers/staging/rdma/hfi1/qp.c | 2 +- drivers/staging/rdma/hfi1/rc.c | 24 +-- drivers/staging/rdma/hfi1/ruc.c | 18 +-- drivers/staging/rdma/hfi1/uc.c | 4 +- drivers/staging/rdma/hfi1/ud.c | 20 +-- drivers/staging/rdma/hfi1/verbs.c | 23 ++- drivers/staging/rdma/hfi1/verbs.h | 10 +- drivers/staging/rdma/ipath/ipath_rc.c | 24 +-- drivers/staging/rdma/ipath/ipath_ruc.c | 16 +- drivers/staging/rdma/ipath/ipath_uc.c | 4 +- drivers/staging/rdma/ipath/ipath_ud.c | 26 +-- drivers/staging/rdma/ipath/ipath_verbs.c | 20 ++- drivers/staging/rdma/ipath/ipath_verbs.h | 9 +- include/rdma/ib_verbs.h | 130 +++++++++------ net/rds/ib.h | 6 +- net/rds/ib_send.c | 71 ++++---- net/rds/iw.h | 6 +- net/rds/iw_rdma.c | 29 ++-- net/rds/iw_send.c | 113 ++++++------- net/sunrpc/xprtrdma/frwr_ops.c | 23 +-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 70 ++++---- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18 +-- 63 files changed, 1152 insertions(+), 986 deletions(-) diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 0429040304fd..4fa524dfb6cf 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh * mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); - mad_send_wr->send_wr.wr.ud.port_num = port_num; + mad_send_wr->send_wr.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 4b5c72311deb..844d9bb22700 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_device *device = mad_agent_priv->agent.device; u8 port_num; struct ib_wc mad_wc; - struct ib_send_wr *send_wr = &mad_send_wr->send_wr; + struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); u16 out_mad_pkey_index = 0; u16 drslid; @@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - port_num = send_wr->wr.ud.port_num; + port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; @@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, } build_smp_wc(mad_agent_priv->agent.qp, - send_wr->wr_id, drslid, - send_wr->wr.ud.pkey_index, - send_wr->wr.ud.port_num, &mad_wc); + send_wr->wr.wr_id, drslid, + send_wr->pkey_index, + send_wr->port_num, &mad_wc); if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { mad_wc.byte_len = mad_send_wr->send_buf.hdr_len @@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local->mad_send_wr = mad_send_wr; if (opa) { - local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; + local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ @@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; - mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; - mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; - mad_send_wr->send_wr.num_sge = 2; - mad_send_wr->send_wr.opcode = IB_WR_SEND; - mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; - mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; - mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; + mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr; + mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; + mad_send_wr->send_wr.wr.num_sge = 2; + mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; + mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; + mad_send_wr->send_wr.remote_qpn = remote_qpn; + mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; + mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); @@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; - mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; + mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; mad_agent = mad_send_wr->send_buf.mad_agent; @@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { - ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, + ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { @@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, * request associated with the completion */ next_send_buf = send_buf->next; - mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; + mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { @@ -2457,7 +2457,7 @@ retry: ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { - ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, &bad_send_wr); if (ret) { dev_err(&port_priv->device->dev, @@ -2515,7 +2515,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, struct ib_send_wr *bad_send_wr; mad_send_wr->retry = 0; - ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, + ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); if (ret) ib_mad_send_done_handler(port_priv, wc); @@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work) build_smp_wc(recv_mad_agent->agent.qp, (unsigned long) local->mad_send_wr, be16_to_cpu(IB_LID_PERMISSIVE), - local->mad_send_wr->send_wr.wr.ud.pkey_index, + local->mad_send_wr->send_wr.pkey_index, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 4a4f7aad0978..990698a6ab4b 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -123,7 +123,7 @@ struct ib_mad_send_wr_private { struct ib_mad_send_buf send_buf; u64 header_mapping; u64 payload_mapping; - struct ib_send_wr send_wr; + struct ib_ud_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index be4cb9f04be3..8adb71fd6a3a 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2303,6 +2303,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, return in_len; } +static void *alloc_wr(size_t wr_size, __u32 num_sge) +{ + return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + + num_sge * sizeof (struct ib_sge), GFP_KERNEL); +}; + ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -2351,14 +2357,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, goto out_put; } - next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + - user_wr->num_sge * sizeof (struct ib_sge), - GFP_KERNEL); - if (!next) { - ret = -ENOMEM; + if (is_ud) { + struct ib_ud_wr *ud; + + if (user_wr->opcode != IB_WR_SEND && + user_wr->opcode != IB_WR_SEND_WITH_IMM) { + ret = -EINVAL; + goto out_put; + } + + ud = alloc_wr(sizeof(*ud), user_wr->num_sge); + if (!ud) { + ret = -ENOMEM; + goto out_put; + } + + ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); + if (!ud->ah) { + kfree(ud); + ret = -EINVAL; + goto out_put; + } + ud->remote_qpn = user_wr->wr.ud.remote_qpn; + ud->remote_qkey = user_wr->wr.ud.remote_qkey; + + next = &ud->wr; + } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE || + user_wr->opcode == IB_WR_RDMA_READ) { + struct ib_rdma_wr *rdma; + + rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge); + if (!rdma) { + ret = -ENOMEM; + goto out_put; + } + + rdma->remote_addr = user_wr->wr.rdma.remote_addr; + rdma->rkey = user_wr->wr.rdma.rkey; + + next = &rdma->wr; + } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + struct ib_atomic_wr *atomic; + + atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge); + if (!atomic) { + ret = -ENOMEM; + goto out_put; + } + + atomic->remote_addr = user_wr->wr.atomic.remote_addr; + atomic->compare_add = user_wr->wr.atomic.compare_add; + atomic->swap = user_wr->wr.atomic.swap; + atomic->rkey = user_wr->wr.atomic.rkey; + + next = &atomic->wr; + } else if (user_wr->opcode == IB_WR_SEND || + user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_SEND_WITH_INV) { + next = alloc_wr(sizeof(*next), user_wr->num_sge); + if (!next) { + ret = -ENOMEM; + goto out_put; + } + } else { + ret = -EINVAL; goto out_put; } + if (user_wr->opcode == IB_WR_SEND_WITH_IMM || + user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; + } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { + next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; + } + if (!last) wr = next; else @@ -2371,60 +2446,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; - if (is_ud) { - if (next->opcode != IB_WR_SEND && - next->opcode != IB_WR_SEND_WITH_IMM) { - ret = -EINVAL; - goto out_put; - } - - next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah, - file->ucontext); - if (!next->wr.ud.ah) { - ret = -EINVAL; - goto out_put; - } - next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; - next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; - if (next->opcode == IB_WR_SEND_WITH_IMM) - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - } else { - switch (next->opcode) { - case IB_WR_RDMA_WRITE_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_READ: - next->wr.rdma.remote_addr = - user_wr->wr.rdma.remote_addr; - next->wr.rdma.rkey = - user_wr->wr.rdma.rkey; - break; - case IB_WR_SEND_WITH_IMM: - next->ex.imm_data = - (__be32 __force) user_wr->ex.imm_data; - break; - case IB_WR_SEND_WITH_INV: - next->ex.invalidate_rkey = - user_wr->ex.invalidate_rkey; - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - next->wr.atomic.remote_addr = - user_wr->wr.atomic.remote_addr; - next->wr.atomic.compare_add = - user_wr->wr.atomic.compare_add; - next->wr.atomic.swap = user_wr->wr.atomic.swap; - next->wr.atomic.rkey = user_wr->wr.atomic.rkey; - case IB_WR_SEND: - break; - default: - ret = -EINVAL; - goto out_put; - } - } - if (next->num_sge) { next->sg_list = (void *) next + ALIGN(sizeof *next, sizeof (struct ib_sge)); @@ -2458,8 +2479,8 @@ out_put: put_qp_read(qp); while (wr) { - if (is_ud && wr->wr.ud.ah) - put_ah_read(wr->wr.ud.ah); + if (is_ud && ud_wr(wr)->ah) + put_ah_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index b57c0befd962..bac0508fedd9 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, wqe->write.reserved[0] = 0; wqe->write.reserved[1] = 0; wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; @@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); @@ -146,27 +146,27 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *send_wr, u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { + struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); int i; __be64 *p; - if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + if (wr->page_list_len > T3_MAX_FASTREG_DEPTH) return -EINVAL; *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.stag = cpu_to_be32(wr->rkey); + wqe->fastreg.len = cpu_to_be32(wr->length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = cpu_to_be32(wr->iova_start & 0xffffffff); wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | - V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_PAGE_COUNT(wr->page_list_len) | + V_FR_PAGE_SIZE(wr->page_shift-12) | V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + V_FR_PERMS(iwch_ib_to_tpt_access(wr->access_flags))); p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + for (i = 0; i < wr->page_list_len; i++, p++) { /* If we need a 2nd WR, then set it up */ if (i == T3_MAX_FASTREG_FRAG) { @@ -175,14 +175,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, Q_PTR2IDX((wq->wptr+1), wq->size_log2)); build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + 0, 1 + wr->page_list_len - T3_MAX_FASTREG_FRAG, T3_EOP); p = &wqe->pbl_frag.pbl_addrs[0]; } - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + *p = cpu_to_be64((u64)wr->page_list->page_list[i]); } - *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + *flit_cnt = 5 + wr->page_list_len; if (*flit_cnt > 15) *flit_cnt = 15; return 0; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 6517e1208ccb..b60498fff99a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; - wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); - wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, @@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { - wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); - wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr >> 32)); - wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr @@ -606,39 +606,36 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, } static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16, u8 t5dev) + struct ib_send_wr *send_wr, u8 *len16, u8 t5dev) { - + struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); struct fw_ri_immd *imdp; __be64 *p; int i; - int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int pbllen = roundup(wr->page_list_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > - t4_max_fr_depth(use_dsgl)) + if (wr->page_list_len > t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.pgsz_shift = wr->page_shift - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access_flags); wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); - wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); - wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & - 0xffffffff); + wqe->fr.len_lo = cpu_to_be32(wr->length); + wqe->fr.stag = cpu_to_be32(wr->rkey); + wqe->fr.va_hi = cpu_to_be32(wr->iova_start >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(wr->iova_start & 0xffffffff); if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); + to_c4iw_fr_page_list(wr->page_list); struct fw_ri_dsgl *sglp; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - wr->wr.fast_reg.page_list->page_list[i] = (__force u64) - cpu_to_be64((u64) - wr->wr.fast_reg.page_list->page_list[i]); + for (i = 0; i < wr->page_list_len; i++) { + wr->page_list->page_list[i] = (__force u64) + cpu_to_be64((u64)wr->page_list->page_list[i]); } sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); @@ -657,9 +654,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); + for (i = 0; i < wr->page_list_len; i++) { + *p = cpu_to_be64((u64)wr->page_list->page_list[i]); rem -= sizeof(*p); if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1cd75ff02251..5f2de2ed5598 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -457,7 +457,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, struct ib_grh *grh, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *tun_ctx; struct mlx4_ib_demux_pv_qp *tun_qp; struct mlx4_rcv_tunnel_mad *tun_mad; @@ -582,18 +583,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, list.length = sizeof (struct mlx4_rcv_tunnel_mad); list.lkey = tun_ctx->pd->local_dma_lkey; - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; - wr.wr.ud.remote_qpn = dqpn; - wr.next = NULL; - wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(src_qp, &wr, &bad_wr); + wr.ah = ah; + wr.port_num = port; + wr.remote_qkey = IB_QP_SET_QKEY; + wr.remote_qpn = dqpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(src_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); @@ -1175,7 +1176,8 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_ud_wr wr; + struct ib_send_wr *bad_wr; struct mlx4_ib_demux_pv_ctx *sqp_ctx; struct mlx4_ib_demux_pv_qp *sqp; struct mlx4_mad_snd_buf *sqp_mad; @@ -1246,22 +1248,22 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, list.length = sizeof (struct mlx4_mad_snd_buf); list.lkey = sqp_ctx->pd->local_dma_lkey; - wr.wr.ud.ah = ah; - wr.wr.ud.port_num = port; - wr.wr.ud.pkey_index = wire_pkey_ix; - wr.wr.ud.remote_qkey = qkey; - wr.wr.ud.remote_qpn = remote_qpn; - wr.next = NULL; - wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; + wr.ah = ah; + wr.port_num = port; + wr.pkey_index = wire_pkey_ix; + wr.remote_qkey = qkey; + wr.remote_qpn = remote_qpn; + wr.wr.next = NULL; + wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); + wr.wr.sg_list = &list; + wr.wr.num_sge = 1; + wr.wr.opcode = IB_WR_SEND; + wr.wr.send_flags = IB_SEND_SIGNALED; if (s_mac) memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); - ret = ib_post_send(send_qp, &wr, &bad_wr); + ret = ib_post_send(send_qp, &wr.wr, &bad_wr); out: if (ret) ib_destroy_ah(ah); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 2542fd3c1a49..5bba176e9dfa 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -321,21 +321,21 @@ err_free: int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { - struct ib_send_wr wr; + struct ib_bind_mw_wr wr; struct ib_send_wr *bad_wr; int ret; memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_BIND_MW; - wr.wr_id = mw_bind->wr_id; - wr.send_flags = mw_bind->send_flags; - wr.wr.bind_mw.mw = mw; - wr.wr.bind_mw.bind_info = mw_bind->bind_info; - wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); - - ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + wr.wr.opcode = IB_WR_BIND_MW; + wr.wr.wr_id = mw_bind->wr_id; + wr.wr.send_flags = mw_bind->send_flags; + wr.mw = mw; + wr.bind_info = mw_bind->bind_info; + wr.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr); if (!ret) - mw->rkey = wr.wr.bind_mw.rkey; + mw->rkey = wr.rkey; return ret; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4ad9be3ad61c..3831cddb551f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2036,14 +2036,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) } static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); struct ib_device *ib_dev = &mdev->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); u16 pkey; u32 qkey; int send_size; @@ -2051,13 +2051,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, int spc; int i; - if (wr->opcode != IB_WR_SEND) + if (wr->wr.opcode != IB_WR_SEND) return -EINVAL; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ @@ -2082,11 +2082,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; sqp->ud_header.lrh.virtual_lane = 0; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); @@ -2158,14 +2158,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac) } } -static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, +static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); union ib_gid sgid; u16 pkey; int send_size; @@ -2179,8 +2179,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, bool is_grh; send_size = 0; - for (i = 0; i < wr->num_sge; ++i) - send_size += wr->sg_list[i].length; + for (i = 0; i < wr->wr.num_sge; ++i) + send_size += wr->wr.sg_list[i].length; is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); @@ -2257,7 +2257,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, mlx->rlid = sqp->ud_header.lrh.destination_lid; } - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -2265,7 +2265,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -2308,16 +2308,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else - ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); + ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -2405,43 +2405,45 @@ static __be32 convert_access(int acc) cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } -static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, + struct ib_fast_reg_wr *wr) { - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->page_list); int i; - for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) + for (i = 0; i < wr->page_list_len; ++i) mfrpl->mapped_page_list[i] = - cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | + cpu_to_be64(wr->page_list->page_list[i] | MLX4_MTT_FLAG_PRESENT); - fseg->flags = convert_access(wr->wr.fast_reg.access_flags); - fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->flags = convert_access(wr->access_flags); + fseg->mem_key = cpu_to_be32(wr->rkey); fseg->buf_list = cpu_to_be64(mfrpl->map); - fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->start_addr = cpu_to_be64(wr->iova_start); + fseg->reg_len = cpu_to_be64(wr->length); fseg->offset = 0; /* XXX -- is this just for ZBVA? */ - fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->page_size = cpu_to_be32(wr->page_shift); fseg->reserved[0] = 0; fseg->reserved[1] = 0; } -static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, + struct ib_bind_mw_wr *wr) { bseg->flags1 = - convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + convert_access(wr->bind_info.mw_access_flags) & cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); bseg->flags2 = 0; - if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + if (wr->mw->type == IB_MW_TYPE_2) bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); - if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + if (wr->bind_info.mw_access_flags & IB_ZERO_BASED) bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); - bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); - bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); - bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); - bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); + bseg->new_rkey = cpu_to_be32(wr->rkey); + bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->bind_info.addr); + bseg->length = cpu_to_be64(wr->bind_info.length); } static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) @@ -2458,46 +2460,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) +static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); + } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->compare_add); + aseg->compare = cpu_to_be64(wr->compare_add_mask); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); + aseg->compare = cpu_to_be64(wr->compare_add); + aseg->compare_mask = cpu_to_be64(wr->compare_add_mask); } static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); - dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); - dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; - memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); + memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(wr->remote_qpn); + dseg->qkey = cpu_to_be32(wr->remote_qkey); + dseg->vlan = to_mah(wr->ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6); } static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, + struct ib_ud_wr *wr, enum mlx4_ib_qp_type qpt) { - union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; + union mlx4_ext_av *av = &to_mah(wr->ah)->av; struct mlx4_av sqp_av = {0}; int port = *((u8 *) &av->ib.port_pd) & 0x3; @@ -2516,18 +2519,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } -static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) +static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { struct mlx4_wqe_inline_seg *inl = wqe; struct mlx4_ib_tunnel_header hdr; - struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + struct mlx4_ib_ah *ah = to_mah(wr->ah); int spc; int i; memcpy(&hdr.av, &ah->av, sizeof hdr.av); - hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); - hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + hdr.remote_qpn = cpu_to_be32(wr->remote_qpn); + hdr.pkey_index = cpu_to_be16(wr->pkey_index); + hdr.qkey = cpu_to_be32(wr->remote_qkey); memcpy(hdr.mac, ah->av.eth.mac, 6); hdr.vlan = ah->av.eth.vlan; @@ -2599,22 +2602,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, struct mlx4_ib_qp *qp, unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh) { - unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); + unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && - wr->num_sge > qp->sq.max_gs - (halign >> 4))) + wr->wr.num_sge > qp->sq.max_gs - (halign >> 4))) return -EINVAL; - memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); + memcpy(wqe->header, wr->header, wr->hlen); - *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen); + *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen); *lso_seg_len = halign; return 0; } @@ -2713,11 +2716,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2726,11 +2729,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); - set_masked_atomic_seg(wqe, wr); + set_masked_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); size += (sizeof (struct mlx4_wqe_raddr_seg) + @@ -2741,8 +2744,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; @@ -2758,7 +2761,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_fmr_seg(wqe, wr); + set_fmr_seg(wqe, fast_reg_wr(wr)); wqe += sizeof (struct mlx4_wqe_fmr_seg); size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; @@ -2766,7 +2769,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_BIND_MW: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_bind_seg(wqe, wr); + set_bind_seg(wqe, bind_mw_wr(wr)); wqe += sizeof(struct mlx4_wqe_bind_seg); size += sizeof(struct mlx4_wqe_bind_seg) / 16; break; @@ -2777,7 +2780,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2788,19 +2792,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case MLX4_IB_QPT_TUN_SMI: case MLX4_IB_QPT_TUN_GSI: /* this is a UD qp used in MAD responses to slaves. */ - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); /* set the forced-loopback bit in the data seg av */ *(__be32 *) wqe |= cpu_to_be32(0x80000000); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; break; case MLX4_IB_QPT_UD: - set_datagram_seg(wqe, wr); + set_datagram_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); + err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen, + &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2812,7 +2817,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), + ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -2823,7 +2829,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, add_zero_len_inline(wqe); wqe += 16; size++; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; @@ -2833,18 +2839,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, * In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, + ud_wr(wr), qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; - build_tunnel_header(wr, wqe, &seglen); + build_tunnel_header(ud_wr(wr), wqe, &seglen); wqe += seglen; size += seglen / 16; break; case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); + err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 22123b79d550..29f3ecdbe790 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -245,6 +245,7 @@ enum mlx5_ib_qp_flags { }; struct mlx5_umr_wr { + struct ib_send_wr wr; union { u64 virt_addr; u64 offset; @@ -257,6 +258,11 @@ struct mlx5_umr_wr { u32 mkey; }; +static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct mlx5_umr_wr, wr); +} + struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 54a15b5d336d..b30d4ae0fb61 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -687,7 +687,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); sg->addr = dma; sg->length = ALIGN(sizeof(u64) * n, 64); @@ -715,7 +715,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, struct ib_send_wr *wr, u32 key) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; wr->opcode = MLX5_IB_WR_UMR; @@ -752,7 +752,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; @@ -798,14 +799,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, goto free_pas; } - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift, - virt_addr, len, access_flags); + memset(&umrwr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key, + page_shift, virt_addr, len, access_flags); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); goto unmap_dma; @@ -851,8 +852,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int size; __be64 *pas; dma_addr_t dma; - struct ib_send_wr wr, *bad; - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg; + struct ib_send_wr *bad; + struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); @@ -917,26 +918,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; + wr.wr.wr_id = (u64)(unsigned long)&umr_context; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), MLX5_UMR_MTT_ALIGNMENT); sg.lkey = dev->umrc.pd->local_dma_lkey; - wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | + wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | MLX5_IB_SEND_UMR_UPDATE_MTT; - wr.sg_list = &sg; - wr.num_sge = 1; - wr.opcode = MLX5_IB_WR_UMR; - umrwr->npages = sg.length / sizeof(u64); - umrwr->page_shift = PAGE_SHIFT; - umrwr->mkey = mr->mmr.key; - umrwr->target.offset = start_page_index; + wr.wr.sg_list = &sg; + wr.wr.num_sge = 1; + wr.wr.opcode = MLX5_IB_WR_UMR; + wr.npages = sg.length / sizeof(u64); + wr.page_shift = PAGE_SHIFT; + wr.mkey = mr->mmr.key; + wr.target.offset = start_page_index; mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); } else { @@ -1122,16 +1123,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; - struct ib_send_wr wr, *bad; + struct mlx5_umr_wr umrwr; + struct ib_send_wr *bad; int err; - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)&umr_context; - prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + memset(&umrwr.wr, 0, sizeof(umrwr)); + umrwr.wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key); mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - err = ib_post_send(umrc->qp, &wr, &bad); + err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6f521a3418e8..d4c36af4270f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1838,9 +1838,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { - memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); - dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); - dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey); } static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) @@ -1908,7 +1908,7 @@ static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } umr->flags = (1 << 5); /* fail if not free */ - umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + umr->klm_octowords = get_klm_octo(fast_reg_wr(wr)->page_list_len); umr->mkey_mask = frwr_mkey_mask(); } @@ -1952,7 +1952,7 @@ static __be64 get_umr_update_mtt_mask(void) static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(umr, 0, sizeof(*umr)); @@ -1996,20 +1996,20 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, return; } - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + seg->flags = get_umr_flags(fast_reg_wr(wr)->access_flags) | MLX5_ACCESS_MODE_MTT; *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); - seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + seg->qpn_mkey7_0 = cpu_to_be32((fast_reg_wr(wr)->rkey & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); - seg->len = cpu_to_be64(wr->wr.fast_reg.length); - seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); - seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->start_addr = cpu_to_be64(fast_reg_wr(wr)->iova_start); + seg->len = cpu_to_be64(fast_reg_wr(wr)->length); + seg->xlt_oct_size = cpu_to_be32((fast_reg_wr(wr)->page_list_len + 1) / 2); + seg->log2_page_size = fast_reg_wr(wr)->page_shift; } static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) { - struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; + struct mlx5_umr_wr *umrwr = umr_wr(wr); memset(seg, 0, sizeof(*seg)); if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { @@ -2034,15 +2034,15 @@ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_pd *pd, int writ) { - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); - u64 *page_list = wr->wr.fast_reg.page_list->page_list; + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(fast_reg_wr(wr)->page_list); + u64 *page_list = fast_reg_wr(wr)->page_list->page_list; u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); int i; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) + for (i = 0; i < fast_reg_wr(wr)->page_list_len; i++) mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); dseg->addr = cpu_to_be64(mfrpl->map); - dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); + dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * fast_reg_wr(wr)->page_list_len, 64)); dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } @@ -2224,22 +2224,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, - void **seg, int *size) +static int set_sig_data_segment(struct ib_sig_handover_wr *wr, + struct mlx5_ib_qp *qp, void **seg, int *size) { - struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_sig_attrs *sig_attrs = wr->sig_attrs; + struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->sg_list->length; - u32 data_key = wr->sg_list->lkey; - u64 data_va = wr->sg_list->addr; + u32 data_len = wr->wr.sg_list->length; + u32 data_key = wr->wr.sg_list->lkey; + u64 data_va = wr->wr.sg_list->addr; int ret; int wqe_size; - if (!wr->wr.sig_handover.prot || - (data_key == wr->wr.sig_handover.prot->lkey && - data_va == wr->wr.sig_handover.prot->addr && - data_len == wr->wr.sig_handover.prot->length)) { + if (!wr->prot || + (data_key == wr->prot->lkey && + data_va == wr->prot->addr && + data_len == wr->prot->length)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -2273,8 +2273,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->wr.sig_handover.prot->lkey; - u64 prot_va = wr->wr.sig_handover.prot->addr; + u32 prot_key = wr->prot->lkey; + u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -2326,16 +2326,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - struct ib_send_wr *wr, u32 nelements, + struct ib_sig_handover_wr *wr, u32 nelements, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + seg->flags = get_umr_flags(wr->access_flags) | MLX5_ACCESS_MODE_KLM; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | @@ -2346,7 +2346,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, } static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, u32 nelements) + u32 nelements) { memset(umr, 0, sizeof(*umr)); @@ -2357,37 +2357,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, } -static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, +static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size) { - struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); u32 pdn = get_pd(qp)->pdn; u32 klm_oct_size; int region_len, ret; - if (unlikely(wr->num_sge != 1) || - unlikely(wr->wr.sig_handover.access_flags & - IB_ACCESS_REMOTE_ATOMIC) || + if (unlikely(wr->wr.num_sge != 1) || + unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; /* length of the protected region, data + protection */ - region_len = wr->sg_list->length; - if (wr->wr.sig_handover.prot && - (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || - wr->wr.sig_handover.prot->addr != wr->sg_list->addr || - wr->wr.sig_handover.prot->length != wr->sg_list->length)) - region_len += wr->wr.sig_handover.prot->length; + region_len = wr->wr.sg_list->length; + if (wr->prot && + (wr->prot->lkey != wr->wr.sg_list->lkey || + wr->prot->addr != wr->wr.sg_list->addr || + wr->prot->length != wr->wr.sg_list->length)) + region_len += wr->prot->length; /** * KLM octoword size - if protection was provided * then we use strided block format (3 octowords), * else we use single KLM (1 octoword) **/ - klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + klm_oct_size = wr->prot ? 3 : 1; - set_sig_umr_segment(*seg, wr, klm_oct_size); + set_sig_umr_segment(*seg, klm_oct_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -2454,8 +2454,8 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); if (!li) { - if (unlikely(wr->wr.fast_reg.page_list_len > - wr->wr.fast_reg.page_list->max_page_list_len)) + if (unlikely(fast_reg_wr(wr)->page_list_len > + fast_reg_wr(wr)->page_list->max_page_list_len)) return -ENOMEM; set_frwr_pages(*seg, wr, mdev, pd, writ); @@ -2636,8 +2636,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2666,7 +2666,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + ctrl->imm = cpu_to_be32(fast_reg_wr(wr)->rkey); err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); if (err) { mlx5_ib_warn(dev, "\n"); @@ -2678,7 +2678,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(wr->wr.sig_handover.sig_mr); + mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); err = set_sig_umr_wr(wr, qp, &seg, &size); @@ -2706,7 +2706,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, mr->sig->psv_memory.psv_idx, &seg, &size); if (err) { @@ -2728,7 +2728,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); if (err) { @@ -2752,8 +2752,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(seg, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(seg, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); seg += sizeof(struct mlx5_wqe_raddr_seg); size += sizeof(struct mlx5_wqe_raddr_seg) / 16; break; @@ -2780,7 +2780,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; - ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); set_reg_umr_segment(seg, wr); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index e354b2f04ad9..35fe506e2cfa 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev, /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_send_wr *wr, + int ind, struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { @@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, u16 pkey; ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, - mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, + mthca_ah_grh_present(to_mah(wr->ah)), 0, &sqp->ud_header); - err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header); if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); @@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; - switch (wr->opcode) { + switch (wr->wr.opcode) { case IB_WR_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, case IB_WR_SEND_WITH_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->ex.imm_data; + sqp->ud_header.immediate_data = wr->wr.ex.imm_data; break; default: return -EINVAL; @@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->wr.ud.pkey_index, &pkey); + wr->pkey_index, &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); + sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? + sqp->qkey : wr->remote_qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, @@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg, } static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, - struct ib_send_wr *wr) + struct ib_atomic_wr *wr) { - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + aseg->swap_add = cpu_to_be64(wr->swap); + aseg->compare = cpu_to_be64(wr->compare_add); } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->swap_add = cpu_to_be64(wr->compare_add); aseg->compare = 0; } } static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); - useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); + useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, - struct ib_send_wr *wr) + struct ib_ud_wr *wr) { - memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); - useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); + useg->dqpn = cpu_to_be32(wr->remote_qpn); + useg->qkey = cpu_to_be32(wr->remote_qkey); } int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, @@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_tavor_ud_seg(wqe, wr); + set_tavor_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { @@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(wqe, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); - set_atomic_seg(wqe, wr); + set_atomic_seg(wqe, atomic_wr(wr)); wqe += sizeof (struct mthca_atomic_seg); size += (sizeof (struct mthca_raddr_seg) + sizeof (struct mthca_atomic_seg)) / 16; @@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - set_raddr_seg(wqe, wr->wr.rdma.remote_addr, - wr->wr.rdma.rkey); + set_raddr_seg(wqe, rdma_wr(wr)->remote_addr, + rdma_wr(wr)->rkey); wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; break; @@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UD: - set_arbel_ud_seg(wqe, wr); + set_arbel_ud_seg(wqe, ud_wr(wr)); wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, wr, + err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), wqe - sizeof (struct mthca_next_seg), wqe); if (err) { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 44cb513f9a87..f71b37b75f82 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3372,9 +3372,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); if ((ib_wr->send_flags & IB_SEND_INLINE) && ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && @@ -3409,9 +3409,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, } set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); + rdma_wr(ib_wr)->remote_addr); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); + rdma_wr(ib_wr)->rkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, ib_wr->sg_list->length); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, @@ -3428,15 +3428,16 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, case IB_WR_FAST_REG_MR: { int i; - int flags = ib_wr->wr.fast_reg.access_flags; + struct ib_fast_reg_wr *fwr = fast_reg_wr(ib_wr); + int flags = fwr->access_flags; struct nes_ib_fast_reg_page_list *pnesfrpl = - container_of(ib_wr->wr.fast_reg.page_list, + container_of(fwr->page_list, struct nes_ib_fast_reg_page_list, ibfrpl); u64 *src_page_list = pnesfrpl->ibfrpl.page_list; u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; - if (ib_wr->wr.fast_reg.page_list_len > + if (fwr->page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); err = -EINVAL; @@ -3445,19 +3446,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc = NES_IWARP_SQ_OP_FAST_REG; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, - ib_wr->wr.fast_reg.iova_start); + fwr->iova_start); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, - ib_wr->wr.fast_reg.length); + fwr->length); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, - ib_wr->wr.fast_reg.rkey); + fwr->rkey); /* Set page size: */ - if (ib_wr->wr.fast_reg.page_shift == 12) { + if (fwr->page_shift == 12) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; - } else if (ib_wr->wr.fast_reg.page_shift == 21) { + } else if (fwr->page_shift == 21) { wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; } else { nes_debug(NES_DBG_IW_TX, "Invalid page shift," @@ -3480,11 +3481,11 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; /* Fill in PBL info: */ - if (ib_wr->wr.fast_reg.page_list_len > + if (fwr->page_list_len > pnesfrpl->ibfrpl.max_page_list_len) { nes_debug(NES_DBG_IW_TX, "Invalid page list length," " ib_wr=%p, value=%u, max=%u\n", - ib_wr, ib_wr->wr.fast_reg.page_list_len, + ib_wr, fwr->page_list_len, pnesfrpl->ibfrpl.max_page_list_len); err = -EINVAL; break; @@ -3496,19 +3497,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, - ib_wr->wr.fast_reg.page_list_len * 8); + fwr->page_list_len * 8); - for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) + for (i = 0; i < fwr->page_list_len; i++) dst_page_list[i] = cpu_to_le64(src_page_list[i]); nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " "length: %d, rkey: %0x, pgl_paddr: %llx, " "page_list_len: %u, wqe_misc: %x\n", - (unsigned long long) ib_wr->wr.fast_reg.iova_start, - ib_wr->wr.fast_reg.length, - ib_wr->wr.fast_reg.rkey, + (unsigned long long) fwr->iova_start, + fwr->length, + fwr->rkey, (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, - ib_wr->wr.fast_reg.page_list_len, + fwr->page_list_len, wqe_misc); break; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 1f3affb6a477..eb09e224acb9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1997,13 +1997,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, { struct ocrdma_ewqe_ud_hdr *ud_hdr = (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); - struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah); - ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn; if (qp->qp_type == IB_QPT_GSI) ud_hdr->qkey = qp->qkey; else - ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->qkey = ud_wr(wr)->remote_qkey; ud_hdr->rsvd_ahid = ah->id; if (ah->av->valid & OCRDMA_AV_VLAN_VALID) hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); @@ -2106,9 +2106,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); if (status) return status; - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; return 0; } @@ -2126,13 +2126,14 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); - ext_rw->addr_lo = wr->wr.rdma.remote_addr; - ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); - ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->addr_lo = rdma_wr(wr)->remote_addr; + ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr); + ext_rw->lrkey = rdma_wr(wr)->rkey; ext_rw->len = hdr->total_len; } -static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, +static void build_frmr_pbes(struct ib_fast_reg_wr *wr, + struct ocrdma_pbl *pbl_tbl, struct ocrdma_hw_mr *hwmr) { int i; @@ -2144,12 +2145,12 @@ static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, num_pbes = 0; /* go through the OS phy regions & fill hw pbe entries into pbls. */ - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + for (i = 0; i < wr->page_list_len; i++) { /* number of pbes can be more for one OS buf, when * buffers are of different sizes. * split the ib_buf to one or more pbes. */ - buf_addr = wr->wr.fast_reg.page_list->page_list[i]; + buf_addr = wr->page_list->page_list[i]; pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); num_pbes += 1; @@ -2178,9 +2179,10 @@ static int get_encoded_page_size(int pg_sz) static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *wr) + struct ib_send_wr *send_wr) { u64 fbo; + struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); struct ocrdma_mr *mr; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2188,33 +2190,32 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); - if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr) + if (wr->page_list_len > dev->attr.max_pages_per_frmr) return -EINVAL; hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); - if (wr->wr.fast_reg.page_list_len == 0) + if (wr->page_list_len == 0) BUG(); - if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE) + if (wr->access_flags & IB_ACCESS_LOCAL_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) + if (wr->access_flags & IB_ACCESS_REMOTE_WRITE) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; - if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) + if (wr->access_flags & IB_ACCESS_REMOTE_READ) hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; - hdr->lkey = wr->wr.fast_reg.rkey; - hdr->total_len = wr->wr.fast_reg.length; + hdr->lkey = wr->rkey; + hdr->total_len = wr->length; - fbo = wr->wr.fast_reg.iova_start - - (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + fbo = wr->iova_start - (wr->page_list->page_list[0] & PAGE_MASK); - fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); - fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); + fast_reg->va_hi = upper_32_bits(wr->iova_start); + fast_reg->va_lo = (u32) (wr->iova_start & 0xffffffff); fast_reg->fbo_hi = upper_32_bits(fbo); fast_reg->fbo_lo = (u32) fbo & 0xffffffff; - fast_reg->num_sges = wr->wr.fast_reg.page_list_len; + fast_reg->num_sges = wr->page_list_len; fast_reg->size_sge = - get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); + get_encoded_page_size(1 << wr->page_shift); mr = (struct ocrdma_mr *) (unsigned long) dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 5afaa218508d..eaf139a33b2e 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -338,12 +338,13 @@ bail: /* * Initialize the memory region specified by the work reqeust. */ -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) +int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *send_wr) { + struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct qib_pd *pd = to_ipd(qp->ibqp.pd); struct qib_mregion *mr; - u32 rkey = wr->wr.fast_reg.rkey; + u32 rkey = wr->rkey; unsigned i, n, m; int ret = -EINVAL; unsigned long flags; @@ -360,22 +361,22 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; - if (wr->wr.fast_reg.page_list_len > mr->max_segs) + if (wr->page_list_len > mr->max_segs) goto bail; - ps = 1UL << wr->wr.fast_reg.page_shift; - if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + ps = 1UL << wr->page_shift; + if (wr->length > ps * wr->page_list_len) goto bail; - mr->user_base = wr->wr.fast_reg.iova_start; - mr->iova = wr->wr.fast_reg.iova_start; + mr->user_base = wr->iova_start; + mr->iova = wr->iova_start; mr->lkey = rkey; - mr->length = wr->wr.fast_reg.length; - mr->access_flags = wr->wr.fast_reg.access_flags; - page_list = wr->wr.fast_reg.page_list->page_list; + mr->length = wr->length; + mr->access_flags = wr->access_flags; + page_list = wr->page_list->page_list; m = 0; n = 0; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + for (i = 0; i < wr->page_list_len; i++) { mr->map[m]->segs[n].vaddr = (void *) page_list[i]; mr->map[m]->segs[n].length = ps; if (++n == QIB_SEGSZ) { diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 4fa88ba2963e..40f85bb3e0d3 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -436,7 +436,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 4544d6f88ad7..e6b7556d5221 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; goto bail; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp) len = pmtu; break; } - if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE) qp->s_state = OP(RDMA_WRITE_ONLY); else { - qp->s_state = - OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); /* Immediate data comes after RETH */ - ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + ohdr->u.rc.imm_data = + wqe->rdma_wr.wr.ex.imm_data; hwords += 1; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) + if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED) bth0 |= IB_BTH_SOLICITED; } bth2 |= IB_BTH_REQ_ACK; @@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp) */ len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 22e356ca8058..b1aa21bdd484 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -459,8 +459,8 @@ again: if (wqe->length == 0) break; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; @@ -472,8 +472,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; @@ -490,18 +490,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = - (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); qib_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index aa3a8035bb68..06a564589c35 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 26243b722b5e..59193f67ea78 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) u32 length; enum ib_qp_type sqptype, dqptype; - qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->n_pkt_drops++; return; @@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey; + qkey = (int)swqe->ud_wr.remote_qkey < 0 ? + sqp->qkey : swqe->ud_wr.remote_qkey; if (unlikely(qkey != qp->qkey)) { u16 lid; @@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) wc.qp = &qp->ibqp; wc.src_qp = sqp->ibqp.qp_num; wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? - swqe->wr.wr.ud.pkey_index : 0; + swqe->ud_wr.pkey_index : 0; wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); wc.sl = ah_attr->sl; wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); @@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) this_cpu_inc(ibp->pmastats->n_multicast_xmit); @@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp) bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? - wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); + wqe->ud_wr.pkey_index : qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); /* * Use the multicast QP if the destination LID is a multicast LID. @@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp) ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && ah_attr->dlid != QIB_PERMISSIVE_LID ? cpu_to_be32(QIB_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3dcc4985b60f..a6b0b098ff30 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -374,7 +374,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, wr->opcode != IB_WR_SEND_WITH_IMM) goto bail_inval; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) goto bail_inval; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) goto bail_inval; @@ -397,7 +397,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_FAST_REG_MR) + memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), + sizeof(wqe->fast_reg_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; j = 0; if (wr->num_sge) { @@ -426,7 +442,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, qp->port_num - 1)->ibmtu) goto bail_inval_free; else - atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); + atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount); wqe->ssn = qp->s_ssn++; qp->s_head = next; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a08df70e8503..8aa16851a5e6 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -338,7 +338,13 @@ struct qib_mr { * in qp->s_max_sge. */ struct qib_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_fast_reg_wr fast_reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4cd5428a2399..453860ade65e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -360,7 +360,7 @@ struct ipoib_dev_priv { unsigned tx_head; unsigned tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; - struct ib_send_wr tx_wr; + struct ib_ud_wr tx_wr; unsigned tx_outstanding; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -527,7 +527,7 @@ static inline void ipoib_build_sge(struct ipoib_dev_priv *priv, priv->tx_sge[i + off].addr = mapping[i + off]; priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); } - priv->tx_wr.num_sge = nr_frags + off; + priv->tx_wr.wr.num_sge = nr_frags + off; } #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index c78dc1638030..3ae9726efb98 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -700,9 +700,9 @@ static inline int post_send(struct ipoib_dev_priv *priv, ipoib_build_sge(priv, tx_req); - priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; + priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; - return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); + return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); } void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index d266667ca9b8..5ea0c14070d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -518,19 +518,19 @@ static inline int post_send(struct ipoib_dev_priv *priv, ipoib_build_sge(priv, tx_req); - priv->tx_wr.wr_id = wr_id; - priv->tx_wr.wr.ud.remote_qpn = qpn; - priv->tx_wr.wr.ud.ah = address; + priv->tx_wr.wr.wr_id = wr_id; + priv->tx_wr.remote_qpn = qpn; + priv->tx_wr.ah = address; if (head) { - priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size; - priv->tx_wr.wr.ud.header = head; - priv->tx_wr.wr.ud.hlen = hlen; - priv->tx_wr.opcode = IB_WR_LSO; + priv->tx_wr.mss = skb_shinfo(skb)->gso_size; + priv->tx_wr.header = head; + priv->tx_wr.hlen = hlen; + priv->tx_wr.wr.opcode = IB_WR_LSO; } else - priv->tx_wr.opcode = IB_WR_SEND; + priv->tx_wr.wr.opcode = IB_WR_SEND; - return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); + return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); } void ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -583,9 +583,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, } if (skb->ip_summed == CHECKSUM_PARTIAL) - priv->tx_wr.send_flags |= IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM; else - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f74316e679d2..65d916cc70c7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -461,7 +461,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) netdev_update_features(dev); dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); rtnl_unlock(); - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; ipoib_flush_paths(dev); rtnl_lock(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 136cbefe00f8..029589b21fe9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); - priv->tx_wr.wr.ud.remote_qkey = priv->qkey; + priv->tx_wr.remote_qkey = priv->qkey; set_qkey = 1; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 78845b6e8b81..d48c5bae7877 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -221,9 +221,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; - priv->tx_wr.opcode = IB_WR_SEND; - priv->tx_wr.sg_list = priv->tx_sge; - priv->tx_wr.send_flags = IB_SEND_SIGNALED; + priv->tx_wr.wr.opcode = IB_WR_SEND; + priv->tx_wr.wr.sg_list = priv->tx_sge; + priv->tx_wr.wr.send_flags = IB_SEND_SIGNALED; priv->rx_sge[0].lkey = priv->pd->local_dma_lkey; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a5edd6ede692..2fab519dbd86 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -300,7 +300,11 @@ struct iser_tx_desc { int num_sge; bool mapped; u8 wr_idx; - struct ib_send_wr wrs[ISER_MAX_WRS]; + union iser_wr { + struct ib_send_wr send; + struct ib_fast_reg_wr fast_reg; + struct ib_sig_handover_wr sig; + } wrs[ISER_MAX_WRS]; struct iser_mem_reg data_reg; struct iser_mem_reg prot_reg; struct ib_sig_attrs sig_attrs; @@ -712,11 +716,11 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn, static inline struct ib_send_wr * iser_tx_next_wr(struct iser_tx_desc *tx_desc) { - struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx]; + struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send; struct ib_send_wr *last_wr; if (tx_desc->wr_idx) { - last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1]; + last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send; last_wr->next = cur_wr; } tx_desc->wr_idx++; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4c46d67d37a1..f45e6a352173 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -683,7 +683,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, { struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; - struct ib_send_wr *wr; + struct ib_sig_handover_wr *wr; int ret; memset(sig_attrs, 0, sizeof(*sig_attrs)); @@ -693,26 +693,24 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (!pi_ctx->sig_mr_valid) { - wr = iser_tx_next_wr(tx_desc); - iser_inv_rkey(wr, pi_ctx->sig_mr); - } - - wr = iser_tx_next_wr(tx_desc); - wr->opcode = IB_WR_REG_SIG_MR; - wr->wr_id = ISER_FASTREG_LI_WRID; - wr->sg_list = &data_reg->sge; - wr->num_sge = 1; - wr->send_flags = 0; - wr->wr.sig_handover.sig_attrs = sig_attrs; - wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr; + if (!pi_ctx->sig_mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); + + wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); + wr->wr.opcode = IB_WR_REG_SIG_MR; + wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.sg_list = &data_reg->sge; + wr->wr.num_sge = 1; + wr->wr.send_flags = 0; + wr->sig_attrs = sig_attrs; + wr->sig_mr = pi_ctx->sig_mr; if (scsi_prot_sg_count(iser_task->sc)) - wr->wr.sig_handover.prot = &prot_reg->sge; + wr->prot = &prot_reg->sge; else - wr->wr.sig_handover.prot = NULL; - wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; + wr->prot = NULL; + wr->access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; pi_ctx->sig_mr_valid = 0; sig_reg->sge.lkey = pi_ctx->sig_mr->lkey; @@ -737,7 +735,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct ib_mr *mr = rsc->mr; struct ib_fast_reg_page_list *frpl = rsc->frpl; struct iser_tx_desc *tx_desc = &iser_task->desc; - struct ib_send_wr *wr; + struct ib_fast_reg_wr *wr; int offset, size, plen; plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, @@ -747,24 +745,22 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return -EINVAL; } - if (!rsc->mr_valid) { - wr = iser_tx_next_wr(tx_desc); - iser_inv_rkey(wr, mr); - } - - wr = iser_tx_next_wr(tx_desc); - wr->opcode = IB_WR_FAST_REG_MR; - wr->wr_id = ISER_FASTREG_LI_WRID; - wr->send_flags = 0; - wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset; - wr->wr.fast_reg.page_list = frpl; - wr->wr.fast_reg.page_list_len = plen; - wr->wr.fast_reg.page_shift = SHIFT_4K; - wr->wr.fast_reg.length = size; - wr->wr.fast_reg.rkey = mr->rkey; - wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); + if (!rsc->mr_valid) + iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); + + wr = fast_reg_wr(iser_tx_next_wr(tx_desc)); + wr->wr.opcode = IB_WR_FAST_REG_MR; + wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.send_flags = 0; + wr->iova_start = frpl->page_list[0] + offset; + wr->page_list = frpl; + wr->page_list_len = plen; + wr->page_shift = SHIFT_4K; + wr->length = size; + wr->rkey = mr->rkey; + wr->access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); rsc->mr_valid = 0; reg->sge.lkey = mr->lkey; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 85132d867bc8..b26022e30af1 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1135,7 +1135,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->opcode = IB_WR_SEND; wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr); + ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); if (ib_ret) iser_err("ib_post_send failed, ret:%d opcode:%d\n", ib_ret, bad_wr->opcode); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 403bd29443b8..02c4c0b4569d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1703,10 +1703,10 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) isert_unmap_data_buf(isert_conn, &wr->data); } - if (wr->send_wr) { + if (wr->rdma_wr) { isert_dbg("Cmd %p free send_wr\n", isert_cmd); - kfree(wr->send_wr); - wr->send_wr = NULL; + kfree(wr->rdma_wr); + wr->rdma_wr = NULL; } if (wr->ib_sge) { @@ -1741,7 +1741,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) } wr->ib_sge = NULL; - wr->send_wr = NULL; + wr->rdma_wr = NULL; } static void @@ -1910,7 +1910,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc, } device->unreg_rdma_mem(isert_cmd, isert_conn); - wr->send_wr_num = 0; + wr->rdma_wr_num = 0; if (ret) transport_send_check_condition_and_sense(se_cmd, se_cmd->pi_err, 0); @@ -1938,7 +1938,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, iscsit_stop_dataout_timer(cmd); device->unreg_rdma_mem(isert_cmd, isert_conn); cmd->write_data_done = wr->data.len; - wr->send_wr_num = 0; + wr->rdma_wr_num = 0; isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); @@ -2384,7 +2384,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) static int isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, - struct ib_sge *ib_sge, struct ib_send_wr *send_wr, + struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr, u32 data_left, u32 offset) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; @@ -2399,8 +2399,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); page_off = offset % PAGE_SIZE; - send_wr->sg_list = ib_sge; - send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; + rdma_wr->wr.sg_list = ib_sge; + rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc; /* * Perform mapping of TCM scatterlist memory ib_sge dma_addr. */ @@ -2425,11 +2425,11 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); } - send_wr->num_sge = ++i; + rdma_wr->wr.num_sge = ++i; isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", - send_wr->sg_list, send_wr->num_sge); + rdma_wr->wr.sg_list, rdma_wr->wr.num_sge); - return send_wr->num_sge; + return rdma_wr->wr.num_sge; } static int @@ -2440,7 +2440,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; struct isert_data_buf *data = &wr->data; - struct ib_send_wr *send_wr; + struct ib_rdma_wr *rdma_wr; struct ib_sge *ib_sge; u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; int ret = 0, i, ib_sge_cnt; @@ -2465,11 +2465,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } wr->ib_sge = ib_sge; - wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); - wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, + wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); + wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num, GFP_KERNEL); - if (!wr->send_wr) { - isert_dbg("Unable to allocate wr->send_wr\n"); + if (!wr->rdma_wr) { + isert_dbg("Unable to allocate wr->rdma_wr\n"); ret = -ENOMEM; goto unmap_cmd; } @@ -2477,31 +2477,31 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, wr->isert_cmd = isert_cmd; rdma_write_max = isert_conn->max_sge * PAGE_SIZE; - for (i = 0; i < wr->send_wr_num; i++) { - send_wr = &isert_cmd->rdma_wr.send_wr[i]; + for (i = 0; i < wr->rdma_wr_num; i++) { + rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i]; data_len = min(data_left, rdma_write_max); - send_wr->send_flags = 0; + rdma_wr->wr.send_flags = 0; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - send_wr->opcode = IB_WR_RDMA_WRITE; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; - if (i + 1 == wr->send_wr_num) - send_wr->next = &isert_cmd->tx_desc.send_wr; + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; + rdma_wr->remote_addr = isert_cmd->read_va + offset; + rdma_wr->rkey = isert_cmd->read_stag; + if (i + 1 == wr->rdma_wr_num) + rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr; else - send_wr->next = &wr->send_wr[i + 1]; + rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr; } else { - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; - if (i + 1 == wr->send_wr_num) - send_wr->send_flags = IB_SEND_SIGNALED; + rdma_wr->wr.opcode = IB_WR_RDMA_READ; + rdma_wr->remote_addr = isert_cmd->write_va + va_offset; + rdma_wr->rkey = isert_cmd->write_stag; + if (i + 1 == wr->rdma_wr_num) + rdma_wr->wr.send_flags = IB_SEND_SIGNALED; else - send_wr->next = &wr->send_wr[i + 1]; + rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr; } ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, - send_wr, data_len, offset); + rdma_wr, data_len, offset); ib_sge += ib_sge_cnt; offset += data_len; @@ -2581,8 +2581,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, struct ib_device *ib_dev = device->ib_device; struct ib_mr *mr; struct ib_fast_reg_page_list *frpl; - struct ib_send_wr fr_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; + struct ib_fast_reg_wr fr_wr; + struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; int ret, pagelist_len; u32 page_off; @@ -2620,20 +2620,20 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, /* Prepare FASTREG WR */ memset(&fr_wr, 0, sizeof(fr_wr)); - fr_wr.wr_id = ISER_FASTREG_LI_WRID; - fr_wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off; - fr_wr.wr.fast_reg.page_list = frpl; - fr_wr.wr.fast_reg.page_list_len = pagelist_len; - fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fr_wr.wr.fast_reg.length = mem->len; - fr_wr.wr.fast_reg.rkey = mr->rkey; - fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; + fr_wr.wr.wr_id = ISER_FASTREG_LI_WRID; + fr_wr.wr.opcode = IB_WR_FAST_REG_MR; + fr_wr.iova_start = frpl->page_list[0] + page_off; + fr_wr.page_list = frpl; + fr_wr.page_list_len = pagelist_len; + fr_wr.page_shift = PAGE_SHIFT; + fr_wr.length = mem->len; + fr_wr.rkey = mr->rkey; + fr_wr.access_flags = IB_ACCESS_LOCAL_WRITE; if (!wr) - wr = &fr_wr; + wr = &fr_wr.wr; else - wr->next = &fr_wr; + wr->next = &fr_wr.wr; ret = ib_post_send(isert_conn->qp, wr, &bad_wr); if (ret) { @@ -2714,8 +2714,8 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, struct isert_rdma_wr *rdma_wr, struct fast_reg_descriptor *fr_desc) { - struct ib_send_wr sig_wr, inv_wr; - struct ib_send_wr *bad_wr, *wr = NULL; + struct ib_sig_handover_wr sig_wr; + struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; struct pi_context *pi_ctx = fr_desc->pi_ctx; struct ib_sig_attrs sig_attrs; int ret; @@ -2733,20 +2733,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn, } memset(&sig_wr, 0, sizeof(sig_wr)); - sig_wr.opcode = IB_WR_REG_SIG_MR; - sig_wr.wr_id = ISER_FASTREG_LI_WRID; - sig_wr.sg_list = &rdma_wr->ib_sg[DATA]; - sig_wr.num_sge = 1; - sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; - sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; - sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + sig_wr.wr.opcode = IB_WR_REG_SIG_MR; + sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID; + sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA]; + sig_wr.wr.num_sge = 1; + sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; + sig_wr.sig_attrs = &sig_attrs; + sig_wr.sig_mr = pi_ctx->sig_mr; if (se_cmd->t_prot_sg) - sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT]; + sig_wr.prot = &rdma_wr->ib_sg[PROT]; if (!wr) - wr = &sig_wr; + wr = &sig_wr.wr; else - wr->next = &sig_wr; + wr->next = &sig_wr.wr; ret = ib_post_send(isert_conn->qp, wr, &bad_wr); if (ret) { @@ -2840,7 +2840,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = conn->context; struct fast_reg_descriptor *fr_desc = NULL; - struct ib_send_wr *send_wr; + struct ib_rdma_wr *rdma_wr; struct ib_sge *ib_sg; u32 offset; int ret = 0; @@ -2881,26 +2881,26 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg)); wr->ib_sge = &wr->s_ib_sge; - wr->send_wr_num = 1; - memset(&wr->s_send_wr, 0, sizeof(*send_wr)); - wr->send_wr = &wr->s_send_wr; + wr->rdma_wr_num = 1; + memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr)); + wr->rdma_wr = &wr->s_rdma_wr; wr->isert_cmd = isert_cmd; - send_wr = &isert_cmd->rdma_wr.s_send_wr; - send_wr->sg_list = &wr->s_ib_sge; - send_wr->num_sge = 1; - send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; + rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr; + rdma_wr->wr.sg_list = &wr->s_ib_sge; + rdma_wr->wr.num_sge = 1; + rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - send_wr->opcode = IB_WR_RDMA_WRITE; - send_wr->wr.rdma.remote_addr = isert_cmd->read_va; - send_wr->wr.rdma.rkey = isert_cmd->read_stag; - send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? + rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; + rdma_wr->remote_addr = isert_cmd->read_va; + rdma_wr->rkey = isert_cmd->read_stag; + rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 0 : IB_SEND_SIGNALED; } else { - send_wr->opcode = IB_WR_RDMA_READ; - send_wr->wr.rdma.remote_addr = isert_cmd->write_va; - send_wr->wr.rdma.rkey = isert_cmd->write_stag; - send_wr->send_flags = IB_SEND_SIGNALED; + rdma_wr->wr.opcode = IB_WR_RDMA_READ; + rdma_wr->remote_addr = isert_cmd->write_va; + rdma_wr->rkey = isert_cmd->write_stag; + rdma_wr->wr.send_flags = IB_SEND_SIGNALED; } return 0; @@ -2948,11 +2948,11 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); isert_init_send_wr(isert_conn, isert_cmd, &isert_cmd->tx_desc.send_wr); - isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; - wr->send_wr_num += 1; + isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr; + wr->rdma_wr_num += 1; } - rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); + rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed); if (rc) isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); @@ -2986,7 +2986,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) return rc; } - rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); + rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed); if (rc) isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 6a04ba3c0f72..0a4a7861cce9 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -118,9 +118,9 @@ struct isert_rdma_wr { enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; struct ib_sge s_ib_sge; - int send_wr_num; - struct ib_send_wr *send_wr; - struct ib_send_wr s_send_wr; + int rdma_wr_num; + struct ib_rdma_wr *rdma_wr; + struct ib_rdma_wr s_rdma_wr; struct ib_sge ib_sg[3]; struct isert_data_buf data; struct isert_data_buf prot; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b481490ad257..1390f99ca76b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1306,7 +1306,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; struct ib_send_wr *bad_wr; - struct ib_send_wr wr; + struct ib_fast_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; @@ -1324,17 +1324,17 @@ static int srp_map_finish_fr(struct srp_map_state *state, sizeof(state->pages[0]) * state->npages); memset(&wr, 0, sizeof(wr)); - wr.opcode = IB_WR_FAST_REG_MR; - wr.wr_id = FAST_REG_WR_ID_MASK; - wr.wr.fast_reg.iova_start = state->base_dma_addr; - wr.wr.fast_reg.page_list = desc->frpl; - wr.wr.fast_reg.page_list_len = state->npages; - wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); - wr.wr.fast_reg.length = state->dma_len; - wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - wr.wr.fast_reg.rkey = desc->mr->lkey; + wr.wr.opcode = IB_WR_FAST_REG_MR; + wr.wr.wr_id = FAST_REG_WR_ID_MASK; + wr.iova_start = state->base_dma_addr; + wr.page_list = desc->frpl; + wr.page_list_len = state->npages; + wr.page_shift = ilog2(dev->mr_page_size); + wr.length = state->dma_len; + wr.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + wr.rkey = desc->mr->lkey; *state->fr.next++ = desc; state->nmdesc++; @@ -1342,7 +1342,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, srp_map_desc(state, state->base_dma_addr, state->dma_len, desc->mr->rkey); - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, &bad_wr); } static int srp_finish_mapping(struct srp_map_state *state, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f6fe0414139b..d65533e3a5eb 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2822,7 +2822,7 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; struct ib_send_wr *bad_wr; struct rdma_iu *riu; int i; @@ -2850,29 +2850,29 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, for (i = 0; i < n_rdma; ++i, ++riu) { if (dir == DMA_FROM_DEVICE) { - wr.opcode = IB_WR_RDMA_WRITE; - wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + wr.wr.opcode = IB_WR_RDMA_WRITE; + wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? SRPT_RDMA_WRITE_LAST : SRPT_RDMA_MID, ioctx->ioctx.index); } else { - wr.opcode = IB_WR_RDMA_READ; - wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + wr.wr.opcode = IB_WR_RDMA_READ; + wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? SRPT_RDMA_READ_LAST : SRPT_RDMA_MID, ioctx->ioctx.index); } - wr.next = NULL; - wr.wr.rdma.remote_addr = riu->raddr; - wr.wr.rdma.rkey = riu->rkey; - wr.num_sge = riu->sge_cnt; - wr.sg_list = riu->sge; + wr.wr.next = NULL; + wr.remote_addr = riu->raddr; + wr.rkey = riu->rkey; + wr.wr.num_sge = riu->sge_cnt; + wr.wr.sg_list = riu->sge; /* only get completion event for the last rdma write */ if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) - wr.send_flags = IB_SEND_SIGNALED; + wr.wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(ch->qp, &wr, &bad_wr); + ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); if (ret) break; } @@ -2881,11 +2881,11 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", __func__, __LINE__, ret, i, n_rdma); if (ret && i > 0) { - wr.num_sge = 0; - wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); - wr.send_flags = IB_SEND_SIGNALED; + wr.wr.num_sge = 0; + wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); + wr.wr.send_flags = IB_SEND_SIGNALED; while (ch->state == CH_LIVE && - ib_post_send(ch->qp, &wr, &bad_wr) != 0) { + ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) { pr_info("Trying to abort failed RDMA transfer [%d]\n", ioctx->ioctx.index); msleep(1000); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index f4b6c33ac318..144449122778 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -525,7 +525,7 @@ typedef struct kib_tx /* transmit message */ __u64 tx_msgaddr; /* message buffer (I/O addr) */ DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */ int tx_nwrq; /* # send work items */ - struct ib_send_wr *tx_wrq; /* send work items... */ + struct ib_rdma_wr *tx_wrq; /* send work items... */ struct ib_sge *tx_sge; /* ...and their memory */ kib_rdma_desc_t *tx_rd; /* rdma descriptor */ int tx_nfrags; /* # entries in... */ diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index a23a6d956a4d..a34f1707c167 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -834,7 +834,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit) /* close_conn will launch failover */ rc = -ENETDOWN; } else { - rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq); + rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq); } conn->ibc_last_send = jiffies; @@ -1008,7 +1008,7 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) { kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; + struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; int nob = offsetof(kib_msg_t, ibm_u) + body_nob; struct ib_mr *mr; @@ -1027,12 +1027,12 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) memset(wrq, 0, sizeof(*wrq)); - wrq->next = NULL; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_SEND; - wrq->send_flags = IB_SEND_SIGNALED; + wrq->wr.next = NULL; + wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); + wrq->wr.sg_list = sge; + wrq->wr.num_sge = 1; + wrq->wr.opcode = IB_WR_SEND; + wrq->wr.send_flags = IB_SEND_SIGNALED; tx->tx_nwrq++; } @@ -1044,7 +1044,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, kib_msg_t *ibmsg = tx->tx_msg; kib_rdma_desc_t *srcrd = tx->tx_rd; struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_send_wr *wrq = &tx->tx_wrq[0]; + struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next; int rc = resid; int srcidx; int dstidx; @@ -1090,16 +1090,17 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type, sge->length = wrknob; wrq = &tx->tx_wrq[tx->tx_nwrq]; + next = wrq + 1; - wrq->next = wrq + 1; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; + wrq->wr.next = &next->wr; + wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); + wrq->wr.sg_list = sge; + wrq->wr.num_sge = 1; + wrq->wr.opcode = IB_WR_RDMA_WRITE; + wrq->wr.send_flags = 0; - wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); - wrq->wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx); + wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); + wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx); srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob); dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob); diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c index 86708dee58b1..4c43ca935cc7 100644 --- a/drivers/staging/rdma/amso1100/c2_qp.c +++ b/drivers/staging/rdma/amso1100/c2_qp.c @@ -860,9 +860,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, flags |= SQ_READ_FENCE; } wr.sqwr.rdma_write.remote_stag = - cpu_to_be32(ib_wr->wr.rdma.rkey); + cpu_to_be32(rdma_wr(ib_wr)->rkey); wr.sqwr.rdma_write.remote_to = - cpu_to_be64(ib_wr->wr.rdma.remote_addr); + cpu_to_be64(rdma_wr(ib_wr)->remote_addr); err = move_sgl((struct c2_data_addr *) & (wr.sqwr.rdma_write.data), ib_wr->sg_list, @@ -889,9 +889,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wr.sqwr.rdma_read.local_to = cpu_to_be64(ib_wr->sg_list->addr); wr.sqwr.rdma_read.remote_stag = - cpu_to_be32(ib_wr->wr.rdma.rkey); + cpu_to_be32(rdma_wr(ib_wr)->rkey); wr.sqwr.rdma_read.remote_to = - cpu_to_be64(ib_wr->wr.rdma.remote_addr); + cpu_to_be64(rdma_wr(ib_wr)->remote_addr); wr.sqwr.rdma_read.length = cpu_to_be32(ib_wr->sg_list->length); break; diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c index 47f94984353d..10e2074384f5 100644 --- a/drivers/staging/rdma/ehca/ehca_reqs.c +++ b/drivers/staging/rdma/ehca/ehca_reqs.c @@ -110,19 +110,19 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* need ib_mad struct */ #include -static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +static void trace_ud_wr(const struct ib_ud_wr *ud_wr) { int idx; int j; - while (send_wr) { - struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; - struct ib_sge *sge = send_wr->sg_list; - ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x", idx, send_wr->wr_id, - send_wr->num_sge, send_wr->send_flags, - send_wr->opcode); + while (ud_wr) { + struct ib_mad_hdr *mad_hdr = ud_wrmad_hdr; + struct ib_sge *sge = ud_wr->wr.sg_list; + ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id, + ud_wr->wr.num_sge, ud_wr->wr.send_flags, + ud_wr->.wr.opcode); if (mad_hdr) { - ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x " "mgmt_class=%x class_version=%x method=%x " "status=%x class_specific=%x tid=%lx " "attr_id=%x resv=%x attr_mod=%x", @@ -134,33 +134,33 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->resv, mad_hdr->attr_mod); } - for (j = 0; j < send_wr->num_sge; j++) { + for (j = 0; j < ud_wr->wr.num_sge; j++) { u8 *data = __va(sge->addr); - ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); /* assume length is n*16 */ - ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + ehca_dmp(data, sge->length, "ud_wr#%x sge#%x", idx, j); sge++; } /* eof for j */ idx++; - send_wr = send_wr->next; - } /* eof while send_wr */ + ud_wr = ud_wr(ud_wr->wr.next); + } /* eof while ud_wr */ } #endif /* DEBUG_GSI_SEND_WR */ static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr, + struct ib_send_wr *send_wr, u32 sq_map_idx, int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; - u32 remote_qkey = send_wr->wr.ud.remote_qkey; + u32 remote_qkey; struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || @@ -223,20 +223,21 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intential here */ case IB_QPT_UD: /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = ud_wr(send_wr)->remote_qkey; + if (remote_qkey & 0x80000000) remote_qkey = qp->qkey; - wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8; wqe_p->local_ee_context_qkey = remote_qkey; - if (unlikely(!send_wr->wr.ud.ah)) { - ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + if (unlikely(!ud_wr(send_wr)->ah)) { + ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp); return -EINVAL; } - if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) { ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); return -EINVAL; } - my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah); wqe_p->u.ud_av.ud_av = my_av->av; /* @@ -255,9 +256,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, qp->qp_type == IB_QPT_GSI) wqe_p->u.ud_av.ud_av.pmtu = 1; if (qp->qp_type == IB_QPT_GSI) { - wqe_p->pkeyi = send_wr->wr.ud.pkey_index; + wqe_p->pkeyi = ud_wr(send_wr)->pkey_index; #ifdef DEBUG_GSI_SEND_WR - trace_send_wr_ud(send_wr); + trace_ud_wr(ud_wr(send_wr)); #endif /* DEBUG_GSI_SEND_WR */ } break; @@ -269,8 +270,8 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, case IB_QPT_RC: /* TODO: atomic not implemented */ wqe_p->u.nud.remote_virtual_address = - send_wr->wr.rdma.remote_addr; - wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + rdma_wr(send_wr)->remote_addr; + wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey; /* * omitted checking of IB_SEND_INLINE diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c index f6eff177ace1..82c21b1c0263 100644 --- a/drivers/staging/rdma/hfi1/keys.c +++ b/drivers/staging/rdma/hfi1/keys.c @@ -358,12 +358,12 @@ bail: /* * Initialize the memory region specified by the work request. */ -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr) +int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr) { struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; struct hfi1_pd *pd = to_ipd(qp->ibqp.pd); struct hfi1_mregion *mr; - u32 rkey = wr->wr.fast_reg.rkey; + u32 rkey = wr->rkey; unsigned i, n, m; int ret = -EINVAL; unsigned long flags; @@ -380,22 +380,22 @@ int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr) if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) goto bail; - if (wr->wr.fast_reg.page_list_len > mr->max_segs) + if (wr->page_list_len > mr->max_segs) goto bail; - ps = 1UL << wr->wr.fast_reg.page_shift; - if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + ps = 1UL << wr->page_shift; + if (wr->length > ps * wr->page_list_len) goto bail; - mr->user_base = wr->wr.fast_reg.iova_start; - mr->iova = wr->wr.fast_reg.iova_start; + mr->user_base = wr->iova_start; + mr->iova = wr->iova_start; mr->lkey = rkey; - mr->length = wr->wr.fast_reg.length; - mr->access_flags = wr->wr.fast_reg.access_flags; - page_list = wr->wr.fast_reg.page_list->page_list; + mr->length = wr->length; + mr->access_flags = wr->access_flags; + page_list = wr->page_list->page_list; m = 0; n = 0; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + for (i = 0; i < wr->page_list_len; i++) { mr->map[m]->segs[n].vaddr = (void *) page_list[i]; mr->map[m]->segs[n].length = ps; if (++n == HFI1_SEGSZ) { diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index df1fa56eaf85..f8c36166962f 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends) if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; } diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 632dd5ba7dfd..fd0ac608c62d 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) goto bail; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp) */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index a4115288db66..d614474770b3 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -481,8 +481,8 @@ again: if (wqe->length == 0) break; if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; qp->r_sge.sg_list = NULL; @@ -494,8 +494,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; release = 0; @@ -512,18 +512,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); hfi1_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; goto send_comp; @@ -913,7 +913,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe, if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount); /* See ch. 11.2.4.1 and 10.7.3.1 */ if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index b536f397737c..6095039c4485 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index d40d1a1e10aa..5a9c784bec04 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->n_pkt_drops++; rcu_read_unlock(); @@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) goto drop; } - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; ppd = ppd_from_ibp(ibp); if (qp->ibqp.qp_num > 1) { @@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) if (qp->ibqp.qp_num) { u32 qkey; - qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey; + qkey = (int)swqe->ud_wr.remote_qkey < 0 ? + sqp->qkey : swqe->ud_wr.remote_qkey; if (unlikely(qkey != qp->qkey)) { u16 lid; @@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe) if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) { if (sqp->ibqp.qp_type == IB_QPT_GSI || sqp->ibqp.qp_type == IB_QPT_SMI) - wc.pkey_index = swqe->wr.wr.ud.pkey_index; + wc.pkey_index = swqe->ud_wr.pkey_index; else wc.pkey_index = sqp->s_pkey_index; } else { @@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) /* Construct the header. */ ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE || ah_attr->dlid == HFI1_PERMISSIVE_LID) { lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); @@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp) bth0 |= IB_BTH_SOLICITED; bth0 |= extra_bytes << 20; if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index); + bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); else bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ qp->s_hdr->ahgcount = 0; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 41bb59eb001c..981e6c1b79a3 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -391,7 +391,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) wr->opcode != IB_WR_SEND_WITH_IMM) return -EINVAL; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) return -EINVAL; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) return -EINVAL; @@ -412,7 +412,24 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) rkt = &to_idev(qp->ibqp.device)->lk_table; pd = to_ipd(qp->ibqp.pd); wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_FAST_REG_MR) + memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), + sizeof(wqe->fast_reg_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; j = 0; if (wr->num_sge) { @@ -438,7 +455,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (wqe->length > 0x80000000U) goto bail_inval_free; } else { - struct hfi1_ah *ah = to_iah(wr->wr.ud.ah); + struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah); atomic_inc(&ah->refcount); } diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index ed903a93baf7..cf5a3c956284 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -348,7 +348,13 @@ struct hfi1_mr { * in qp->s_max_sge. */ struct hfi1_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + struct ib_ud_wr ud_wr; + struct ib_fast_reg_wr fast_reg_wr; + }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ @@ -1025,7 +1031,7 @@ struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list( void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr); +int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr); struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c index 79b3dbc97179..d4aa53574e57 100644 --- a/drivers/staging/rdma/ipath/ipath_rc.c +++ b/drivers/staging/rdma/ipath/ipath_rc.c @@ -350,9 +350,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) goto bail; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; @@ -401,9 +401,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); @@ -433,21 +433,21 @@ int ipath_make_rc_req(struct ipath_qp *qp) if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); + wqe->atomic_wr.swap); ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); } else { qp->s_state = OP(FETCH_ADD); ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); + wqe->atomic_wr.compare_add); ohdr->u.atomic_eth.compare_data = 0; } ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr >> 32); + wqe->atomic_wr.remote_addr >> 32); ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->wr.wr.atomic.remote_addr); + wqe->atomic_wr.remote_addr); ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); + wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; @@ -567,9 +567,9 @@ int ipath_make_rc_req(struct ipath_qp *qp) ipath_init_restart(qp, wqe); len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + cpu_to_be64(wqe->rdma_wr.remote_addr + len); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c index 1f95bbaf7602..46af8b03d3d4 100644 --- a/drivers/staging/rdma/ipath/ipath_ruc.c +++ b/drivers/staging/rdma/ipath/ipath_ruc.c @@ -353,8 +353,8 @@ again: if (wqe->length == 0) break; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_WRITE))) goto acc_err; break; @@ -363,8 +363,8 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->rdma_wr.remote_addr, + wqe->rdma_wr.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; qp->r_sge.sge = wqe->sg_list[0]; @@ -377,18 +377,18 @@ again: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.atomic.remote_addr, - wqe->wr.wr.atomic.rkey, + wqe->atomic_wr.remote_addr, + wqe->atomic_wr.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = wqe->wr.wr.atomic.compare_add; + sdata = wqe->atomic_wr.compare_add; *(u64 *) sqp->s_sge.sge.vaddr = (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); + sdata, wqe->atomic_wr.swap); goto send_comp; default: diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c index 22e60998f1a7..0246b30280b9 100644 --- a/drivers/staging/rdma/ipath/ipath_uc.c +++ b/drivers/staging/rdma/ipath/ipath_uc.c @@ -126,9 +126,9 @@ int ipath_make_uc_req(struct ipath_qp *qp) case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); + cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / 4; if (len > pmtu) { diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c index e8a2a915251e..3ffc1565d03d 100644 --- a/drivers/staging/rdma/ipath/ipath_ud.c +++ b/drivers/staging/rdma/ipath/ipath_ud.c @@ -65,7 +65,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) u32 rlen; u32 length; - qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); + qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn); if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { dev->n_pkt_drops++; goto done; @@ -77,8 +77,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) * qkey from the QP context instead of the WR (see 10.2.5). */ if (unlikely(qp->ibqp.qp_num && - ((int) swqe->wr.wr.ud.remote_qkey < 0 ? - sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { + ((int) swqe->ud_wr.remote_qkey < 0 ? + sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) { /* XXX OK to lose a count once in a while. */ dev->qkey_violations++; dev->n_pkt_drops++; @@ -175,7 +175,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } else spin_unlock_irqrestore(&rq->lock, flags); - ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(swqe->ud_wr.ah)->attr; if (ah_attr->ah_flags & IB_AH_GRH) { ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); wc.wc_flags |= IB_WC_GRH; @@ -225,7 +225,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, - swqe->wr.send_flags & IB_SEND_SOLICITED); + swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED); drop: if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); @@ -280,7 +280,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) next_cur = 0; /* Construct the header. */ - ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + ah_attr = &to_iah(wqe->ud_wr.ah)->attr; if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { if (ah_attr->dlid != IPATH_PERMISSIVE_LID) dev->n_multicast_xmit++; @@ -322,7 +322,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_wqe = wqe; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge; if (ah_attr->ah_flags & IB_AH_GRH) { /* Header size in 32-bit words. */ @@ -340,9 +340,9 @@ int ipath_make_ud_req(struct ipath_qp *qp) lrh0 = IPATH_LRH_BTH; ohdr = &qp->s_hdr.u.oth; } - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data; bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; } else bth0 = IB_OPCODE_UD_SEND_ONLY << 24; @@ -360,7 +360,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_hdr.lrh[3] = cpu_to_be16(lid); } else qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; - if (wqe->wr.send_flags & IB_SEND_SOLICITED) + if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED) bth0 |= 1 << 23; bth0 |= extra_bytes << 20; bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : @@ -372,14 +372,14 @@ int ipath_make_ud_req(struct ipath_qp *qp) ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID ? cpu_to_be32(IPATH_MULTICAST_QPN) : - cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + cpu_to_be32(wqe->ud_wr.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? - qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c index ed2bbc2f7eae..15633ec1843f 100644 --- a/drivers/staging/rdma/ipath/ipath_verbs.c +++ b/drivers/staging/rdma/ipath/ipath_verbs.c @@ -374,7 +374,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) wr->opcode != IB_WR_SEND_WITH_IMM) goto bail_inval; /* Check UD destination address PD */ - if (qp->ibqp.pd != wr->wr.ud.ah->pd) + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) goto bail_inval; } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) goto bail_inval; @@ -395,7 +395,23 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) } wqe = get_swqe_ptr(qp, qp->s_head); - wqe->wr = *wr; + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_FAST_REG_MR) + memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), + sizeof(wqe->fast_reg_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + wqe->length = 0; if (wr->num_sge) { acc = wr->opcode >= IB_WR_RDMA_READ ? diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h index ec167e545e15..ed102a26ec08 100644 --- a/drivers/staging/rdma/ipath/ipath_verbs.h +++ b/drivers/staging/rdma/ipath/ipath_verbs.h @@ -277,7 +277,14 @@ struct ipath_mr { * in qp->s_max_sge. */ struct ipath_swqe { - struct ib_send_wr wr; /* don't use wr.sg_list */ + union { + struct ib_send_wr wr; /* don't use wr.sg_list */ + struct ib_ud_wr ud_wr; + struct ib_fast_reg_wr fast_reg_wr; + struct ib_rdma_wr rdma_wr; + struct ib_atomic_wr atomic_wr; + }; + u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7845fae6f2df..25f022c9aaac 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1100,54 +1100,94 @@ struct ib_send_wr { __be32 imm_data; u32 invalidate_rkey; } ex; - union { - struct { - u64 remote_addr; - u32 rkey; - } rdma; - struct { - u64 remote_addr; - u64 compare_add; - u64 swap; - u64 compare_add_mask; - u64 swap_mask; - u32 rkey; - } atomic; - struct { - struct ib_ah *ah; - void *header; - int hlen; - int mss; - u32 remote_qpn; - u32 remote_qkey; - u16 pkey_index; /* valid for GSI only */ - u8 port_num; /* valid for DR SMPs on switch only */ - } ud; - struct { - u64 iova_start; - struct ib_fast_reg_page_list *page_list; - unsigned int page_shift; - unsigned int page_list_len; - u32 length; - int access_flags; - u32 rkey; - } fast_reg; - struct { - struct ib_mw *mw; - /* The new rkey for the memory window. */ - u32 rkey; - struct ib_mw_bind_info bind_info; - } bind_mw; - struct { - struct ib_sig_attrs *sig_attrs; - struct ib_mr *sig_mr; - int access_flags; - struct ib_sge *prot; - } sig_handover; - } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; +struct ib_rdma_wr { + struct ib_send_wr wr; + u64 remote_addr; + u32 rkey; +}; + +static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_rdma_wr, wr); +} + +struct ib_atomic_wr { + struct ib_send_wr wr; + u64 remote_addr; + u64 compare_add; + u64 swap; + u64 compare_add_mask; + u64 swap_mask; + u32 rkey; +}; + +static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_atomic_wr, wr); +} + +struct ib_ud_wr { + struct ib_send_wr wr; + struct ib_ah *ah; + void *header; + int hlen; + int mss; + u32 remote_qpn; + u32 remote_qkey; + u16 pkey_index; /* valid for GSI only */ + u8 port_num; /* valid for DR SMPs on switch only */ +}; + +static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_ud_wr, wr); +} + +struct ib_fast_reg_wr { + struct ib_send_wr wr; + u64 iova_start; + struct ib_fast_reg_page_list *page_list; + unsigned int page_shift; + unsigned int page_list_len; + u32 length; + int access_flags; + u32 rkey; +}; + +static inline struct ib_fast_reg_wr *fast_reg_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_fast_reg_wr, wr); +} + +struct ib_bind_mw_wr { + struct ib_send_wr wr; + struct ib_mw *mw; + /* The new rkey for the memory window. */ + u32 rkey; + struct ib_mw_bind_info bind_info; +}; + +static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_bind_mw_wr, wr); +} + +struct ib_sig_handover_wr { + struct ib_send_wr wr; + struct ib_sig_attrs *sig_attrs; + struct ib_mr *sig_mr; + int access_flags; + struct ib_sge *prot; +}; + +static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_sig_handover_wr, wr); +} + struct ib_recv_wr { struct ib_recv_wr *next; u64 wr_id; diff --git a/net/rds/ib.h b/net/rds/ib.h index aae60fda77f6..301c48385166 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -69,7 +69,11 @@ struct rds_ib_connect_private { struct rds_ib_send_work { void *s_op; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_atomic_wr s_atomic_wr; + }; struct ib_sge s_sge[RDS_IB_MAX_SGE]; unsigned long s_queued; }; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e88047086b6..987386e9931b 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -785,23 +785,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; - send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; - send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_atomic_wr.compare_add = op->op_m_cswp.compare; + send->s_atomic_wr.swap = op->op_m_cswp.swap; + send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; - send->s_wr.wr.atomic.swap = 0; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; - send->s_wr.wr.atomic.swap_mask = 0; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_atomic_wr.compare_add = op->op_m_fadd.add; + send->s_atomic_wr.swap = 0; + send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_atomic_wr.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); - send->s_wr.num_sge = 1; - send->s_wr.next = NULL; - send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; - send->s_wr.wr.atomic.rkey = op->op_rkey; + send->s_atomic_wr.wr.num_sge = 1; + send->s_atomic_wr.wr.next = NULL; + send->s_atomic_wr.remote_addr = op->op_remote_addr; + send->s_atomic_wr.rkey = op->op_rkey; send->s_op = op; rds_message_addref(container_of(send->s_op, struct rds_message, atomic)); @@ -826,11 +826,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &send->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + failed_wr = &send->s_atomic_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr); rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, - send, &send->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &send->s_wr); + send, &send->s_atomic_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -839,9 +839,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) goto out; } - if (unlikely(failed_wr != &send->s_wr)) { + if (unlikely(failed_wr != &send->s_atomic_wr.wr)) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &send->s_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); } out: @@ -912,22 +912,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; if (num_sge > max_sge) { - send->s_wr.num_sge = max_sge; + send->s_rdma_wr.wr.num_sge = max_sge; num_sge -= max_sge; } else { - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; } - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); @@ -942,7 +943,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr.wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -963,11 +966,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr.wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -976,9 +979,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) goto out; } - if (unlikely(failed_wr != &first->s_wr)) { + if (unlikely(failed_wr != &first->s_rdma_wr.wr)) { printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &first->s_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); } diff --git a/net/rds/iw.h b/net/rds/iw.h index cbe6674e31ee..fe858e5dd8d1 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -77,7 +77,11 @@ struct rds_iw_send_work { struct ib_fast_reg_page_list *s_page_list; unsigned char s_remap_count; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_send_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_fast_reg_wr s_fast_reg_wr; + }; struct ib_sge s_sge[RDS_IW_MAX_SGE]; unsigned long s_queued; }; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6a8fbd6e69e7..f8a612cc69e6 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -696,7 +696,8 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) { struct rds_iw_mr *ibmr = mapping->m_mr; - struct ib_send_wr f_wr, *failed_wr; + struct ib_fast_reg_wr f_wr; + struct ib_send_wr *failed_wr; int ret; /* @@ -709,22 +710,22 @@ static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) mapping->m_rkey = ibmr->mr->rkey; memset(&f_wr, 0, sizeof(f_wr)); - f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; - f_wr.opcode = IB_WR_FAST_REG_MR; - f_wr.wr.fast_reg.length = mapping->m_sg.bytes; - f_wr.wr.fast_reg.rkey = mapping->m_rkey; - f_wr.wr.fast_reg.page_list = ibmr->page_list; - f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; - f_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | + f_wr.wr.wr_id = RDS_IW_FAST_REG_WR_ID; + f_wr.wr.opcode = IB_WR_FAST_REG_MR; + f_wr.length = mapping->m_sg.bytes; + f_wr.rkey = mapping->m_rkey; + f_wr.page_list = ibmr->page_list; + f_wr.page_list_len = mapping->m_sg.dma_len; + f_wr.page_shift = PAGE_SHIFT; + f_wr.access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; - f_wr.wr.fast_reg.iova_start = 0; - f_wr.send_flags = IB_SEND_SIGNALED; + f_wr.iova_start = 0; + f_wr.wr.send_flags = IB_SEND_SIGNALED; - failed_wr = &f_wr; - ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); - BUG_ON(failed_wr != &f_wr); + failed_wr = &f_wr.wr; + ret = ib_post_send(ibmr->cm_id->qp, &f_wr.wr, &failed_wr); + BUG_ON(failed_wr != &f_wr.wr); if (ret) printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 86152ec3b887..f6e23c515b44 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) send->s_op = NULL; send->s_mapping = NULL; - send->s_wr.next = NULL; - send->s_wr.wr_id = i; - send->s_wr.sg_list = send->s_sge; - send->s_wr.num_sge = 1; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.send_flags = 0; - send->s_wr.ex.imm_data = 0; + send->s_send_wr.next = NULL; + send->s_send_wr.wr_id = i; + send->s_send_wr.sg_list = send->s_sge; + send->s_send_wr.num_sge = 1; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.send_flags = 0; + send->s_send_wr.ex.imm_data = 0; sge = rds_iw_data_sge(ic, send->s_sge); sge->lkey = 0; @@ -179,7 +179,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic) ib_dereg_mr(send->s_mr); BUG_ON(!send->s_page_list); ib_free_fast_reg_page_list(send->s_page_list); - if (send->s_wr.opcode == 0xdead) + if (send->s_send_wr.opcode == 0xdead) continue; if (send->s_rm) rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); @@ -247,7 +247,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send = &ic->i_sends[oldest]; /* In the error case, wc.opcode sometimes contains garbage */ - switch (send->s_wr.opcode) { + switch (send->s_send_wr.opcode) { case IB_WR_SEND: if (send->s_rm) rds_iw_send_unmap_rm(ic, send, wc.status); @@ -262,12 +262,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) default: printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", - __func__, send->s_wr.opcode); + __func__, send->s_send_wr.opcode); break; } - send->s_wr.opcode = 0xdead; - send->s_wr.num_sge = 1; + send->s_send_wr.opcode = 0xdead; + send->s_send_wr.num_sge = 1; if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); @@ -455,10 +455,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, WARN_ON(pos != send - ic->i_sends); - send->s_wr.send_flags = send_flags; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.num_sge = 2; - send->s_wr.next = NULL; + send->s_send_wr.send_flags = send_flags; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.num_sge = 2; + send->s_send_wr.next = NULL; send->s_queued = jiffies; send->s_op = NULL; @@ -472,7 +472,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, } else { /* We're sending a packet with no payload. There is only * one SGE */ - send->s_wr.num_sge = 1; + send->s_send_wr.num_sge = 1; sge = &send->s_sge[0]; } @@ -672,23 +672,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } ic->i_unsignaled_bytes -= len; if (ic->i_unsignaled_bytes <= 0) { ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } /* * Always signal the last one if we're stopping due to flow control. */ if (flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next); sent += len; rm->data.op_dmaoff += len; @@ -722,7 +722,7 @@ add_header: } if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_send_wr; prev = send; pos = (pos + 1) % ic->i_send_ring.w_nr; @@ -736,7 +736,7 @@ add_header: /* if we finished the message then send completion owns it */ if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_rm = ic->i_rm; - prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; ic->i_rm = NULL; } @@ -748,11 +748,11 @@ add_header: rds_iw_send_add_credits(conn, credit_alloc - i); /* XXX need to worry about failed_wr and partial sends. */ - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_send_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_send_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_send_wr); if (ret) { printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -778,14 +778,14 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd * in the sg list is added to the fast reg page list and placed * inside the fast_reg_mr WR. */ - send->s_wr.opcode = IB_WR_FAST_REG_MR; - send->s_wr.wr.fast_reg.length = len; - send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; - send->s_wr.wr.fast_reg.page_list = send->s_page_list; - send->s_wr.wr.fast_reg.page_list_len = nent; - send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; - send->s_wr.wr.fast_reg.iova_start = sg_addr; + send->s_fast_reg_wr.wr.opcode = IB_WR_FAST_REG_MR; + send->s_fast_reg_wr.length = len; + send->s_fast_reg_wr.rkey = send->s_mr->rkey; + send->s_fast_reg_wr.page_list = send->s_page_list; + send->s_fast_reg_wr.page_list_len = nent; + send->s_fast_reg_wr.page_shift = PAGE_SHIFT; + send->s_fast_reg_wr.access_flags = IB_ACCESS_REMOTE_WRITE; + send->s_fast_reg_wr.iova_start = sg_addr; ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); } @@ -863,7 +863,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) num_sge = op->op_count; for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { - send->s_wr.send_flags = 0; + send->s_rdma_wr.wr.send_flags = 0; send->s_queued = jiffies; /* @@ -872,7 +872,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; } /* To avoid the need to have the plumbing to invalidate the fastreg_mr used @@ -880,29 +880,30 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. */ if (op->op_write) - send->s_wr.opcode = IB_WR_RDMA_WRITE; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; else - send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; send->s_op = op; if (num_sge > rds_iwdev->max_sge) { - send->s_wr.num_sge = rds_iwdev->max_sge; + send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge; num_sge -= rds_iwdev->max_sge; } else - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); else { send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); @@ -917,15 +918,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat++; } - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { - send->s_wr.num_sge = 1; + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) { + send->s_rdma_wr.wr.num_sge = 1; send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -934,7 +937,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) /* if we finished the message then send completion owns it */ if (scat == &op->op_sg[op->op_count]) - first->s_wr.send_flags = IB_SEND_SIGNALED; + first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; if (i < work_alloc) { rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); @@ -953,11 +956,11 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) work_alloc++; } - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5318951b3b53..0d2f46f600b6 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -312,7 +312,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_send_wr fastreg_wr, *bad_wr; + struct ib_fast_reg_wr fastreg_wr; + struct ib_send_wr *bad_wr; u8 key; int len, pageoff; int i, rc; @@ -358,23 +359,23 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, __func__, mw, i, len); memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.page_list_len = page_no; - fastreg_wr.wr.fast_reg.length = len; - fastreg_wr.wr.fast_reg.access_flags = writing ? + fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.iova_start = seg1->mr_dma + pageoff; + fastreg_wr.page_list = frmr->fr_pgl; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.page_list_len = page_no; + fastreg_wr.length = len; + fastreg_wr.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.wr.fast_reg.rkey = mr->rkey; + fastreg_wr.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); if (rc) goto out_senderr; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cb5174284074..7be42d0da19e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); int ret, read, pno; @@ -179,16 +179,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = pages_needed; - - ret = svc_rdma_send(xprt, &read_wr); + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.wr.opcode; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -218,9 +218,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_send_wr fastreg_wr; + struct ib_fast_reg_wr fastreg_wr; u8 key; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); @@ -289,31 +289,31 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - fastreg_wr.next = &read_wr; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.iova_start = (unsigned long)frmr->kva; + fastreg_wr.page_list = frmr->page_list; + fastreg_wr.page_list_len = frmr->page_list_len; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.length = frmr->map_len; + fastreg_wr.access_flags = frmr->access_flags; + fastreg_wr.rkey = frmr->mr->lkey; + fastreg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = 1; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; - read_wr.wr_id = (unsigned long)ctxt; - read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { - read_wr.opcode = IB_WR_RDMA_READ; - read_wr.next = &inv_wr; + read_wr.wr.opcode = IB_WR_RDMA_READ; + read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; @@ -321,10 +321,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } - ctxt->wr_op = read_wr.opcode; + ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr); + ret = svc_rdma_send(xprt, &fastreg_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1dfae8317065..969a1ab75fc3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 xdr_off, int write_len, struct svc_rdma_req_map *vec) { - struct ib_send_wr write_wr; + struct ib_rdma_wr write_wr; struct ib_sge *sge; int xdr_sge_no; int sge_no; @@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; - write_wr.wr_id = (unsigned long)ctxt; - write_wr.sg_list = &sge[0]; - write_wr.num_sge = sge_no; - write_wr.opcode = IB_WR_RDMA_WRITE; - write_wr.send_flags = IB_SEND_SIGNALED; - write_wr.wr.rdma.rkey = rmr; - write_wr.wr.rdma.remote_addr = to; + write_wr.wr.wr_id = (unsigned long)ctxt; + write_wr.wr.sg_list = &sge[0]; + write_wr.wr.num_sge = sge_no; + write_wr.wr.opcode = IB_WR_RDMA_WRITE; + write_wr.wr.send_flags = IB_SEND_SIGNALED; + write_wr.rkey = rmr; + write_wr.remote_addr = to; /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) + if (svc_rdma_send(xprt, &write_wr.wr)) goto err; return write_len - bc; err: -- cgit v1.2.3 From 25556ae6b965321c7e7469faa06ddbeae50dac91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Aug 2015 14:58:43 +0200 Subject: IB: remove xrc_remote_srq_num from struct ib_send_wr The field is only initialized in mlx, but never used. If we want to add proper XRC support it should be done with a new struct ib_xrc_wr. This shrinks the various WR structures by another 4 bytes. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Tested-by: Haggai Eran --- drivers/infiniband/hw/mlx5/qp.c | 1 - include/rdma/ib_verbs.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d4c36af4270f..9bad68820061 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2627,7 +2627,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; - xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); seg += sizeof(*xrc); size += sizeof(*xrc) / 16; /* fall through */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 25f022c9aaac..edf02908a0fd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1100,7 +1100,6 @@ struct ib_send_wr { __be32 imm_data; u32 invalidate_rkey; } ex; - u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; struct ib_rdma_wr { -- cgit v1.2.3 From 2c79dad8953626b8b808a353bc661c9f321a64c0 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Thu, 15 Oct 2015 21:15:15 +0000 Subject: usnic: correctly check failed allocation Since ib_alloc_device returns allocated memory address, not error, it should be checked as IS_NULL, not IS_ERR_OR_NULL. Signed-off-by: Insu Yun Reviewed-by: Dave Goodell Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 0c15bd885035..565c881a44ba 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev) netdev = pci_get_drvdata(dev); us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); - if (IS_ERR_OR_NULL(us_ibdev)) { + if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); + return ERR_PTR(-EFAULT); } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); - if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { - usnic_err("Failed to alloc ufdev for %s with err %ld\n", - pci_name(dev), PTR_ERR(us_ibdev->ufdev)); + if (!us_ibdev->ufdev) { + usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev)); goto err_dealloc; } -- cgit v1.2.3 From 35ef4a9ebfe3392da8ca025c8c16365c9da472e5 Mon Sep 17 00:00:00 2001 From: "Dave Goodell \\(dgoodell\\)" Date: Thu, 15 Oct 2015 20:01:29 -0700 Subject: MAINTAINERS: update usnic maintainer contacts Upinder hasn't worked for Cisco for a little while now, updating to more active maintainers. Signed-off-by: Dave Goodell Signed-off-by: Doug Ledford --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9f6685f6c5a9..2ac939e35169 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2730,9 +2730,10 @@ S: Supported F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER -M: Upinder Malhi +M: Christian Benvenuti +M: Dave Goodell S: Supported -F: drivers/infiniband/hw/usnic +F: drivers/infiniband/hw/usnic/ CIRRUS LOGIC EP93XX ETHERNET DRIVER M: Hartley Sweeten -- cgit v1.2.3 From fe274c5aed1b5e311e0e83306572b70312c0313a Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Mon, 19 Oct 2015 16:57:10 +0000 Subject: usnic: correctly handle kzalloc return value Since kzalloc returns memory address, not error code, it should be checked whether it is null or not. Signed-off-by: Insu Yun Reviewed-by: Dave Goodell Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 85dc3f989ff7..fcea3a24d3eb 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, /* Create Flow Handle */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; @@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp, /* Create qp_flow */ qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); - if (IS_ERR_OR_NULL(qp_flow)) { - err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; + if (!qp_flow) { + err = -ENOMEM; goto out_dealloc_flow; } qp_flow->flow = flow; -- cgit v1.2.3 From 68a5e6043655a56cdb13b06b3f63d39791fe18eb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Oct 2015 17:00:05 +0800 Subject: IB/iser: fix a comment typo Just fix a typo in the code comment. Signed-off-by: Geliang Tang Acked-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index f58ff96b6cbb..8f2f1057370c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -111,7 +111,7 @@ module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO); MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); /* - * iscsi_iser_recv() - Process a successfull recv completion + * iscsi_iser_recv() - Process a successful recv completion * @conn: iscsi connection * @hdr: iscsi header * @rx_data: buffer containing receive data payload -- cgit v1.2.3 From 931cf9a3e55c7a4c0d7340140643ce6fdf7b6e96 Mon Sep 17 00:00:00 2001 From: Hal Rosenstock Date: Wed, 30 Sep 2015 15:04:29 -0400 Subject: ib_pack.h: Fix commentary IBA reference for CNP in IB opcode enum IBA spec is now 1.3 not 3.1 and vol 1 should be mentioned as there is also vol 2. Signed-off-by: Hal Rosenstock Signed-off-by: Doug Ledford --- include/rdma/ib_pack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 709a5331e6b9..e99d8f9a4551 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -76,7 +76,7 @@ enum { IB_OPCODE_UC = 0x20, IB_OPCODE_RD = 0x40, IB_OPCODE_UD = 0x60, - /* per IBTA 3.1 Table 38, A10.3.2 */ + /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ IB_OPCODE_CNP = 0x80, /* operations -- just used to define real constants */ -- cgit v1.2.3 From b61e564af85bde408456f779eb267a37a64dc522 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Oct 2015 14:10:04 +0200 Subject: RDMA/cxgb4: re-fix 32-bit build warning Casting a pointer to __be64 produces a warning on 32-bit architectures: drivers/infiniband/hw/cxgb4/mem.c:147:20: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] req->wr.wr_lo = (__force __be64)&wr_wait; This was fixed at least twice for this driver in different places, and accidentally reverted once more. This puts the correct version back in place. Signed-off-by: Arnd Bergmann Fixes: 6198dd8d7a6a7 ("iw_cxgb4: 32b platform fixes") Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 026b91ebd5e2..140415d31bcc 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, if (i == (num_wqe-1)) { req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | FW_WR_COMPL_F); - req->wr.wr_lo = (__force __be64)&wr_wait; + req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait; } else req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); req->wr.wr_mid = cpu_to_be32( -- cgit v1.2.3 From 5d1e623591dfaa64a59ecdac420adc16125524d4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Oct 2015 14:29:51 +0200 Subject: IB/core: avoid 32-bit warning The INIT_UDATA() macro requires a pointer or unsigned long argument for both input and output buffer, and all callers had a cast from when the code was merged until a recent restructuring, so now we get core/uverbs_cmd.c: In function 'ib_uverbs_create_cq': core/uverbs_cmd.c:1481:66: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] This makes the code behave as before by adding back the cast to unsigned long. Signed-off-by: Arnd Bergmann Fixes: 565197dd8fb1 ("IB/core: Extend ib_uverbs_create_cq") Reviewed-by: Yann Droneaud Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index be4cb9f04be3..88b3b78340f2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; - INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), -- cgit v1.2.3 From 99718e59fa8425753a8ec2aceb9ec3faa865eeb6 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 8 Sep 2015 09:56:57 +0530 Subject: iw_cxgb4: detect fatal errors while creating listening filters In c4iw_create_listen(), if we're using listen filters, then bail out of the busy loop if the device becomes fatally dead Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index debc39d2cbc2..24efe52f1d68 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3260,6 +3260,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) sin->sin_addr.s_addr, sin->sin_port, 0, ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); if (err == -EBUSY) { + if (c4iw_fatal_error(&ep->com.dev->rdev)) { + err = -EIO; + break; + } set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(100)); } -- cgit v1.2.3 From 158c776dba1f6b5eb2e3d5327cda719dc4820c1c Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 8 Sep 2015 09:56:58 +0530 Subject: iw_cxgb4: pass the ord/ird in connect reply events This allows client ULPs to get the negotiated ord/ird which is useful to avoid stalling the SQ due to exceeding the ORD. Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 24efe52f1d68..a6aae913850c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1196,6 +1196,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if ((status == 0) || (status == -ECONNREFUSED)) { if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1203,6 +1205,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) sizeof(struct mpa_v2_conn_params); } else { /* this means MPA_v1 is used */ + event.ord = cur_max_read_depth(ep->com.dev); + event.ird = cur_max_read_depth(ep->com.dev); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); -- cgit v1.2.3 From f57b780c0000a56b5e51df5f94a35828bb5e6a7a Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 8 Sep 2015 09:56:59 +0530 Subject: iw_cxgb4: fix misuse of ep->ord for minimum ird calculation When calculating the minimum ird in c4iw_accept_cr(), we need to always have a value of at least 1 if the RTR message is a 0B read. The code was incorrectly using ep->ord for this logic which was incorrectly adjusting the ird and causing incorrect ord/ird negotiation when using MPAv2 to negotiate these values. Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a6aae913850c..fa3ee5971f35 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2933,7 +2933,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (peer2peer && (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && - (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0) + (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0) ep->ird = 1; } -- cgit v1.2.3 From 3dd9a5dc24c0bfb1453965a22f5234367a8936e3 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Tue, 8 Sep 2015 09:57:00 +0530 Subject: iw_cxgb4: reverse the ord/ird in the ESTABLISHED upcall The ESTABLISHED event should have the peer's ord/ird so swap the values in the event before the upcall. Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index fa3ee5971f35..6c022117d11e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1269,8 +1269,8 @@ static void established_upcall(struct c4iw_ep *ep) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; - event.ird = ep->ird; - event.ord = ep->ord; + event.ird = ep->ord; + event.ord = ep->ird; if (ep->com.cm_id) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); -- cgit v1.2.3 From 5a85f5e9d41bfd8a1fb5136d5ea2815fc75a5271 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 20 Oct 2015 14:17:56 +0530 Subject: RDMA/ocrdma: Cleanup unused device list and rcu variables ocrdma_dev_list is not used by the driver. So removing the references of this variable. dev->rcu was introduced for the ipv6 notifier for GID management. This is no longer required as the GID management is outside the HW driver. Signed-off-by: Padmanabh Ratnakar Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma.h | 1 - drivers/infiniband/hw/ocrdma/ocrdma_main.c | 16 ++-------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index b4091ab48db0..8cd57c77e122 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -278,7 +278,6 @@ struct ocrdma_dev { u32 hba_port_num; struct list_head entry; - struct rcu_head rcu; int id; u64 *stag_arr; u8 sl; /* service level */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 87aa55df7c82..cb1af0fb4051 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -static LIST_HEAD(ocrdma_dev_list); -static DEFINE_SPINLOCK(ocrdma_devlist_lock); static DEFINE_IDR(ocrdma_dev_id); void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) @@ -325,9 +323,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) goto sysfs_err; - spin_lock(&ocrdma_devlist_lock); - list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); - spin_unlock(&ocrdma_devlist_lock); /* Init stats */ ocrdma_add_port_stats(dev); /* Interrupt Moderation */ @@ -356,9 +351,8 @@ idr_err: return NULL; } -static void ocrdma_remove_free(struct rcu_head *rcu) +static void ocrdma_remove_free(struct ocrdma_dev *dev) { - struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); idr_remove(&ocrdma_dev_id, dev->id); kfree(dev->mbx_cmd); @@ -375,15 +369,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev) ib_unregister_device(&dev->ibdev); ocrdma_rem_port_stats(dev); - - spin_lock(&ocrdma_devlist_lock); - list_del_rcu(&dev->entry); - spin_unlock(&ocrdma_devlist_lock); - ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); - - call_rcu(&dev->rcu, ocrdma_remove_free); + ocrdma_remove_free(dev); } static int ocrdma_open(struct ocrdma_dev *dev) -- cgit v1.2.3 From fb16d8c49e6fa2791620cf2cd61d6daa123ed51e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 20 Oct 2015 14:17:57 +0530 Subject: RDMA/ocrdma: Avoid a possible crash in ocrdma_rem_port_stats debugfs_remove should be called before freeing the driver stats resources to avoid any crash during ocrdma_remove. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 69334e214571..86c303a620c1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -855,9 +855,9 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev) { if (!dev->dir) return; + debugfs_remove(dev->dir); mutex_destroy(&dev->stats_lock); ocrdma_release_stats_mem(dev); - debugfs_remove(dev->dir); } void ocrdma_init_debugfs(void) -- cgit v1.2.3 From aeb922df2c6fc3fc21fd7ec4b9ae7e845e541450 Mon Sep 17 00:00:00 2001 From: Naga Irrinki Date: Tue, 20 Oct 2015 14:17:58 +0530 Subject: RDMA/ocrdma: Check resource ids received in Async CQE Some versions of the FW sends wrong QP or CQ IDs in the Async CQE. Adding a check to see whether qp or cq structures associated with the CQE is valid. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index aab391a15db4..9d9914299866 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -678,11 +678,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, int dev_event = 0; int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; + u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK; + u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK; - if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) - qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK]; - if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) - cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + /* + * Some FW version returns wrong qp or cq ids in CQEs. + * Checking whether the IDs are valid + */ + + if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) { + if (qpid < dev->attr.max_qp) + qp = dev->qp_tbl[qpid]; + if (qp == NULL) { + pr_err("ocrdma%d:Async event - qpid %u is not valid\n", + dev->id, qpid); + return; + } + } + + if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) { + if (cqid < dev->attr.max_cq) + cq = dev->cq_tbl[cqid]; + if (cq == NULL) { + pr_err("ocrdma%d:Async event - cqid %u is not valid\n", + dev->id, cqid); + return; + } + } memset(&ib_evt, 0, sizeof(ib_evt)); -- cgit v1.2.3 From af74d1956f780253d643a15f30d6aaa1c2e4836d Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 20 Oct 2015 14:17:59 +0530 Subject: RDMA/ocrdma: Prevent CQ-Doorbell floods Changing CQ-Doorbell(DB) logic to prevent DB floods, it is supposed to be pressed only if any hw CQE is polled. If cq-arm was requested previously then don't bother about number of hw CQEs polled and arm the CQ. Signed-off-by: Devesh Sharma Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 1f3affb6a477..1c4e83d5d153 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2933,16 +2933,11 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (cq->deferred_arm) { - ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, - polled_hw_cqes); + if (cq->deferred_arm || polled_hw_cqes) { + ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm, + cq->deferred_sol, polled_hw_cqes); cq->deferred_arm = false; cq->deferred_sol = false; - } else { - /* We need to pop the CQE. No need to arm */ - ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, - polled_hw_cqes); - cq->deferred_sol = false; } return i; -- cgit v1.2.3 From c6a7b0d7a5105ae2e0454b18ce2989be59142afe Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 20 Oct 2015 14:18:00 +0530 Subject: RDMA/ocrdma: Bump up ocrdma version number to 11.0.0.0 Updating the version number to 11.0.0.0 Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 8cd57c77e122..8f047f996e22 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -55,7 +55,7 @@ #include #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" +#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" -- cgit v1.2.3 From 279998059729422fc0d551b5deb266a174cbab47 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 23 Sep 2015 17:19:26 +0530 Subject: cxgb4: T6 adapter lld support for iw_cxgb4 driver Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 41 +---------- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 22 ++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 + drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h | 85 +++++++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 48 +++++++++++++ 5 files changed, 158 insertions(+), 40 deletions(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index fa0c7b54ec7a..11045ec8d94c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -48,6 +48,7 @@ #include #include #include +#include "t4_chip_type.h" #include "cxgb4_uld.h" #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) @@ -290,31 +291,6 @@ struct pci_params { unsigned char width; }; -#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision)) -#define CHELSIO_CHIP_FPGA 0x100 -#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf) -#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf) - -#define CHELSIO_T4 0x4 -#define CHELSIO_T5 0x5 -#define CHELSIO_T6 0x6 - -enum chip_type { - T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1), - T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2), - T4_FIRST_REV = T4_A1, - T4_LAST_REV = T4_A2, - - T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0), - T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1), - T5_FIRST_REV = T5_A0, - T5_LAST_REV = T5_A1, - - T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0), - T6_FIRST_REV = T6_A0, - T6_LAST_REV = T6_A0, -}; - struct devlog_params { u32 memtype; /* which memory (EDC0, EDC1, MC) */ u32 start; /* start of log in firmware memory */ @@ -905,21 +881,6 @@ static inline int is_offload(const struct adapter *adap) return adap->params.offload; } -static inline int is_t6(enum chip_type chip) -{ - return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6; -} - -static inline int is_t5(enum chip_type chip) -{ - return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5; -} - -static inline int is_t4(enum chip_type chip) -{ - return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4; -} - static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f5dcde27e402..aeeb21ff0de2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1935,6 +1935,28 @@ unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus, } EXPORT_SYMBOL(cxgb4_best_aligned_mtu); +/** + * cxgb4_tp_smt_idx - Get the Source Mac Table index for this VI + * @chip: chip type + * @viid: VI id of the given port + * + * Return the SMT index for this VI. + */ +unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid) +{ + /* In T4/T5, SMT contains 256 SMAC entries organized in + * 128 rows of 2 entries each. + * In T6, SMT contains 256 SMAC entries in 256 rows. + * TODO: The below code needs to be updated when we add support + * for 256 VFs. + */ + if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5) + return ((viid & 0x7f) << 1); + else + return (viid & 0x7f); +} +EXPORT_SYMBOL(cxgb4_tp_smt_idx); + /** * cxgb4_port_chan - get the HW channel of a port * @dev: the net device for the port diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index c3a8be5541e7..cf711d5f15be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -40,6 +40,7 @@ #include #include #include +#include "cxgb4.h" /* CPL message priority levels */ enum { @@ -290,6 +291,7 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); unsigned int cxgb4_port_viid(const struct net_device *dev); +unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid); unsigned int cxgb4_port_idx(const struct net_device *dev); unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu, unsigned int *idx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h new file mode 100644 index 000000000000..54b718111e3f --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h @@ -0,0 +1,85 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2015 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __T4_CHIP_TYPE_H__ +#define __T4_CHIP_TYPE_H__ + +#define CHELSIO_T4 0x4 +#define CHELSIO_T5 0x5 +#define CHELSIO_T6 0x6 + +/* We code the Chelsio T4 Family "Chip Code" as a tuple: + * + * (Chip Version, Chip Revision) + * + * where: + * + * Chip Version: is T4, T5, etc. + * Chip Revision: is the FAB "spin" of the Chip Version. + */ +#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision)) +#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf) +#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf) + +enum chip_type { + T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1), + T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2), + T4_FIRST_REV = T4_A1, + T4_LAST_REV = T4_A2, + + T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0), + T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1), + T5_FIRST_REV = T5_A0, + T5_LAST_REV = T5_A1, + + T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0), + T6_FIRST_REV = T6_A0, + T6_LAST_REV = T6_A0, +}; + +static inline int is_t4(enum chip_type chip) +{ + return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4); +} + +static inline int is_t5(enum chip_type chip) +{ + return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5); +} + +static inline int is_t6(enum chip_type chip) +{ + return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6); +} + +#endif /* __T4_CHIP_TYPE_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b99144afd4ec..a072d341e205 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -417,6 +417,21 @@ struct cpl_t5_act_open_req { __be64 params; }; +struct cpl_t6_act_open_req { + WR_HDR; + union opcode_tid ot; + __be16 local_port; + __be16 peer_port; + __be32 local_ip; + __be32 peer_ip; + __be64 opt0; + __be32 rsvd; + __be32 opt2; + __be64 params; + __be32 rsvd2; + __be32 opt3; +}; + struct cpl_act_open_req6 { WR_HDR; union opcode_tid ot; @@ -446,6 +461,23 @@ struct cpl_t5_act_open_req6 { __be64 params; }; +struct cpl_t6_act_open_req6 { + WR_HDR; + union opcode_tid ot; + __be16 local_port; + __be16 peer_port; + __be64 local_ip_hi; + __be64 local_ip_lo; + __be64 peer_ip_hi; + __be64 peer_ip_lo; + __be64 opt0; + __be32 rsvd; + __be32 opt2; + __be64 params; + __be32 rsvd2; + __be32 opt3; +}; + struct cpl_act_open_rpl { union opcode_tid ot; __be32 atid_status; @@ -504,6 +536,19 @@ struct cpl_pass_establish { #define TCPOPT_MSS_M 0xF #define TCPOPT_MSS_G(x) (((x) >> TCPOPT_MSS_S) & TCPOPT_MSS_M) +#define T6_TCP_HDR_LEN_S 8 +#define T6_TCP_HDR_LEN_V(x) ((x) << T6_TCP_HDR_LEN_S) +#define T6_TCP_HDR_LEN_G(x) (((x) >> T6_TCP_HDR_LEN_S) & TCP_HDR_LEN_M) + +#define T6_IP_HDR_LEN_S 14 +#define T6_IP_HDR_LEN_V(x) ((x) << T6_IP_HDR_LEN_S) +#define T6_IP_HDR_LEN_G(x) (((x) >> T6_IP_HDR_LEN_S) & IP_HDR_LEN_M) + +#define T6_ETH_HDR_LEN_S 24 +#define T6_ETH_HDR_LEN_M 0xFF +#define T6_ETH_HDR_LEN_V(x) ((x) << T6_ETH_HDR_LEN_S) +#define T6_ETH_HDR_LEN_G(x) (((x) >> T6_ETH_HDR_LEN_S) & T6_ETH_HDR_LEN_M) + struct cpl_act_establish { union opcode_tid ot; __be32 rsvd; @@ -833,6 +878,9 @@ struct cpl_rx_pkt { __be16 err_vec; }; +#define RX_T6_ETHHDR_LEN_M 0xFF +#define RX_T6_ETHHDR_LEN_G(x) (((x) >> RX_ETHHDR_LEN_S) & RX_T6_ETHHDR_LEN_M) + #define RXF_PSH_S 20 #define RXF_PSH_V(x) ((x) << RXF_PSH_S) #define RXF_PSH_F RXF_PSH_V(1U) -- cgit v1.2.3 From 963cab508296a06ed8063c848f32d74f2b4b4c26 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Wed, 23 Sep 2015 17:19:27 +0530 Subject: iw_cxgb4: Adds support for T6 adapter Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 317 ++++++++++++++++++++------------- drivers/infiniband/hw/cxgb4/device.c | 10 +- drivers/infiniband/hw/cxgb4/provider.c | 2 +- drivers/infiniband/hw/cxgb4/qp.c | 16 +- drivers/infiniband/hw/cxgb4/t4.h | 5 +- 5 files changed, 204 insertions(+), 146 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6c022117d11e..c9cffced00ca 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu, static int send_connect(struct c4iw_ep *ep) { - struct cpl_act_open_req *req; - struct cpl_t5_act_open_req *t5_req; - struct cpl_act_open_req6 *req6; - struct cpl_t5_act_open_req6 *t5_req6; + struct cpl_act_open_req *req = NULL; + struct cpl_t5_act_open_req *t5req = NULL; + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req6 *req6 = NULL; + struct cpl_t5_act_open_req6 *t5req6 = NULL; + struct cpl_t6_act_open_req6 *t6req6 = NULL; struct sk_buff *skb; u64 opt0; u32 opt2; unsigned int mtu_idx; int wscale; - int wrlen; - int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req) : - sizeof(struct cpl_t5_act_open_req); - int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? - sizeof(struct cpl_act_open_req6) : - sizeof(struct cpl_t5_act_open_req6); + int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.mapped_local_addr; struct sockaddr_in *ra = (struct sockaddr_in *) @@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep) &ep->com.mapped_local_addr; struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; - int win; int ret; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; + u32 isn = (prandom_u32() & ~7UL) - 1; + + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + sizev4 = sizeof(struct cpl_act_open_req); + sizev6 = sizeof(struct cpl_act_open_req6); + break; + case CHELSIO_T5: + sizev4 = sizeof(struct cpl_t5_act_open_req); + sizev6 = sizeof(struct cpl_t5_act_open_req6); + break; + case CHELSIO_T6: + sizev4 = sizeof(struct cpl_t6_act_open_req); + sizev6 = sizeof(struct cpl_t6_act_open_req6); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + return -EINVAL; + } wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? roundup(sizev4, 16) : @@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN_F; if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN_F; - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { + if (peer2peer) + isn += 4; + opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); opt2 |= T5_ISS_F; @@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep) t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); - if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { - if (ep->com.remote_addr.ss_family == AF_INET) { - req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + if (ep->com.remote_addr.ss_family == AF_INET) { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: + req = (struct cpl_act_open_req *)skb_put(skb, wrlen); INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - req->local_port = la->sin_port; - req->peer_port = ra->sin_port; - req->local_ip = la->sin_addr.s_addr; - req->peer_ip = ra->sin_addr.s_addr; - req->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req = (struct cpl_t5_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req, 0); + req = (struct cpl_act_open_req *)t5req; + break; + case CHELSIO_T6: + t6req = (struct cpl_t6_act_open_req *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + t5req = (struct cpl_t5_act_open_req *)t6req; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid<<14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req->opt2 = cpu_to_be32(opt2); } else { + t5req->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); + t5req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req->rsvd); + t5req->opt2 = cpu_to_be32(opt2); + } + } else { + switch (CHELSIO_CHIP_VERSION(adapter_type)) { + case CHELSIO_T4: req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); - INIT_TP_WR(req6, 0); - OPCODE_TID(req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - req6->local_port = la6->sin6_port; - req6->peer_port = ra6->sin6_port; - req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - req6->opt0 = cpu_to_be64(opt0); + break; + case CHELSIO_T5: + t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t5req6, 0); + req6 = (struct cpl_act_open_req6 *)t5req6; + break; + case CHELSIO_T6: + t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb, + wrlen); + INIT_TP_WR(t6req6, 0); + req6 = (struct cpl_act_open_req6 *)t6req6; + t5req6 = (struct cpl_t5_act_open_req6 *)t6req6; + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(adapter_type)); + ret = -EINVAL; + goto clip_release; + } + + OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { req6->params = cpu_to_be32(cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t)); req6->opt2 = cpu_to_be32(opt2); - } - } else { - u32 isn = (prandom_u32() & ~7UL) - 1; - - if (peer2peer) - isn += 4; - - if (ep->com.remote_addr.ss_family == AF_INET) { - t5_req = (struct cpl_t5_act_open_req *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req, 0); - OPCODE_TID(t5_req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, - ((ep->rss_qid << 14) | ep->atid))); - t5_req->local_port = la->sin_port; - t5_req->peer_port = ra->sin_port; - t5_req->local_ip = la->sin_addr.s_addr; - t5_req->peer_ip = ra->sin_addr.s_addr; - t5_req->opt0 = cpu_to_be64(opt0); - t5_req->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( - ep->com.dev->rdev.lldi.ports[0], - ep->l2t))); - t5_req->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req->rsvd)); - t5_req->opt2 = cpu_to_be32(opt2); } else { - t5_req6 = (struct cpl_t5_act_open_req6 *) - skb_put(skb, wrlen); - INIT_TP_WR(t5_req6, 0); - OPCODE_TID(t5_req6) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, - ((ep->rss_qid<<14)|ep->atid))); - t5_req6->local_port = la6->sin6_port; - t5_req6->peer_port = ra6->sin6_port; - t5_req6->local_ip_hi = *((__be64 *) - (la6->sin6_addr.s6_addr)); - t5_req6->local_ip_lo = *((__be64 *) - (la6->sin6_addr.s6_addr + 8)); - t5_req6->peer_ip_hi = *((__be64 *) - (ra6->sin6_addr.s6_addr)); - t5_req6->peer_ip_lo = *((__be64 *) - (ra6->sin6_addr.s6_addr + 8)); - t5_req6->opt0 = cpu_to_be64(opt0); - t5_req6->params = cpu_to_be64(FILTER_TUPLE_V( - cxgb4_select_ntuple( + t5req6->params = cpu_to_be64(FILTER_TUPLE_V( + cxgb4_select_ntuple( ep->com.dev->rdev.lldi.ports[0], ep->l2t))); - t5_req6->rsvd = cpu_to_be32(isn); - PDBG("%s snd_isn %u\n", __func__, - be32_to_cpu(t5_req6->rsvd)); - t5_req6->opt2 = cpu_to_be32(opt2); + t5req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd); + t5req6->opt2 = cpu_to_be32(opt2); } } set_bit(ACT_OPEN_REQ, &ep->com.history); ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +clip_release: if (ret && ep->com.remote_addr.ss_family == AF_INET6) cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], (const u32 *)&la6->sin6_addr.s6_addr, 1); @@ -1902,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, struct dst_entry *dst, struct c4iw_dev *cdev, - bool clear_mpa_v1) + bool clear_mpa_v1, enum chip_type adapter_type) { struct neighbour *n; int err, step; @@ -1937,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -1956,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, + cxgb4_port_viid(pdev)); step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2029,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) err = -EHOSTUNREACH; goto fail3; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false, + ep->com.dev->rdev.lldi.adapter_type); if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); goto fail4; @@ -2217,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, int wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; + enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); skb_get(skb); rpl = cplhdr(skb); - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (!is_t4(adapter_type)) { skb_trim(skb, roundup(sizeof(*rpl5), 16)); rpl5 = (void *)rpl; INIT_TP_WR(rpl5, ep->hwtid); @@ -2270,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, const struct tcphdr *tcph; u32 hlen = ntohl(req->hdr_len); - tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + - IP_HDR_LEN_G(hlen); + if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5) + tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + + IP_HDR_LEN_G(hlen); + else + tcph = (const void *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen); if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); } - if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) { u32 isn = (prandom_u32() & ~7UL) - 1; opt2 |= T5_OPT_2_VALID_F; opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); @@ -2306,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, +static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, __be16 *peer_port) { - int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); struct tcphdr *tcp = (struct tcphdr *) @@ -2366,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); + get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, + local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2401,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false, + parent_ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -3193,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail2; } - err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true, + ep->com.dev->rdev.lldi.adapter_type); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); goto fail3; @@ -3601,20 +3642,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) { - u32 l2info; - u16 vlantag, len, hdr_len, eth_hdr_len; + __be32 l2info; + __be16 hdr_len, vlantag, len; + u16 eth_hdr_len; + int tcp_hdr_len, ip_hdr_len; u8 intf; struct cpl_rx_pkt *cpl = cplhdr(skb); struct cpl_pass_accept_req *req; struct tcp_options_received tmp_opt; struct c4iw_dev *dev; + enum chip_type type; dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = (__force u16) cpl->vlan; - len = (__force u16) cpl->len; - l2info = (__force u32) cpl->l2info; - hdr_len = (__force u16) cpl->hdr_len; + vlantag = cpl->vlan; + len = cpl->len; + l2info = cpl->l2info; + hdr_len = cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -3631,20 +3675,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(SYN_INTF_V(intf) | SYN_MAC_IDX_V(RX_MACIDX_G( - (__force int) htonl(l2info))) | + be32_to_cpu(l2info))) | SYN_XACT_MATCH_F); - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G((__force int)htonl(l2info)) : - RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info)); - req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G( - (__force int) htonl(l2info))) | - TCP_HDR_LEN_V(RX_TCPHDR_LEN_G( - (__force int) htons(hdr_len))) | - IP_HDR_LEN_V(RX_IPHDR_LEN_G( - (__force int) htons(hdr_len))) | - ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len))); - req->vlan = (__force __be16) vlantag; - req->len = (__force __be16) len; + type = dev->rdev.lldi.adapter_type; + tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len)); + ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len)); + req->hdr_len = + cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info)))); + if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) { + eth_hdr_len = is_t4(type) ? + RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) : + RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) | + IP_HDR_LEN_V(ip_hdr_len) | + ETH_HDR_LEN_V(eth_hdr_len)); + } else { /* T6 and later */ + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info)); + req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) | + T6_IP_HDR_LEN_V(ip_hdr_len) | + T6_ETH_HDR_LEN_V(eth_hdr_len)); + } + req->vlan = vlantag; + req->len = len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | PASS_OPEN_TOS_V(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -3763,9 +3815,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? - RX_ETHHDR_LEN_G(htonl(cpl->l2info)) : - RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info)); + switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) { + case CHELSIO_T4: + eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T5: + eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + case CHELSIO_T6: + eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info)); + break; + default: + pr_err("T%d Chip is not supported\n", + CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)); + goto reject; + } + if (eth_hdr_len == ETH_HLEN) { eh = (struct ethhdr *)(req + 1); iph = (struct iphdr *)(eh + 1); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 1a297391b54c..58fce1742b8d 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -962,12 +962,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.sge_egrstatuspagesize; /* - * For T5 devices, we map all of BAR2 with WC. + * For T5/T6 devices, we map all of BAR2 with WC. * For T4 devices with onchip qp mem, we map only that part * of BAR2 with WC. */ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); - if (is_t5(devp->rdev.lldi.adapter_type)) { + if (!is_t4(devp->rdev.lldi.adapter_type)) { devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, pci_resource_len(devp->rdev.lldi.pdev, 2)); if (!devp->rdev.bar2_kva) { @@ -1267,11 +1267,9 @@ static int enable_qp_db(int id, void *p, void *data) static void resume_rc_qp(struct c4iw_qp *qp) { spin_lock(&qp->lock); - t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL); qp->wq.sq.wq_pidx_inc = 0; - t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, - is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL); qp->wq.rq.wq_pidx_inc = 0; spin_unlock(&qp->lock); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 7746113552e7..df3b3f1ad066 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if (addr >= rdev->oc_mw_pa) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else { - if (is_t5(rdev->lldi.adapter_type)) + if (!is_t4(rdev->lldi.adapter_type)) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); else diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 6517e1208ccb..acfc2f22b382 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -712,8 +712,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_sq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_sq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.sq.wq_pidx_inc += inc; @@ -730,8 +729,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) spin_lock_irqsave(&qhp->rhp->lock, flags); spin_lock(&qhp->lock); if (qhp->rhp->db_state == NORMAL) - t4_ring_rq_db(&qhp->wq, inc, - is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + t4_ring_rq_db(&qhp->wq, inc, NULL); else { add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); qhp->wq.rq.wq_pidx_inc += inc; @@ -817,7 +815,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, - is_t5( + !is_t4( qhp->rhp->rdev.lldi.adapter_type) ? 1 : 0); break; @@ -860,8 +858,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_sq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -934,8 +931,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } if (!qhp->rhp->rdev.status_page->db_off) { - t4_ring_rq_db(&qhp->wq, idx, - is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + t4_ring_rq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { spin_unlock_irqrestore(&qhp->lock, flag); @@ -1875,7 +1871,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; - if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) return -EINVAL; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 274a7ab13bef..1092a2d1f607 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, - union t4_wr *wqe) +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { /* Flush host queue memory writes. */ @@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db); } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, union t4_recv_wr *wqe) { -- cgit v1.2.3 From 6d8a74972b7115b41d4d17a4444c026755ca24be Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 21 Oct 2015 17:00:42 +0300 Subject: IB/core: Extend ib_uverbs_create_qp ib_uverbs_ex_create_qp follows the extension verbs mechanism. New features (for example, QP creation flags field which is added in a downstream patch) could used via user-space libraries without breaking the ABI. Signed-off-by: Eran Ben Elisha Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 254 +++++++++++++++++++++++++--------- drivers/infiniband/core/uverbs_main.c | 1 + include/uapi/rdma/ib_user_verbs.h | 26 ++++ 4 files changed, 217 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 3863d33c243d..94bbd8c155fc 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); +IB_UVERBS_DECLARE_EX_CMD(create_qp); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 88b3b78340f2..b2a37d545e0b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) -{ - struct ib_uverbs_create_qp cmd; - struct ib_uverbs_create_qp_resp resp; - struct ib_udata udata; - struct ib_uqp_object *obj; - struct ib_device *device; - struct ib_pd *pd = NULL; - struct ib_xrcd *xrcd = NULL; - struct ib_uobject *uninitialized_var(xrcd_uobj); - struct ib_cq *scq = NULL, *rcq = NULL; - struct ib_srq *srq = NULL; - struct ib_qp *qp; - struct ib_qp_init_attr attr; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; +static int create_qp(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw, + struct ib_uverbs_ex_create_qp *cmd, + size_t cmd_sz, + int (*cb)(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *udata), + void *context) +{ + struct ib_uqp_object *obj; + struct ib_device *device; + struct ib_pd *pd = NULL; + struct ib_xrcd *xrcd = NULL; + struct ib_uobject *uninitialized_var(xrcd_uobj); + struct ib_cq *scq = NULL, *rcq = NULL; + struct ib_srq *srq = NULL; + struct ib_qp *qp; + char *buf; + struct ib_qp_init_attr attr; + struct ib_uverbs_ex_create_qp_resp resp; + int ret; - if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) return -EPERM; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); - obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, + &qp_lock_class); down_write(&obj->uevent.uobject.mutex); - if (cmd.qp_type == IB_QPT_XRC_TGT) { - xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); + if (cmd->qp_type == IB_QPT_XRC_TGT) { + xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, + &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put; } device = xrcd->device; } else { - if (cmd.qp_type == IB_QPT_XRC_INI) { - cmd.max_recv_wr = cmd.max_recv_sge = 0; + if (cmd->qp_type == IB_QPT_XRC_INI) { + cmd->max_recv_wr = 0; + cmd->max_recv_sge = 0; } else { - if (cmd.is_srq) { - srq = idr_read_srq(cmd.srq_handle, file->ucontext); + if (cmd->is_srq) { + srq = idr_read_srq(cmd->srq_handle, + file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; } } - if (cmd.recv_cq_handle != cmd.send_cq_handle) { - rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (cmd->recv_cq_handle != cmd->send_cq_handle) { + rcq = idr_read_cq(cmd->recv_cq_handle, + file->ucontext, 0); if (!rcq) { ret = -EINVAL; goto err_put; @@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } } - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); rcq = rcq ?: scq; - pd = idr_read_pd(cmd.pd_handle, file->ucontext); + pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd || !scq) { ret = -EINVAL; goto err_put; @@ -1825,31 +1824,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.recv_cq = rcq; attr.srq = srq; attr.xrcd = xrcd; - attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - attr.qp_type = cmd.qp_type; + attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + attr.qp_type = cmd->qp_type; attr.create_flags = 0; - attr.cap.max_send_wr = cmd.max_send_wr; - attr.cap.max_recv_wr = cmd.max_recv_wr; - attr.cap.max_send_sge = cmd.max_send_sge; - attr.cap.max_recv_sge = cmd.max_recv_sge; - attr.cap.max_inline_data = cmd.max_inline_data; + attr.cap.max_send_wr = cmd->max_send_wr; + attr.cap.max_recv_wr = cmd->max_recv_wr; + attr.cap.max_send_sge = cmd->max_send_sge; + attr.cap.max_recv_sge = cmd->max_recv_sge; + attr.cap.max_inline_data = cmd->max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); - if (cmd.qp_type == IB_QPT_XRC_TGT) + if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + + sizeof(cmd->create_flags)) + attr.create_flags = cmd->create_flags; + + if (attr.create_flags) { + ret = -EINVAL; + goto err_put; + } + + buf = (void *)cmd + sizeof(*cmd); + if (cmd_sz > sizeof(*cmd)) + if (!(buf[0] == 0 && !memcmp(buf, buf + 1, + cmd_sz - sizeof(*cmd) - 1))) { + ret = -EINVAL; + goto err_put; + } + + if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else - qp = device->create_qp(pd, &attr, &udata); + qp = device->create_qp(pd, &attr, uhw); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } - if (cmd.qp_type != IB_QPT_XRC_TGT) { + if (cmd->qp_type != IB_QPT_XRC_TGT) { qp->real_qp = qp; qp->device = device; qp->pd = pd; @@ -1875,19 +1892,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_destroy; memset(&resp, 0, sizeof resp); - resp.qpn = qp->qp_num; - resp.qp_handle = obj->uevent.uobject.id; - resp.max_recv_sge = attr.cap.max_recv_sge; - resp.max_send_sge = attr.cap.max_send_sge; - resp.max_recv_wr = attr.cap.max_recv_wr; - resp.max_send_wr = attr.cap.max_send_wr; - resp.max_inline_data = attr.cap.max_inline_data; + resp.base.qpn = qp->qp_num; + resp.base.qp_handle = obj->uevent.uobject.id; + resp.base.max_recv_sge = attr.cap.max_recv_sge; + resp.base.max_send_sge = attr.cap.max_send_sge; + resp.base.max_recv_wr = attr.cap.max_recv_wr; + resp.base.max_send_wr = attr.cap.max_send_wr; + resp.base.max_inline_data = attr.cap.max_inline_data; - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) { - ret = -EFAULT; - goto err_copy; - } + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + + ret = cb(file, &resp, ucore); + if (ret) + goto err_cb; if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, @@ -1913,9 +1931,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, up_write(&obj->uevent.uobject.mutex); - return in_len; - -err_copy: + return 0; +err_cb: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: @@ -1937,6 +1954,113 @@ err_put: return ret; } +static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) + return -EFAULT; + + return 0; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_ex_create_qp cmd_ex; + struct ib_udata ucore; + struct ib_udata uhw; + ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); + int err; + + if (out_len < resp_size) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), + resp_size); + INIT_UDATA(&uhw, buf + sizeof(cmd), + (unsigned long)cmd.response + resp_size, + in_len - sizeof(cmd), out_len - resp_size); + + memset(&cmd_ex, 0, sizeof(cmd_ex)); + cmd_ex.user_handle = cmd.user_handle; + cmd_ex.pd_handle = cmd.pd_handle; + cmd_ex.send_cq_handle = cmd.send_cq_handle; + cmd_ex.recv_cq_handle = cmd.recv_cq_handle; + cmd_ex.srq_handle = cmd.srq_handle; + cmd_ex.max_send_wr = cmd.max_send_wr; + cmd_ex.max_recv_wr = cmd.max_recv_wr; + cmd_ex.max_send_sge = cmd.max_send_sge; + cmd_ex.max_recv_sge = cmd.max_recv_sge; + cmd_ex.max_inline_data = cmd.max_inline_data; + cmd_ex.sq_sig_all = cmd.sq_sig_all; + cmd_ex.qp_type = cmd.qp_type; + cmd_ex.is_srq = cmd.is_srq; + + err = create_qp(file, &ucore, &uhw, &cmd_ex, + offsetof(typeof(cmd_ex), is_srq) + + sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, + NULL); + + if (err) + return err; + + return in_len; +} + +static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, + struct ib_uverbs_ex_create_qp_resp *resp, + struct ib_udata *ucore) +{ + if (ib_copy_to_udata(ucore, resp, resp->response_length)) + return -EFAULT; + + return 0; +} + +int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_create_qp_resp resp; + struct ib_uverbs_ex_create_qp cmd = {0}; + int err; + + if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + + sizeof(cmd.comp_mask))) + return -EINVAL; + + err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (err) + return err; + + if (cmd.comp_mask) + return -EINVAL; + + if (cmd.reserved) + return -EINVAL; + + if (ucore->outlen < (offsetof(typeof(resp), response_length) + + sizeof(resp.response_length))) + return -ENOSPC; + + err = create_qp(file, ucore, uhw, &cmd, + min(ucore->inlen, sizeof(cmd)), + ib_uverbs_ex_create_qp_cb, NULL); + + if (err) + return err; + + return 0; +} + ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c29a660c72fe..e3ef28861be6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, + [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 978841eeaff1..8126c143a519 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -92,6 +92,7 @@ enum { enum { IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, }; @@ -516,6 +517,25 @@ struct ib_uverbs_create_qp { __u64 driver_data[0]; }; +struct ib_uverbs_ex_create_qp { + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u32 comp_mask; + __u32 create_flags; +}; + struct ib_uverbs_open_qp { __u64 response; __u64 user_handle; @@ -538,6 +558,12 @@ struct ib_uverbs_create_qp_resp { __u32 reserved; }; +struct ib_uverbs_ex_create_qp_resp { + struct ib_uverbs_create_qp_resp base; + __u32 comp_mask; + __u32 response_length; +}; + /* * This struct needs to remain a multiple of 8 bytes to keep the * alignment of the modify QP parameters. -- cgit v1.2.3 From ddf9529be19cb3674bd59c5b2a3375503663bba8 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 15 Oct 2015 14:44:37 +0300 Subject: IB/core: Allow setting create flags in QP init attribute Allow setting IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK at create_flags in ib_uverbs_create_qp_ex. Signed-off-by: Eran Ben Elisha Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b2a37d545e0b..ece9f4c5e4a4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1843,7 +1843,7 @@ static int create_qp(struct ib_uverbs_file *file, sizeof(cmd->create_flags)) attr.create_flags = cmd->create_flags; - if (attr.create_flags) { + if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { ret = -EINVAL; goto err_put; } -- cgit v1.2.3 From 9a8928359736ab170303ee8a2cc15db54e3a4a8f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Oct 2015 14:44:38 +0300 Subject: net/mlx4_core: Add support for filtering multicast loopback Update device capabilities regarding HW filtering multicast loopback support. Add MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute to mlx4_update_qp to enable changing QP context to support filtering incoming multicast loopback traffic according the sender's counter index. Set the corresponding bits in QP context to force the loopback source checks if attribute is given and HW supports it. Signed-off-by: Maor Gottlieb Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++++ drivers/net/ethernet/mellanox/mlx4/qp.c | 19 +++++++++++++- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 30 +++++++++++++++++----- include/linux/mlx4/device.h | 2 ++ include/linux/mlx4/qp.h | 24 +++++++++++++---- 5 files changed, 68 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e8ec1dec5789..b3be3a060311 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [27] = "Port beacon support", [28] = "RX-ALL support", [29] = "802.1ad offload support", + [31] = "Modifying loopback source checks using UPDATE_QP support", + [32] = "Loopback source checks support", }; int i; @@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; + if (field32 & (1 << 18)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB; + if (field32 & (1 << 19)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 20268634a9ab..b16249577aa2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, cmd->qp_context.pri_path.grh_mylmc = params->smac_index; } + if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) { + if (!(dev->caps.flags2 + & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Trying to set src check LB, but it isn't supported\n"); + err = -ENOTSUPP; + goto out; + } + pri_addr_path_mask |= + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB; + if (params->flags & + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) { + cmd->qp_context.pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + } + } + if (attr & MLX4_UPDATE_QP_VSD) { qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) @@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - +out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 731423ca575d..502f3350088e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, } } + /* preserve IF_COUNTER flag */ + qpc->pri_path.vlan_control &= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | @@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; } else { /* priority tagged */ - qpc->pri_path.vlan_control = + qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } @@ -3762,9 +3765,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); orig_sched_queue = qpc->pri_path.sched_queue; - err = update_vport_qp_param(dev, inbox, slave, qpn); - if (err) - return err; err = get_res(dev, slave, qpn, RES_QP, &qp); if (err) @@ -3774,6 +3774,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, goto out; } + err = update_vport_qp_param(dev, inbox, slave, qpn); + if (err) + goto out; + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: /* if no error, save sched queue value passed in by VF. This is @@ -4208,7 +4212,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave, } -#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \ + 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\ + 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB) int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -4231,6 +4237,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) return -EPERM; + if ((pri_addr_path_mask & + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && + !(dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { + mlx4_warn(dev, + "Src check LB for slave %d isn't supported\n", + slave); + return -ENOTSUPP; + } + /* Just change the smac for the QP */ err = get_res(dev, slave, qpn, RES_QP, &rqp); if (err) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..dac6872dbaea 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -214,6 +214,8 @@ enum { MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, + MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, }; enum { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index de45a51b3f04..fe052e234906 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -135,7 +135,10 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 vlan_control; + union { + u8 vlan_control; + u8 control; + }; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; @@ -156,9 +159,16 @@ struct mlx4_qp_path { }; enum { /* fl */ - MLX4_FL_CV = 1 << 6, - MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 + MLX4_FL_CV = 1 << 6, + MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, + MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, + MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; + +enum { /* control */ + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, +}; + enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ @@ -254,6 +264,8 @@ enum { MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, }; enum { /* param3 */ @@ -436,11 +448,13 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1 }; enum mlx4_update_qp_params_flags { - MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1, }; struct mlx4_update_qp_params { -- cgit v1.2.3 From 74194fb9c80cedb3130d26802c52deec3caebc75 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Oct 2015 14:44:39 +0300 Subject: net/mlx4_en: Implement mcast loopback prevention for ETH qps Set the mcast loopback prevention bit in the QPC for ETH MLX QPs (not RSS QPs), when the firmware supports this feature. In addition, all rx ring QPs need to be updated in order not to enforce loopback checks. This prevents getting packets we sent both from the network stack and the HCA. Loopback prevention is done by comparing the counter indices of the sent and receiving QPs. If they're equal, packets aren't loopback-ed. Signed-off-by: Maor Gottlieb Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/net/ethernet/mellanox/mlx4/en_main.c | 22 ++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_resources.c | 25 +++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 ++- 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index a946e4bf71d2..005f910ec955 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -123,6 +123,28 @@ void mlx4_en_update_loopback_state(struct net_device *dev, */ if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback) priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; + + mutex_lock(&priv->mdev->state_lock); + if (priv->mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB && + priv->rss_map.indir_qp.qpn) { + int i; + int err = 0; + int loopback = !!(features & NETIF_F_LOOPBACK); + + for (i = 0; i < priv->rx_ring_num; i++) { + int ret; + + ret = mlx4_en_change_mcast_lb(priv, + &priv->rss_map.qps[i], + loopback); + if (!err) + err = ret; + } + if (err) + mlx4_warn(priv->mdev, "failed to change mcast loopback\n"); + } + mutex_unlock(&priv->mdev->state_lock); } static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index e482fa1bb741..12aab5a659d3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -69,6 +69,15 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->pri_path.counter_index = priv->counter_index; context->cqn_send = cpu_to_be32(cqn); context->cqn_recv = cpu_to_be32(cqn); + if (!rss && + (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) && + context->pri_path.counter_index != + MLX4_SINK_COUNTER_INDEX(mdev->dev)) { + /* disable multicast loopback to qp with same counter */ + if (!(dev->features & NETIF_F_LOOPBACK)) + context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) context->param3 |= cpu_to_be32(1 << 30); @@ -80,6 +89,22 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, } } +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback) +{ + int ret; + struct mlx4_update_qp_params qp_params; + + memset(&qp_params, 0, sizeof(qp_params)); + if (!loopback) + qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB; + + ret = mlx4_update_qp(priv->mdev->dev, qp->qpn, + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB, + &qp_params); + + return ret; +} int mlx4_en_map_buffer(struct mlx4_buf *buf) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index defcf8c395bf..c41f15102ae0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -798,7 +798,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); - +int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, + int loopback); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); -- cgit v1.2.3 From 3ba8e31d5a4343fa042c976a9ce9c3c16946c92d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 15 Oct 2015 14:44:40 +0300 Subject: IB/mlx4: Add IB counters table This is an infrastructure step for allocating and attaching more than one counter to QPs on the same port. Allocate a counters table and manage the insertion and removals of the counters in load and unload of mlx4 IB. Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 25 ++++++++++---- drivers/infiniband/hw/mlx4/main.c | 63 ++++++++++++++++++++++++++++-------- drivers/infiniband/hw/mlx4/mlx4_ib.h | 9 +++++- drivers/infiniband/hw/mlx4/qp.c | 8 +++-- 4 files changed, 81 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1cd75ff02251..68f256716785 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -824,18 +824,29 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, { struct mlx4_counter counter_stats; struct mlx4_ib_dev *dev = to_mdev(ibdev); - int err; + struct counter_index *tmp_counter; + int err = IB_MAD_RESULT_FAILURE, stats_avail = 0; if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) return -EINVAL; memset(&counter_stats, 0, sizeof(counter_stats)); - err = mlx4_get_counter_stats(dev->dev, - dev->counters[port_num - 1].index, - &counter_stats, 0); - if (err) - err = IB_MAD_RESULT_FAILURE; - else { + mutex_lock(&dev->counters_table[port_num - 1].mutex); + list_for_each_entry(tmp_counter, + &dev->counters_table[port_num - 1].counters_list, + list) { + err = mlx4_get_counter_stats(dev->dev, + tmp_counter->index, + &counter_stats, 0); + if (err) { + err = IB_MAD_RESULT_FAILURE; + stats_avail = 0; + break; + } + stats_avail = 1; + } + mutex_unlock(&dev->counters_table[port_num - 1].mutex); + if (stats_avail) { memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 38be8dc2932e..232b104c1d04 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1249,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) return 0; } +static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev, + struct mlx4_ib_counters *ctr_table) +{ + struct counter_index *counter, *tmp_count; + + mutex_lock(&ctr_table->mutex); + list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list, + list) { + if (counter->allocated) + mlx4_counter_free(ibdev->dev, counter->index); + list_del(&counter->list); + kfree(counter); + } + mutex_unlock(&ctr_table->mutex); +} + int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, union ib_gid *gid) { @@ -2133,6 +2149,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int num_req_counters; int allocated; u32 counter_index; + struct counter_index *new_counter_index = NULL; pr_info_once("%s", mlx4_ib_version); @@ -2304,6 +2321,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; + for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->counters_table[i].mutex); + INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + } + num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; for (i = 0; i < num_req_counters; ++i) { mutex_init(&ibdev->qp1_proxy_lock[i]); @@ -2322,15 +2344,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) counter_index = mlx4_get_default_counter_index(dev, i + 1); } - ibdev->counters[i].index = counter_index; - ibdev->counters[i].allocated = allocated; + new_counter_index = kmalloc(sizeof(*new_counter_index), + GFP_KERNEL); + if (!new_counter_index) { + if (allocated) + mlx4_counter_free(ibdev->dev, counter_index); + goto err_counter; + } + new_counter_index->index = counter_index; + new_counter_index->allocated = allocated; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = counter_index; pr_info("counter index %d for port %d allocated %d\n", counter_index, i + 1, allocated); } if (mlx4_is_bonded(dev)) for (i = 1; i < ibdev->num_ports ; ++i) { - ibdev->counters[i].index = ibdev->counters[0].index; - ibdev->counters[i].allocated = 0; + new_counter_index = + kmalloc(sizeof(struct counter_index), + GFP_KERNEL); + if (!new_counter_index) + goto err_counter; + new_counter_index->index = counter_index; + new_counter_index->allocated = 0; + list_add_tail(&new_counter_index->list, + &ibdev->counters_table[i].counters_list); + ibdev->counters_table[i].default_counter = + counter_index; } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) @@ -2439,12 +2480,9 @@ err_steer_qp_release: mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); err_counter: - for (i = 0; i < ibdev->num_ports; ++i) { - if (ibdev->counters[i].index != -1 && - ibdev->counters[i].allocated) - mlx4_counter_free(ibdev->dev, - ibdev->counters[i].index); - } + for (i = 0; i < ibdev->num_ports; ++i) + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); + err_map: iounmap(ibdev->uar_map); @@ -2548,9 +2586,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) - if (ibdev->counters[p].index != -1 && - ibdev->counters[p].allocated) - mlx4_counter_free(ibdev->dev, ibdev->counters[p].index); + mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]); + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1e7b23bb2eb0..4056dc1c57cd 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -528,10 +528,17 @@ struct mlx4_ib_iov_port { }; struct counter_index { + struct list_head list; u32 index; u8 allocated; }; +struct mlx4_ib_counters { + struct list_head counters_list; + struct mutex mutex; /* mutex for accessing counters list */ + u32 default_counter; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -550,7 +557,7 @@ struct mlx4_ib_dev { struct mutex cap_mask_mutex; bool ib_active; struct mlx4_ib_iboe iboe; - struct counter_index counters[MLX4_MAX_PORTS]; + struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS]; int *eq_table; struct kobject *iov_parent; struct kobject *ports_parent; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 4ad9be3ad61c..382913e35486 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1460,6 +1460,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int sqd_event; int steer_qp = 0; int err = -EINVAL; + int counter_index; /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && @@ -1543,9 +1544,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { - if (dev->counters[qp->port - 1].index != -1) { - context->pri_path.counter_index = - dev->counters[qp->port - 1].index; + counter_index = + dev->counters_table[qp->port - 1].default_counter; + if (counter_index != -1) { + context->pri_path.counter_index = counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; } else context->pri_path.counter_index = -- cgit v1.2.3 From 7b59f0f9516040157450443b9df591556c0c49a9 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 15 Oct 2015 14:44:41 +0300 Subject: IB/mlx4: Add counter based implementation for QP multicast loopback block Current implementation for MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK is not supported when link layer is Ethernet. This patch will add counter based implementation for multicast loopback prevention. HW can drop multicast loopback packets if sender QP counter index is equal to receiver QP counter index. If qp flag MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK is set and link layer is Ethernet, create a new counter and attach it to the QP so it will continue receiving multicast loopback traffic but it's own. The decision if to create a new counter is being made at the qp modification to RTR after the QP's port is set. When QP is destroyed or moved back to reset state, delete the counter. Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 + drivers/infiniband/hw/mlx4/qp.c | 67 ++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 4056dc1c57cd..086416a863ae 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -320,6 +320,7 @@ struct mlx4_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + struct counter_index *counter_index; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 382913e35486..28719494ad6e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -617,6 +617,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) return 0; } +static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp) +{ + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + mlx4_counter_free(dev->dev, qp->counter_index->index); + list_del(&qp->counter_index->list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + kfree(qp->counter_index); + qp->counter_index = NULL; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, @@ -1189,6 +1201,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); } + if (mqp->counter_index) + mlx4_ib_free_qp_counter(dev, mqp); + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1447,6 +1462,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp * return 0; } +static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct counter_index *new_counter_index; + int err; + u32 tmp_idx; + + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) != + IB_LINK_LAYER_ETHERNET || + !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) || + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) + return 0; + + err = mlx4_counter_alloc(dev->dev, &tmp_idx); + if (err) + return err; + + new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL); + if (!new_counter_index) { + mlx4_counter_free(dev->dev, tmp_idx); + return -ENOMEM; + } + + new_counter_index->index = tmp_idx; + new_counter_index->allocated = 1; + qp->counter_index = new_counter_index; + + mutex_lock(&dev->counters_table[qp->port - 1].mutex); + list_add_tail(&new_counter_index->list, + &dev->counters_table[qp->port - 1].counters_list); + mutex_unlock(&dev->counters_table[qp->port - 1].mutex); + + return 0; +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1520,6 +1569,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; + if (new_state == IB_QPS_RESET && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); @@ -1544,11 +1596,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + err = create_qp_lb_counter(dev, qp); + if (err) + goto out; + counter_index = dev->counters_table[qp->port - 1].default_counter; + if (qp->counter_index) + counter_index = qp->counter_index->index; + if (counter_index != -1) { context->pri_path.counter_index = counter_index; optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; + if (qp->counter_index) { + context->pri_path.fl |= + MLX4_FL_ETH_SRC_CHECK_MC_LB; + context->pri_path.vlan_control |= + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER; + } } else context->pri_path.counter_index = MLX4_SINK_COUNTER_INDEX(dev->dev); @@ -1850,6 +1915,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } out: + if (err && qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); -- cgit v1.2.3 From fbfb6625ea2d1bd535db03838df381768a2d6865 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 15 Oct 2015 14:44:42 +0300 Subject: IB/mlx4: Add support for blocking multicast loopback QP creation user flag MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK is now supported downstream. In addition, this flag was supported only for IB_QPT_UD, now, with the new implementation it is supported for all QP types. Support IB_USER_VERBS_EX_CMD_CREATE_QP in order to get the flag from user space using the extension create qp command. Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 ++- drivers/infiniband/hw/mlx4/qp.c | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 232b104c1d04..8779d26d1e61 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2312,7 +2312,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); mlx4_ib_alloc_eqs(dev, ibdev); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 28719494ad6e..ba25a1bfa52f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -758,9 +758,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } else { qp->sq_no_prefetch = 0; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; @@ -834,6 +831,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) + qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -1098,6 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, { struct mlx4_ib_qp *qp = NULL; int err; + int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; gfp_t gfp; @@ -1121,8 +1122,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, } if (init_attr->create_flags && - (udata || - ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && + ((udata && init_attr->create_flags & ~(sup_u_create_flags)) || + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | + MLX4_IB_QP_CREATE_USE_GFP_NOIO | + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) -- cgit v1.2.3 From 55ee3ab2e49a9ead850722ef47698243dd226d16 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:45 +0300 Subject: IB/core: Add netdev and gid attributes paramteres to cache Adding an ability to query the IB cache by a netdev and get the attributes of a GID. These parameters are necessary in order to successfully resolve the required GID (when the netdevice is known) and get the Ethernet L2 attributes from a GID. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 10 ++++++---- drivers/infiniband/core/cm.c | 5 +++-- drivers/infiniband/core/cma.c | 10 ++++++---- drivers/infiniband/core/device.c | 17 ++++++++++++----- drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/multicast.c | 3 ++- drivers/infiniband/core/sa_query.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/core/verbs.c | 7 ++++--- drivers/infiniband/hw/mlx4/main.c | 4 ++-- drivers/infiniband/hw/mlx4/qp.c | 5 +++-- drivers/infiniband/hw/mthca/mthca_av.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 ++- include/rdma/ib_cache.h | 13 +++++++++---- include/rdma/ib_verbs.h | 5 +++-- 19 files changed, 60 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 87471ef37198..5c054072ef20 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -649,21 +649,23 @@ static int gid_table_setup_one(struct ib_device *ib_dev) int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, - union ib_gid *gid) + union ib_gid *gid, + struct ib_gid_attr *gid_attr) { if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; - return __ib_cache_gid_get(device, port_num, index, gid, NULL); + return __ib_cache_gid_get(device, port_num, index, gid, gid_attr); } EXPORT_SYMBOL(ib_get_cached_gid); int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, + struct net_device *ndev, u8 *port_num, u16 *index) { - return ib_cache_gid_find(device, gid, NULL, port_num, index); + return ib_cache_gid_find(device, gid, ndev, port_num, index); } EXPORT_SYMBOL(ib_find_cached_gid); @@ -845,7 +847,7 @@ static void ib_cache_update(struct ib_device *device, if (!use_roce_gid_table) { for (i = 0; i < gid_cache->table_len; ++i) { ret = ib_query_gid(device, port, i, - gid_cache->table + i); + gid_cache->table + i, NULL); if (ret) { printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4f918b929eca..2d8a0e4c42d6 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -365,7 +365,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - &p, NULL)) { + NULL, &p, NULL)) { port = cm_dev->port[p-1]; break; } @@ -1643,7 +1643,8 @@ static int cm_req_handler(struct cm_work *work) ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, - work->port->port_num, 0, &work->path[0].sgid); + work->port->port_num, 0, &work->path[0].sgid, + NULL); ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36b12d560e17..e4d9d75ffc08 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -438,7 +438,7 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - ret = ib_find_cached_gid(device, gid, &found_port, NULL); + ret = ib_find_cached_gid(device, gid, NULL, &found_port, NULL); if (port != found_port) return -ENODEV; @@ -531,7 +531,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; - for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, + &gid, NULL); + i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -718,7 +720,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, - qp_attr.ah_attr.grh.sgid_index, &sgid); + qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); if (ret) goto out; @@ -2426,7 +2428,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv) p = 1; port_found: - ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); + ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); if (ret) goto out; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 17639117afc6..f22ce487fd3a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port); * @port_num:Port number to query * @index:GID table index to query * @gid:Returned GID + * @attr: Returned GID attributes related to this GID index (only in RoCE). + * NULL means ignore. * * ib_query_gid() fetches the specified GID table entry. */ int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid) + u8 port_num, int index, union ib_gid *gid, + struct ib_gid_attr *attr) { if (rdma_cap_roce_gid_table(device, port_num)) - return ib_get_cached_gid(device, port_num, index, gid); + return ib_get_cached_gid(device, port_num, index, gid, attr); + + if (attr) + return -EINVAL; return device->query_gid(device, port_num, index, gid); } @@ -819,12 +825,13 @@ EXPORT_SYMBOL(ib_modify_port); * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @ndev: The ndev related to the GID to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index) + struct net_device *ndev, u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; @@ -832,14 +839,14 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { if (rdma_cap_roce_gid_table(device, port)) { if (!ib_cache_gid_find_by_port(device, gid, port, - NULL, index)) { + ndev, index)) { *port_num = port; return 0; } } for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { - ret = ib_query_gid(device, port, i, &tmp_gid); + ret = ib_query_gid(device, port, i, &tmp_gid, NULL); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 4b5c72311deb..fa63b89e15aa 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1877,7 +1877,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_ ((1 << lmc) - 1))); } else { if (ib_get_cached_gid(device, port_num, - attr.grh.sgid_index, &sgid)) + attr.grh.sgid_index, &sgid, NULL)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index d38d8b2b2979..bb6685fb08c6 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, u16 gid_index; u8 p; - ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); + ret = ib_find_cached_gid(device, &rec->port_gid, + NULL, &p, &gid_index); if (ret) return ret; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c014b33d8e0..9a4e7891ada2 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1010,7 +1010,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; - ret = ib_find_cached_gid(device, &rec->sgid, &port_num, + ret = ib_find_cached_gid(device, &rec->sgid, NULL, &port_num, &gid_index); if (ret) return ret; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 34cdd74b0a17..b1f37d4095fa 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, union ib_gid gid; ssize_t ret; - ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); + ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); if (ret) return ret; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e1f2c9887f3f..6012f5e5b97c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -344,8 +344,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(device, &grh->dgid, &port_num, - &gid_index); + ret = ib_find_cached_gid(device, &grh->dgid, + NULL, &port_num, &gid_index); if (ret) return ret; @@ -988,7 +988,8 @@ int ib_resolve_eth_l2_attrs(struct ib_qp *qp, if ((*qp_attr_mask & IB_QP_AV) && (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, &sgid); + qp_attr->ah_attr.grh.sgid_index, &sgid, + NULL); if (ret) goto out; if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8779d26d1e61..f63d5427bfc0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -335,7 +335,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) return index; - ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid); + ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL); if (ret) return ret; @@ -756,7 +756,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, if (!rdma_cap_roce_gid_table(ibdev, port)) return -ENODEV; - ret = ib_get_cached_gid(ibdev, port, index, gid); + ret = ib_get_cached_gid(ibdev, port, index, gid, NULL); if (ret == -EAGAIN) { memcpy(gid, &zgid, sizeof(*gid)); return 0; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ba25a1bfa52f..b52c9e1cdf1a 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2269,7 +2269,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid); + ah->av.ib.gid_index, &sgid, + NULL); if (err) return err; } @@ -2311,7 +2312,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid); + &sqp->ud_header.grh.source_gid, NULL); } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 32f6c6315454..bcac294042f5 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, ib_get_cached_gid(&dev->ib_dev, be32_to_cpu(ah->av->port_pd) >> 24, ah->av->gid_index % dev->limits.gid_table_len, - &header->grh.source_gid); + &header->grh.source_gid, NULL); memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 1c4e83d5d153..9bb710a402cd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, if (index >= OCRDMA_MAX_SGID) return -EINVAL; - ret = ib_get_cached_gid(ibdev, port, index, sgid); + ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL); if (ret == -EAGAIN) { memcpy(sgid, &zgid, sizeof(*sgid)); return 0; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index babba05d7a0e..cbb6721d0a65 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1860,7 +1860,7 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[9] = priv->pkey & 0xff; - result = ib_query_gid(hca, port, 0, &priv->local_gid); + result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); if (result) { printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d750a86042f3..d4b97614196c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -561,7 +561,7 @@ void ipoib_mcast_join_task(struct work_struct *work) } priv->local_lid = port_attr.lid; - if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) + if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) ipoib_warn(priv, "ib_query_gid() failed\n"); else memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b481490ad257..8ba887824d05 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3213,7 +3213,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); + ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); if (ret) goto out; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f6fe0414139b..a7ac77a02593 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -546,7 +546,8 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); + ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, + NULL); if (ret) goto err_query_port; diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index bd92130f4ac5..dcc9bed9b69a 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -43,6 +43,8 @@ * @port_num: The port number of the device to query. * @index: The index into the cached GID table to query. * @gid: The GID value found at the specified index. + * @attr: The GID attribute found at the specified index (only in RoCE). + * NULL means ignore (output parameter). * * ib_get_cached_gid() fetches the specified GID table entry stored in * the local software cache. @@ -50,13 +52,15 @@ int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, - union ib_gid *gid); + union ib_gid *gid, + struct ib_gid_attr *attr); /** * ib_find_cached_gid - Returns the port number and GID table index where * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @ndev: In RoCE, the net device of the device. NULL means ignore. * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. @@ -64,10 +68,11 @@ int ib_get_cached_gid(struct ib_device *device, * ib_find_cached_gid() searches for the specified GID value in * the local software cache. */ -int ib_find_cached_gid(struct ib_device *device, +int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, - u8 *port_num, - u16 *index); + struct net_device *ndev, + u8 *port_num, + u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e1f65e204d37..98ded0b749cd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2177,7 +2177,8 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, } int ib_query_gid(struct ib_device *device, - u8 port_num, int index, union ib_gid *gid); + u8 port_num, int index, union ib_gid *gid, + struct ib_gid_attr *attr); int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); @@ -2191,7 +2192,7 @@ int ib_modify_port(struct ib_device *device, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - u8 *port_num, u16 *index); + struct net_device *ndev, u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -- cgit v1.2.3 From d300ec528b799ca87935b3667f5563f397f00f85 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:46 +0300 Subject: IB/core: Expose and rename ib_find_cached_gid_by_port cache API Sometime consumers might want to search for a GID in a specific port. For example, when a WC arrives and we want to search the GID that matches that port - it's better to search only the relevant port. Exposing and renaming ib_cache_gid_find_by_port in order to match the naming convention of the module. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 9 +++++---- drivers/infiniband/core/core_priv.h | 5 ----- drivers/infiniband/core/device.c | 4 ++-- include/rdma/ib_cache.h | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 5c054072ef20..639a7266bfaf 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev, mask, port, index); } -int ib_cache_gid_find_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, struct net_device *ndev, - u16 *index) +int ib_find_cached_gid_by_port(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, struct net_device *ndev, + u16 *index) { int local_index; struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; @@ -438,6 +438,7 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev, return -ENOENT; } +EXPORT_SYMBOL(ib_find_cached_gid_by_port); static struct ib_gid_table *alloc_gid_table(int sz) { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 70bb36ebb03b..0df82b1f84c1 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); -int ib_cache_gid_find_by_port(struct ib_device *ib_dev, - const union ib_gid *gid, - u8 port, struct net_device *ndev, - u16 *index); - enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index f22ce487fd3a..179e8134d57f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -838,8 +838,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { if (rdma_cap_roce_gid_table(device, port)) { - if (!ib_cache_gid_find_by_port(device, gid, port, - ndev, index)) { + if (!ib_find_cached_gid_by_port(device, gid, port, + ndev, index)) { *port_num = port; return 0; } diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index dcc9bed9b69a..679d7ca6a3ee 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -74,6 +74,25 @@ int ib_find_cached_gid(struct ib_device *device, u8 *port_num, u16 *index); +/** + * ib_find_cached_gid_by_port - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value sould be + * searched. + * @ndev: In RoCE, the net device of the device. Null means ignore. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid_by_port(struct ib_device *device, + const union ib_gid *gid, + u8 port_num, + struct net_device *ndev, + u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. -- cgit v1.2.3 From ba36e37fd3ca3dc8f215b14bcfdccf9f41b65767 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:47 +0300 Subject: IB/core: Add netdev to path record In order to find the sgid_index, one could just query the IB cache with the correct GID and netdevice. Therefore, instead of storing the L2 attributes directly in the path, we only store the ifindex and net and use them later to get the sgid_index. The vlan_id and smac L2 attributes are removed in a later patch. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 13 +++++++++++-- drivers/infiniband/core/uverbs_marshall.c | 2 ++ include/rdma/ib_sa.h | 10 ++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9a4e7891ada2..c9d9d7afcdb4 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1007,18 +1007,25 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, force_grh = rdma_cap_eth_ah(device, port_num); if (rec->hop_limit > 1 || force_grh) { + struct net_device *ndev = ib_get_ndev_from_path(rec); + ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; - ret = ib_find_cached_gid(device, &rec->sgid, NULL, &port_num, + ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num, &gid_index); - if (ret) + if (ret) { + if (ndev) + dev_put(ndev); return ret; + } ah_attr->grh.sgid_index = gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; + if (ndev) + dev_put(ndev); } if (force_grh) { memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); @@ -1151,6 +1158,8 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.vlan_id = 0xffff; + rec.net = NULL; + rec.ifindex = 0; memset(rec.dmac, 0, ETH_ALEN); memset(rec.smac, 0, ETH_ALEN); query->callback(status, &rec, query->context); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index abd97247443e..484698c94d7f 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -144,5 +144,7 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, memset(dst->smac, 0, sizeof(dst->smac)); memset(dst->dmac, 0, sizeof(dst->dmac)); dst->vlan_id = 0xffff; + dst->net = NULL; + dst->ifindex = 0; } EXPORT_SYMBOL(ib_copy_path_rec_from_user); diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 7e071a6abb34..406ecf177f21 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -157,8 +158,17 @@ struct ib_sa_path_rec { u8 smac[ETH_ALEN]; u8 dmac[ETH_ALEN]; u16 vlan_id; + /* ignored in IB */ + int ifindex; + /* ignored in IB */ + struct net *net; }; +static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) +{ + return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL; +} + #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) #define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) #define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) -- cgit v1.2.3 From c2c6ff134596e2691de7506667b712bef93cb1f0 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:48 +0300 Subject: IB/cm: cm_init_av_by_path should find a GID by its netdevice Previously, the CM has searched the cache for any sgid_index whose GID matches the path's GID. Since the path record stores the net device, the CM should now search only for GIDs which originated from this net device. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2d8a0e4c42d6..db242bb00299 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -361,17 +361,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) unsigned long flags; int ret; u8 p; + struct net_device *ndev = ib_get_ndev_from_path(path); read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, - NULL, &p, NULL)) { + ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); + if (ndev) + dev_put(ndev); + if (!port) return -EINVAL; @@ -384,7 +388,6 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; - memcpy(av->smac, path->smac, sizeof(av->smac)); av->valid = 1; return 0; -- cgit v1.2.3 From abae1b71dd37bab506b14a6cf6ba7148f4d57232 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:49 +0300 Subject: IB/cma: cma_validate_port should verify the port and netdevice Previously, cma_validate_port searched for GIDs in IB cache and then tried to verify the found port. This could fail when there are identical GIDs on both ports. In addition, netdevice should be taken into account when searching the GID table. Fixing cma_validate_port to search only the relevant port's cache and netdevice. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e4d9d75ffc08..2cbf9c9b4fe4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -427,10 +427,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a } static inline int cma_validate_port(struct ib_device *device, u8 port, - union ib_gid *gid, int dev_type) + union ib_gid *gid, int dev_type, + int bound_if_index) { - u8 found_port; int ret = -ENODEV; + struct net_device *ndev = NULL; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ret; @@ -438,9 +439,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - ret = ib_find_cached_gid(device, gid, NULL, &found_port, NULL); - if (port != found_port) - return -ENODEV; + if (dev_type == ARPHRD_ETHER) + ndev = dev_get_by_index(&init_net, bound_if_index); + + ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); + + if (ndev) + dev_put(ndev); return ret; } @@ -472,7 +477,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, gidp, - dev_addr->dev_type); + dev_addr->dev_type, + dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; @@ -490,7 +496,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv, &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, gidp, - dev_addr->dev_type); + dev_addr->dev_type, + dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; @@ -2296,8 +2303,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; - if (addr->dev_addr.bound_dev_if) + if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + route->path_rec->net = &init_net; + route->path_rec->ifindex = addr->dev_addr.bound_dev_if; + } if (!ndev) { ret = -ENODEV; goto err2; -- cgit v1.2.3 From 99b27e3b5da0871cb43980960fb14ff625adffad Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:50 +0300 Subject: IB/cache: Add ib_find_gid_by_filter cache API GID cache API users might want to search for GIDs with specific attributes rather than just specifying GID, net device and port. This is used in a later patch, where we find the sgid index by L2 Ethernet attributes. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 93 +++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_cache.h | 8 ++++ 2 files changed, 101 insertions(+) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 639a7266bfaf..89bebeada38b 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -440,6 +440,81 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev, } EXPORT_SYMBOL(ib_find_cached_gid_by_port); +/** + * ib_find_gid_by_filter - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value could be + * searched. + * @filter: The filter function is executed on any matching GID in the table. + * If the filter function returns true, the corresponding index is returned, + * otherwise, we continue searching the GID table. It's guaranteed that + * while filter is executed, ndev field is valid and the structure won't + * change. filter is executed in an atomic context. filter must not be NULL. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_cache_gid_find_by_filter() searches for the specified GID value + * of which the filter function returns true in the port's GID table. + * This function is only supported on RoCE ports. + * + */ +static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, + const union ib_gid *gid, + u8 port, + bool (*filter)(const union ib_gid *, + const struct ib_gid_attr *, + void *), + void *context, + u16 *index) +{ + struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; + struct ib_gid_table *table; + unsigned int i; + bool found = false; + + if (!ports_table) + return -EOPNOTSUPP; + + if (port < rdma_start_port(ib_dev) || + port > rdma_end_port(ib_dev) || + !rdma_protocol_roce(ib_dev, port)) + return -EPROTONOSUPPORT; + + table = ports_table[port - rdma_start_port(ib_dev)]; + + for (i = 0; i < table->sz; i++) { + struct ib_gid_attr attr; + unsigned long flags; + + read_lock_irqsave(&table->data_vec[i].lock, flags); + if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) + goto next; + + if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) + goto next; + + memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); + + if (filter(gid, &attr, context)) + found = true; + +next: + read_unlock_irqrestore(&table->data_vec[i].lock, flags); + + if (found) + break; + } + + if (!found) + return -ENOENT; + + if (index) + *index = i; + return 0; +} + static struct ib_gid_table *alloc_gid_table(int sz) { unsigned int i; @@ -670,6 +745,24 @@ int ib_find_cached_gid(struct ib_device *device, } EXPORT_SYMBOL(ib_find_cached_gid); +int ib_find_gid_by_filter(struct ib_device *device, + const union ib_gid *gid, + u8 port_num, + bool (*filter)(const union ib_gid *gid, + const struct ib_gid_attr *, + void *), + void *context, u16 *index) +{ + /* Only RoCE GID table supports filter function */ + if (!rdma_cap_roce_gid_table(device, port_num) && filter) + return -EPROTONOSUPPORT; + + return ib_cache_gid_find_by_filter(device, gid, + port_num, filter, + context, index); +} +EXPORT_SYMBOL(ib_find_gid_by_filter); + int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 679d7ca6a3ee..269a27cf0a46 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -93,6 +93,14 @@ int ib_find_cached_gid_by_port(struct ib_device *device, u8 port_num, struct net_device *ndev, u16 *index); + +int ib_find_gid_by_filter(struct ib_device *device, + const union ib_gid *gid, + u8 port_num, + bool (*filter)(const union ib_gid *gid, + const struct ib_gid_attr *, + void *), + void *context, u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. -- cgit v1.2.3 From dbf727de7440f73c4b92be4b958cbc24977e8ca2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:51 +0300 Subject: IB/core: Use GID table in AH creation and dmac resolution Previously, vlan id and source MAC were used from QP attributes. Since the net device is now stored in the GID attributes, they could be used instead of getting this information from the QP attributes. IB_QP_SMAC, IB_QP_ALT_SMAC, IB_QP_VID and IB_QP_ALT_VID were removed because there is no known libibverbs that uses them. This commit also modifies the vendors (mlx4, ocrdma) drivers in order to use the new approach. ocrdma driver changes were done by Somnath Kotur Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 3 +- drivers/infiniband/core/core_priv.h | 4 +- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 160 ++++++++++++++++++------------- drivers/infiniband/hw/mlx4/ah.c | 17 +++- drivers/infiniband/hw/mlx4/mad.c | 12 ++- drivers/infiniband/hw/mlx4/mcg.c | 2 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/qp.c | 47 +++++++-- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 22 +++-- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 30 +++--- include/rdma/ib_addr.h | 2 +- 12 files changed, 193 insertions(+), 110 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 746cdf56bc76..d3c42b3c1b51 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -458,7 +458,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr, } int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *dmac, u16 *vlan_id) + u8 *dmac, u16 *vlan_id, int if_index) { int ret = 0; struct rdma_dev_addr dev_addr; @@ -476,6 +476,7 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.bound_dev_if = if_index; ctx.addr = &dev_addr; init_completion(&ctx.comp); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0df82b1f84c1..5cf6eb716f00 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index ece9f4c5e4a4..74dbfd782e19 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2345,7 +2345,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); + ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); if (ret) goto release_qp; ret = qp->device->modify_qp(qp, attr, diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6012f5e5b97c..46d97f09fbae 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #include #include @@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); +struct find_gid_index_context { + u16 vlan_id; +}; + +static bool find_gid_index(const union ib_gid *gid, + const struct ib_gid_attr *gid_attr, + void *context) +{ + struct find_gid_index_context *ctx = + (struct find_gid_index_context *)context; + + if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || + (is_vlan_dev(gid_attr->ndev) && + vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) + return false; + + return true; +} + +static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, + u16 vlan_id, const union ib_gid *sgid, + u16 *gid_index) +{ + struct find_gid_index_context context = {.vlan_id = vlan_id}; + + return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, + &context, gid_index); +} + int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct ib_ah_attr *ah_attr) @@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, memset(ah_attr, 0, sizeof *ah_attr); if (rdma_cap_eth_ah(device, port_num)) { + u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? + wc->vlan_id : 0xffff; + if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; - if (wc->wc_flags & IB_WC_WITH_SMAC && - wc->wc_flags & IB_WC_WITH_VLAN) { - memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); - ah_attr->vlan_id = wc->vlan_id; - } else { + if (!(wc->wc_flags & IB_WC_WITH_SMAC) || + !(wc->wc_flags & IB_WC_WITH_VLAN)) { ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, - ah_attr->dmac, &ah_attr->vlan_id); + ah_attr->dmac, + wc->wc_flags & IB_WC_WITH_VLAN ? + NULL : &vlan_id, + 0); if (ret) return ret; } - } else { - ah_attr->vlan_id = 0xffff; + + ret = get_sgid_index_from_eth(device, port_num, vlan_id, + &grh->dgid, &gid_index); + if (ret) + return ret; + + if (wc->wc_flags & IB_WC_WITH_SMAC) + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); } ah_attr->dlid = wc->slid; @@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = grh->sgid; - ret = ib_find_cached_gid(device, &grh->dgid, - NULL, &port_num, &gid_index); - if (ret) - return ret; + if (!rdma_cap_eth_ah(device, port_num)) { + ret = ib_find_cached_gid_by_port(device, &grh->dgid, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); @@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; - enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; - enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -700,12 +742,6 @@ static const struct { IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, - .req_param_add_eth = { - [IB_QPT_RC] = (IB_QP_SMAC), - [IB_QPT_UC] = (IB_QP_SMAC), - [IB_QPT_XRC_INI] = (IB_QP_SMAC), - [IB_QPT_XRC_TGT] = (IB_QP_SMAC) - }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), @@ -726,21 +762,7 @@ static const struct { [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), }, - .opt_param_add_eth = { - [IB_QPT_RC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_UC] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID), - [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | - IB_QP_VID | - IB_QP_ALT_VID) - } - } + }, }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; - if (ll == IB_LINK_LAYER_ETHERNET) { - req_param |= qp_state_table[cur_state][next_state]. - req_param_add_eth[type]; - opt_param |= qp_state_table[cur_state][next_state]. - opt_param_add_eth[type]; - } - if ((mask & req_param) != req_param) return 0; @@ -979,41 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_l2_attrs(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret = 0; - union ib_gid sgid; - if ((*qp_attr_mask & IB_QP_AV) && - (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { - ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, &sgid, - NULL); - if (ret) - goto out; + if (*qp_attr_mask & IB_QP_AV) { + if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || + qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) + return -EINVAL; + + if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) + return 0; + if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); - rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); - if (!(*qp_attr_mask & IB_QP_VID)) - qp_attr->vlan_id = rdma_get_vlan_id(&sgid); + rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, + qp_attr->ah_attr.dmac); } else { - ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, &qp_attr->vlan_id); - if (ret) - goto out; - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); - if (ret) + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + + ret = ib_query_gid(qp->device, + qp_attr->ah_attr.port_num, + qp_attr->ah_attr.grh.sgid_index, + &sgid, &sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; goto out; + } + + ifindex = sgid_attr.ndev->ifindex; + + ret = rdma_addr_find_dmac_by_grh(&sgid, + &qp_attr->ah_attr.grh.dgid, + qp_attr->ah_attr.dmac, + NULL, ifindex); + + dev_put(sgid_attr.ndev); } - *qp_attr_mask |= IB_QP_SMAC; - if (qp_attr->vlan_id < 0xFFFF) - *qp_attr_mask |= IB_QP_VID; } out: return ret; } -EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); +EXPORT_SYMBOL(ib_resolve_eth_dmac); int ib_modify_qp(struct ib_qp *qp, @@ -1022,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp, { int ret; - ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); + ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 1688a17de4fe..86af71351d9a 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr struct mlx4_dev *dev = ibdev->dev; int is_mcast = 0; struct in6_addr in6; - u16 vlan_tag; + u16 vlan_tag = 0xffff; + union ib_gid sgid; + struct ib_gid_attr gid_attr; + int ret; memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); if (rdma_is_multicast_addr(&in6)) { @@ -85,7 +88,17 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr } else { memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); } - vlan_tag = ah_attr->vlan_id; + ret = ib_get_cached_gid(pd->device, ah_attr->port_num, + ah_attr->grh.sgid_index, &sgid, &gid_attr); + if (ret) + return ERR_PTR(ret); + memset(ah->av.eth.s_mac, 0, ETH_ALEN); + if (gid_attr.ndev) { + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 68f256716785..67c7120f0d52 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1183,7 +1183,7 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, - u8 *s_mac, struct ib_mad *mad) + u8 *s_mac, u16 vlan_id, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1270,6 +1270,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.send_flags = IB_SEND_SIGNALED; if (s_mac) memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + if (vlan_id < 0x1000) + vlan_id |= (attr->sl & 7) << 13; + to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id); ret = ib_post_send(send_qp, &wr, &bad_wr); @@ -1306,6 +1309,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u8 *slave_id; int slave; int port; + u16 vlan_id; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1394,10 +1398,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); - ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + vlan_id = be16_to_cpu(tunnel->hdr.vlan); /* if slave have default vlan use it */ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, - &ah_attr.vlan_id, &ah_attr.sl); + &vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? @@ -1405,7 +1409,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, &tunnel->mad); + &ah_attr, wc->smac, vlan_id, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 2d5bccd71fc6..99451d887266 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) spin_unlock_irqrestore(&dev->sm_lock, flags); return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, - &ah_attr, NULL, mad); + &ah_attr, NULL, 0xffff, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 086416a863ae..28a99e08030e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -821,7 +821,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, - struct ib_mad *mad); + u16 vlan_id, struct ib_mad *mad); __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b52c9e1cdf1a..42a051b088e4 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1409,11 +1409,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, struct mlx4_ib_qp *mqp, - struct mlx4_qp_path *path, u8 port) + struct mlx4_qp_path *path, u8 port, + u16 vlan_id, u8 *smac) { return _mlx4_set_path(dev, &qp->ah_attr, - mlx4_mac_to_u64((u8 *)qp->smac), - (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, + mlx4_mac_to_u64(smac), + vlan_id, path, &mqp->pri, port); } @@ -1424,9 +1425,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, - mlx4_mac_to_u64((u8 *)qp->alt_smac), - (qp_attr_mask & IB_QP_ALT_VID) ? - qp->alt_vlan_id : 0xffff, + 0, + 0xffff, path, &mqp->alt, port); } @@ -1442,7 +1442,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } -static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, + struct mlx4_ib_qp *qp, struct mlx4_qp_context *context) { u64 u64_mac; @@ -1635,9 +1636,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { + u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + union ib_gid gid; + struct ib_gid_attr gid_attr; + u16 vlan = 0xffff; + u8 smac[ETH_ALEN]; + int status = 0; + + if (rdma_cap_eth_ah(&dev->ib_dev, port_num) && + attr->ah_attr.ah_flags & IB_AH_GRH) { + int index = attr->ah_attr.grh.sgid_index; + + status = ib_get_cached_gid(ibqp->device, port_num, + index, &gid, &gid_attr); + if (!status && !memcmp(&gid, &zgid, sizeof(gid))) + status = -ENOENT; + if (!status && gid_attr.ndev) { + vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev); + memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(gid_attr.ndev); + } + } + if (status) + goto out; + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, - attr_mask & IB_QP_PORT ? - attr->port_num : qp->port)) + port_num, vlan, smac)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | @@ -1774,7 +1799,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { - err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + err = handle_eth_ud_smac_index(dev, qp, context); if (err) { err = -EINVAL; goto out; @@ -2271,6 +2296,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sgid, NULL); + if (!err && !memcmp(&sgid, &zgid, sizeof(sgid))) + err = -ENOENT; if (err) return err; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 44766fee1f4e..9820074be59d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -45,6 +45,7 @@ #include #include +#include #include "ocrdma.h" #include "ocrdma_verbs.h" @@ -56,10 +57,9 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, union ib_gid *sgid, - int pdid, bool *isvlan) + int pdid, bool *isvlan, u16 vlan_tag) { int status = 0; - u16 vlan_tag; struct ocrdma_eth_vlan eth; struct ocrdma_grh grh; int eth_sz; @@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, memset(&grh, 0, sizeof(grh)); /* VLAN */ - vlan_tag = attr->vlan_id; if (!vlan_tag || (vlan_tag > 0xFFF)) vlan_tag = dev->pvid; if (vlan_tag || dev->pfc_state) { @@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) { u32 *ahid_addr; - bool isvlan = false; int status; struct ocrdma_ah *ah; + bool isvlan = false; + u16 vlan_tag = 0xffff; + struct ib_gid_attr sgid_attr; struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); union ib_gid sgid; @@ -135,18 +136,25 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (status) goto av_err; - status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid); + status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid, + &sgid_attr); if (status) { pr_err("%s(): Failed to query sgid, status = %d\n", __func__, status); goto av_conf_err; } + if (sgid_attr.ndev) { + if (is_vlan_dev(sgid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev); + dev_put(sgid_attr.ndev); + } if ((pd->uctx) && (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, - attr->dmac, &attr->vlan_id); + attr->dmac, &vlan_tag, + sgid_attr.ndev->ifindex); if (status) { pr_err("%s(): Failed to resolve dmac from gid." "status = %d\n", __func__, status); @@ -154,7 +162,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) } } - status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan); + status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); if (status) goto av_conf_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 9d9914299866..30f67bebffa3 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -47,6 +47,7 @@ #include #include +#include #include "ocrdma.h" #include "ocrdma_hw.h" @@ -2470,6 +2471,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, int status; struct ib_ah_attr *ah_attr = &attrs->ah_attr; union ib_gid sgid, zgid; + struct ib_gid_attr sgid_attr; u32 vlan_id = 0xFFFF; u8 mac_addr[6]; struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); @@ -2488,10 +2490,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); - status = ocrdma_query_gid(&dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); - if (status) - return status; + + status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index, + &sgid, &sgid_attr); + if (!status && sgid_attr.ndev) { + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN); + dev_put(sgid_attr.ndev); + } memset(&zgid, 0, sizeof(zgid)); if (!memcmp(&sgid, &zgid, sizeof(zgid))) @@ -2508,17 +2514,15 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - if (attr_mask & IB_QP_VID) { - vlan_id = attrs->vlan_id; - } else if (dev->pfc_state) { - vlan_id = 0; - pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", - dev->id); - pr_err("ocrdma%d:Using VLAN 0 for this connection\n", - dev->id); - } if (vlan_id < 0x1000) { + if (dev->pfc_state) { + vlan_id = 0; + pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", + dev->id); + pr_err("ocrdma%d:Using VLAN 0 for this connection\n", + dev->id); + } cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index fde33ac6b58a..17e4a8bdecab 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -112,7 +112,7 @@ int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id); + u8 *smac, u16 *vlan_id, int if_index); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { -- cgit v1.2.3 From 5c266b2304fb886d1f2b1e3c5f5be91ff1c61254 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:52 +0300 Subject: IB/cm: Remove the usage of smac and vid of qp_attr and cm_av The cm and cma don't need to explicitly handle vlan and smac, as they are resolved from the GID index now. Removing this portion of code. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 30 ------------------------------ drivers/infiniband/core/cma.c | 6 ------ 2 files changed, 36 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index db242bb00299..0a26dd6d9b19 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -179,8 +179,6 @@ struct cm_av { struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; - u8 valid; - u8 smac[ETH_ALEN]; }; struct cm_work { @@ -389,7 +387,6 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) &av->ah_attr); av->timeout = path->packet_life_time + 1; - av->valid = 1; return 0; } @@ -1642,7 +1639,6 @@ static int cm_req_handler(struct cm_work *work) cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); - work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -3622,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; - if (!cm_id_priv->av.valid) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return -EINVAL; - } - if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { - qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_VID; - } - if (!is_zero_ether_addr(cm_id_priv->av.smac)) { - memcpy(qp_attr->smac, cm_id_priv->av.smac, - sizeof(qp_attr->smac)); - *qp_attr_mask |= IB_QP_SMAC; - } - if (cm_id_priv->alt_av.valid) { - if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { - qp_attr->alt_vlan_id = - cm_id_priv->alt_av.ah_attr.vlan_id; - *qp_attr_mask |= IB_QP_ALT_VID; - } - if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { - memcpy(qp_attr->alt_smac, - cm_id_priv->alt_av.smac, - sizeof(qp_attr->alt_smac)); - *qp_attr_mask |= IB_QP_ALT_SMAC; - } - } qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2cbf9c9b4fe4..09ddff8cc2ac 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -733,12 +733,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, BUG_ON(id_priv->cma_dev->device != id_priv->id.device); - if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { - ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); - - if (ret) - goto out; - } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); -- cgit v1.2.3 From aa744cc01fe0f21dfbe2744d3fd5f2fb3244c9b3 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:53 +0300 Subject: IB/core: Remove smac and vlan id from qp_attr and ah_attr Smac and vlan id could be resolved from the GID attribute, and thus these attributes aren't needed anymore. Removing them. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 4 ---- drivers/infiniband/core/ucma.c | 1 - drivers/infiniband/core/uverbs_cmd.c | 1 - include/rdma/ib_verbs.h | 13 ++++--------- 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index c9d9d7afcdb4..77f5afc297c6 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1029,11 +1029,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, } if (force_grh) { memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); - ah_attr->vlan_id = rec->vlan_id; - } else { - ah_attr->vlan_id = 0xffff; } - return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 30467d10df91..3a342dfaac15 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1211,7 +1211,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx, return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); - sa_path.vlan_id = 0xffff; ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 74dbfd782e19..8fd081ae9aa9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2822,7 +2822,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; - attr.vlan_id = 0; memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 98ded0b749cd..e4cc389c43cb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -699,7 +699,6 @@ struct ib_ah_attr { u8 ah_flags; u8 port_num; u8 dmac[ETH_ALEN]; - u16 vlan_id; }; enum ib_wc_status { @@ -958,10 +957,10 @@ enum ib_qp_attr_mask { IB_QP_PATH_MIG_STATE = (1<<18), IB_QP_CAP = (1<<19), IB_QP_DEST_QPN = (1<<20), - IB_QP_SMAC = (1<<21), - IB_QP_ALT_SMAC = (1<<22), - IB_QP_VID = (1<<23), - IB_QP_ALT_VID = (1<<24), + IB_QP_RESERVED1 = (1<<21), + IB_QP_RESERVED2 = (1<<22), + IB_QP_RESERVED3 = (1<<23), + IB_QP_RESERVED4 = (1<<24), }; enum ib_qp_state { @@ -1011,10 +1010,6 @@ struct ib_qp_attr { u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; - u8 smac[ETH_ALEN]; - u8 alt_smac[ETH_ALEN]; - u16 vlan_id; - u16 alt_vlan_id; }; enum ib_wr_opcode { -- cgit v1.2.3 From 10e07f13c06690488087f5d3f2c59a9728def339 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 Oct 2015 18:38:54 +0300 Subject: IB/core: Remove smac and vlan id from path record The GID cache accompanies every GID with attributes. The GID attributes link the GID with its netdevice, which could be resolved to smac and vlan id easily. Since we've added the netdevice (ifindex and net) to the path record, storing the L2 attributes is duplicated data and hence these attributes are removed. Signed-off-by: Matan Barak Reviewed-By: Devesh Sharma Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 -- drivers/infiniband/core/sa_query.c | 2 -- drivers/infiniband/core/uverbs_marshall.c | 2 -- include/rdma/ib_sa.h | 2 -- 4 files changed, 8 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 09ddff8cc2ac..be41784a62e4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2307,9 +2307,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); - memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 77f5afc297c6..dcdaa79e3f0f 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1153,11 +1153,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); - rec.vlan_id = 0xffff; rec.net = NULL; rec.ifindex = 0; memset(rec.dmac, 0, ETH_ALEN); - memset(rec.smac, 0, ETH_ALEN); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 484698c94d7f..7d2f14c9bbef 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -141,9 +141,7 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; - memset(dst->smac, 0, sizeof(dst->smac)); memset(dst->dmac, 0, sizeof(dst->dmac)); - dst->vlan_id = 0xffff; dst->net = NULL; dst->ifindex = 0; } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 406ecf177f21..301969552d0a 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -155,9 +155,7 @@ struct ib_sa_path_rec { u8 packet_life_time_selector; u8 packet_life_time; u8 preference; - u8 smac[ETH_ALEN]; u8 dmac[ETH_ALEN]; - u16 vlan_id; /* ignored in IB */ int ifindex; /* ignored in IB */ -- cgit v1.2.3 From 78fc3fc4cc33df385dd5e5e02630cf467790e758 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Oct 2015 10:59:18 -0700 Subject: IB/iser: Remove an unused variable Detected this by compiling with W=1. Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8f2f1057370c..3d0bdb87a653 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -126,7 +126,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, { int rc = 0; int datalen; - int ahslen; /* verify PDU length */ datalen = ntoh24(hdr->dlength); @@ -141,9 +140,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr, iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", datalen, rx_data_len); - /* read AHS */ - ahslen = hdr->hlength * 4; - rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) goto error; -- cgit v1.2.3 From 6c760b3dd576329e776b353f2eaefbe2034361b9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 22 Oct 2015 11:14:18 -0700 Subject: iser-target: Remove an unused variable Detected this by compiling with W=1. Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/isert/ib_isert.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index aa59037d7504..b58ac25ce19d 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1579,7 +1579,6 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) struct iser_hdr *iser_hdr = &rx_desc->iser_header; uint64_t read_va = 0, write_va = 0; uint32_t read_stag = 0, write_stag = 0; - int rc; switch (iser_hdr->flags & 0xF0) { case ISCSI_CTRL: @@ -1606,8 +1605,8 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn) break; } - rc = isert_rx_opcode(isert_conn, rx_desc, - read_stag, read_va, write_stag, write_va); + isert_rx_opcode(isert_conn, rx_desc, + read_stag, read_va, write_stag, write_va); } static void -- cgit v1.2.3 From dd0107a08996c0ab8cac2b98ddbed5313e118e81 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:12:58 +0300 Subject: IB/iser: set block queue_virt_boundary The block layer can reliably guarantee that SG lists won't contain gaps (page unaligned) if a driver set the queue virt_boundary. With this setting the block layer will: - refuse merges if bios are not aligned to the virtual boundary - split bios/requests that are not aligned to the virtual boundary - or, bounce buffer SG_IOs that are not aligned to the virtual boundary Since iser is working in 4K page size, set the virt_boundary to 4K pages. With this setting, we can now safely remove the bounce buffering logic in iser. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 12 +- drivers/infiniband/ulp/iser/iscsi_iser.h | 7 +- drivers/infiniband/ulp/iser/iser_initiator.c | 51 +---- drivers/infiniband/ulp/iser/iser_memory.c | 274 --------------------------- 4 files changed, 18 insertions(+), 326 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 3d0bdb87a653..2ea0a14125e8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -762,9 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; - stats->custom_length = 1; - strcpy(stats->custom[0].desc, "fmr_unalign_cnt"); - stats->custom[0].value = conn->fmr_unalign_cnt; + stats->custom_length = 0; } static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, @@ -969,6 +967,13 @@ static umode_t iser_attr_is_visible(int param_type, int param) return 0; } +static int iscsi_iser_slave_alloc(struct scsi_device *sdev) +{ + blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K); + + return 0; +} + static struct scsi_host_template iscsi_iser_sht = { .module = THIS_MODULE, .name = "iSCSI Initiator over iSER", @@ -982,6 +987,7 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, .use_clustering = DISABLE_CLUSTERING, + .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, .track_queue_depth = 1, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a5edd6ede692..d8bbad9eb59b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -227,18 +227,13 @@ enum iser_data_dir { * @size: num entries of this sg * @data_len: total beffer byte len * @dma_nents: returned by dma_map_sg - * @orig_sg: pointer to the original sg list (in case - * we used a copy) - * @orig_size: num entris of orig sg list */ struct iser_data_buf { struct scatterlist *sg; unsigned int size; unsigned long data_len; unsigned int dma_nents; - struct scatterlist *orig_sg; - unsigned int orig_size; - }; +}; /* fwd declarations */ struct iser_device; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index d511879d8cdf..ffd00c420729 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -661,48 +661,14 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - int is_rdma_data_aligned = 1; - int is_rdma_prot_aligned = 1; int prot_count = scsi_prot_sg_count(iser_task->sc); - /* if we were reading, copy back to unaligned sglist, - * anyway dma_unmap and free the copy - */ - if (iser_task->data[ISER_DIR_IN].orig_sg) { - is_rdma_data_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->data[ISER_DIR_IN], - ISER_DIR_IN); - } - - if (iser_task->data[ISER_DIR_OUT].orig_sg) { - is_rdma_data_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->data[ISER_DIR_OUT], - ISER_DIR_OUT); - } - - if (iser_task->prot[ISER_DIR_IN].orig_sg) { - is_rdma_prot_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->prot[ISER_DIR_IN], - ISER_DIR_IN); - } - - if (iser_task->prot[ISER_DIR_OUT].orig_sg) { - is_rdma_prot_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, - &iser_task->prot[ISER_DIR_OUT], - ISER_DIR_OUT); - } - if (iser_task->dir[ISER_DIR_IN]) { iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); - if (is_rdma_data_aligned) - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_IN], - DMA_FROM_DEVICE); - if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_IN], + DMA_FROM_DEVICE); + if (prot_count) iser_dma_unmap_task_data(iser_task, &iser_task->prot[ISER_DIR_IN], DMA_FROM_DEVICE); @@ -710,11 +676,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) if (iser_task->dir[ISER_DIR_OUT]) { iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); - if (is_rdma_data_aligned) - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_OUT], - DMA_TO_DEVICE); - if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_OUT], + DMA_TO_DEVICE); + if (prot_count) iser_dma_unmap_task_data(iser_task, &iser_task->prot[ISER_DIR_OUT], DMA_TO_DEVICE); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4c46d67d37a1..3e0452c4248f 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -88,113 +88,6 @@ int iser_assign_reg_ops(struct iser_device *device) return 0; } -static void -iser_free_bounce_sg(struct iser_data_buf *data) -{ - struct scatterlist *sg; - int count; - - for_each_sg(data->sg, sg, data->size, count) - __free_page(sg_page(sg)); - - kfree(data->sg); - - data->sg = data->orig_sg; - data->size = data->orig_size; - data->orig_sg = NULL; - data->orig_size = 0; -} - -static int -iser_alloc_bounce_sg(struct iser_data_buf *data) -{ - struct scatterlist *sg; - struct page *page; - unsigned long length = data->data_len; - int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE); - - sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC); - if (!sg) - goto err; - - sg_init_table(sg, nents); - while (length) { - u32 page_len = min_t(u32, length, PAGE_SIZE); - - page = alloc_page(GFP_ATOMIC); - if (!page) - goto err; - - sg_set_page(&sg[i], page, page_len, 0); - length -= page_len; - i++; - } - - data->orig_sg = data->sg; - data->orig_size = data->size; - data->sg = sg; - data->size = nents; - - return 0; - -err: - for (; i > 0; i--) - __free_page(sg_page(&sg[i - 1])); - kfree(sg); - - return -ENOMEM; -} - -static void -iser_copy_bounce(struct iser_data_buf *data, bool to_buffer) -{ - struct scatterlist *osg, *bsg = data->sg; - void *oaddr, *baddr; - unsigned int left = data->data_len; - unsigned int bsg_off = 0; - int i; - - for_each_sg(data->orig_sg, osg, data->orig_size, i) { - unsigned int copy_len, osg_off = 0; - - oaddr = kmap_atomic(sg_page(osg)) + osg->offset; - copy_len = min(left, osg->length); - while (copy_len) { - unsigned int len = min(copy_len, bsg->length - bsg_off); - - baddr = kmap_atomic(sg_page(bsg)) + bsg->offset; - if (to_buffer) - memcpy(baddr + bsg_off, oaddr + osg_off, len); - else - memcpy(oaddr + osg_off, baddr + bsg_off, len); - - kunmap_atomic(baddr - bsg->offset); - osg_off += len; - bsg_off += len; - copy_len -= len; - - if (bsg_off >= bsg->length) { - bsg = sg_next(bsg); - bsg_off = 0; - } - } - kunmap_atomic(oaddr - osg->offset); - left -= osg_off; - } -} - -static inline void -iser_copy_from_bounce(struct iser_data_buf *data) -{ - iser_copy_bounce(data, false); -} - -static inline void -iser_copy_to_bounce(struct iser_data_buf *data) -{ - iser_copy_bounce(data, true); -} - struct iser_fr_desc * iser_reg_desc_get_fr(struct ib_conn *ib_conn) { @@ -238,62 +131,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn, { } -/** - * iser_start_rdma_unaligned_sg - */ -static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir cmd_dir) -{ - struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; - int rc; - - rc = iser_alloc_bounce_sg(data); - if (rc) { - iser_err("Failed to allocate bounce for data len %lu\n", - data->data_len); - return rc; - } - - if (cmd_dir == ISER_DIR_OUT) - iser_copy_to_bounce(data); - - data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (!data->dma_nents) { - iser_err("Got dma_nents %d, something went wrong...\n", - data->dma_nents); - rc = -ENOMEM; - goto err; - } - - return 0; -err: - iser_free_bounce_sg(data); - return rc; -} - -/** - * iser_finalize_rdma_unaligned_sg - */ - -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum iser_data_dir cmd_dir) -{ - struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device; - - ib_dma_unmap_sg(dev, data->sg, data->size, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - - if (cmd_dir == ISER_DIR_IN) - iser_copy_from_bounce(data); - - iser_free_bounce_sg(data); -} - #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) /** @@ -355,64 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data, return cur_page; } - -/** - * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned - * for RDMA sub-list of a scatter-gather list of memory buffers, and returns - * the number of entries which are aligned correctly. Supports the case where - * consecutive SG elements are actually fragments of the same physcial page. - */ -static int iser_data_buf_aligned_len(struct iser_data_buf *data, - struct ib_device *ibdev, - unsigned sg_tablesize) -{ - struct scatterlist *sg, *sgl, *next_sg = NULL; - u64 start_addr, end_addr; - int i, ret_len, start_check = 0; - - if (data->dma_nents == 1) - return 1; - - sgl = data->sg; - start_addr = ib_sg_dma_address(ibdev, sgl); - - if (unlikely(sgl[0].offset && - data->data_len >= sg_tablesize * PAGE_SIZE)) { - iser_dbg("can't register length %lx with offset %x " - "fall to bounce buffer\n", data->data_len, - sgl[0].offset); - return 0; - } - - for_each_sg(sgl, sg, data->dma_nents, i) { - if (start_check && !IS_4K_ALIGNED(start_addr)) - break; - - next_sg = sg_next(sg); - if (!next_sg) - break; - - end_addr = start_addr + ib_sg_dma_len(ibdev, sg); - start_addr = ib_sg_dma_address(ibdev, next_sg); - - if (end_addr == start_addr) { - start_check = 0; - continue; - } else - start_check = 1; - - if (!IS_4K_ALIGNED(end_addr)) - break; - } - ret_len = (next_sg) ? i : i+1; - - if (unlikely(ret_len != data->dma_nents)) - iser_warn("rdma alignment violation (%d/%d aligned)\n", - ret_len, data->dma_nents); - - return ret_len; -} - static void iser_data_buf_dump(struct iser_data_buf *data, struct ib_device *ibdev) { @@ -483,31 +262,6 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, return 0; } -static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, - struct iser_data_buf *mem, - enum iser_data_dir cmd_dir) -{ - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; - struct iser_device *device = iser_task->iser_conn->ib_conn.device; - - iscsi_conn->fmr_unalign_cnt++; - - if (iser_debug_level > 0) - iser_data_buf_dump(mem, device->ib_device); - - /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task, mem, - (cmd_dir == ISER_DIR_OUT) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - - /* allocate copy buf, if we are writing, copy the */ - /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0) - return -ENOMEM; - - return 0; -} - /** * iser_reg_page_vec - Register physical memory * @@ -779,26 +533,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return 0; } -static int -iser_handle_unaligned_buf(struct iscsi_iser_task *task, - struct iser_data_buf *mem, - enum iser_data_dir dir) -{ - struct iser_conn *iser_conn = task->iser_conn; - struct iser_device *device = iser_conn->ib_conn.device; - int err, aligned_len; - - aligned_len = iser_data_buf_aligned_len(mem, device->ib_device, - iser_conn->scsi_sg_tablesize); - if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(task, mem, dir); - if (err) - return err; - } - - return 0; -} - static int iser_reg_prot_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, @@ -841,10 +575,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, bool use_dma_key; int err; - err = iser_handle_unaligned_buf(task, mem, dir); - if (unlikely(err)) - return err; - use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); @@ -867,10 +597,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, if (scsi_prot_sg_count(task->sc)) { mem = &task->prot[dir]; - err = iser_handle_unaligned_buf(task, mem, dir); - if (unlikely(err)) - goto err_reg; - err = iser_reg_prot_sg(task, mem, desc, use_dma_key, prot_reg); if (unlikely(err)) -- cgit v1.2.3 From 630c3183cec33b502e80846e728c3ff165d1c84d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:12:59 +0300 Subject: IB/iser: Enable SG clustering iser is perfectly capable supporting SG clustering as it translates the SG list to a page vector. Enabling SG clustering can dramatically reduce the number of SG elements, which doesn't make much of a difference at this point, but with arbitrary SG list support, reducing the number of SG elements can benefit greatly as as it would reduce the length of the HW descriptors array. Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2ea0a14125e8..9080161e01af 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -986,7 +986,7 @@ static struct scsi_host_template iscsi_iser_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .target_alloc = iscsi_target_alloc, - .use_clustering = DISABLE_CLUSTERING, + .use_clustering = ENABLE_CLUSTERING, .slave_alloc = iscsi_iser_slave_alloc, .proc_name = "iscsi_iser", .this_id = -1, -- cgit v1.2.3 From 565edd1d555513ab5d67a847d50d7c14c82ef6c3 Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Thu, 22 Oct 2015 15:20:08 +0300 Subject: IB/addr: Pass network namespace as a parameter Add network namespace support to the ib_addr module. For that, all the address resolution and matching should be done using the appropriate namespace instead of init_net. This is achieved by: 1. Adding an explicit network namespace argument to exported function that require a namespace. 2. Saving the namespace in the rdma_addr_client structure. 3. Using it when calling networking functions. In order to preserve the behavior of calling modules, &init_net is passed as the parameter in calls from other modules. This is modified as namespace support is added on more levels. Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Guy Shapiro Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 17 +++++++++-------- drivers/infiniband/core/cma.c | 1 + include/rdma/ib_addr.h | 16 +++++++++++++++- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d3c42b3c1b51..34b1adad07aa 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, switch (addr->sa_family) { case AF_INET: - dev = ip_dev_find(&init_net, + dev = ip_dev_find(dev_addr->net, ((struct sockaddr_in *) addr)->sin_addr.s_addr); if (!dev) @@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; - #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - if (ipv6_chk_addr(&init_net, + for_each_netdev_rcu(dev_addr->net, dev) { + if (ipv6_chk_addr(dev_addr->net, &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); @@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; - rt = ip_route_output_key(&init_net, &fl4); + rt = ip_route_output_key(addr->net, &fl4); if (IS_ERR(rt)) { ret = PTR_ERR(rt); goto out; @@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - dst = ip6_route_output(&init_net, NULL, &fl6); + dst = ip6_route_output(addr->net, NULL, &fl6); if ((ret = dst->error)) goto put; if (ipv6_addr_any(&fl6.saddr)) { - ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev, &fl6.daddr, 0, &fl6.saddr); if (ret) goto put; @@ -477,6 +476,7 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi memset(&dev_addr, 0, sizeof(dev_addr)); dev_addr.bound_dev_if = if_index; + dev_addr.net = &init_net; ctx.addr = &dev_addr; init_completion(&ctx.comp); @@ -511,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) rdma_gid2ip(&gid_addr._sockaddr, sgid); memset(&dev_addr, 0, sizeof(dev_addr)); + dev_addr.net = &init_net; ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) return ret; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index be41784a62e4..80a8b469a9a8 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -610,6 +610,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); + id_priv->id.route.addr.dev_addr.net = &init_net; return &id_priv->id; } diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 17e4a8bdecab..11528591d0d7 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -47,6 +47,7 @@ #include #include #include +#include struct rdma_addr_client { atomic_t refcount; @@ -64,6 +65,16 @@ void rdma_addr_register_client(struct rdma_addr_client *client); */ void rdma_addr_unregister_client(struct rdma_addr_client *client); +/** + * struct rdma_dev_addr - Contains resolved RDMA hardware addresses + * @src_dev_addr: Source MAC address. + * @dst_dev_addr: Destination MAC address. + * @broadcast: Broadcast address of the device. + * @dev_type: The interface hardware type of the device. + * @bound_dev_if: An optional device interface index. + * @transport: The transport type used. + * @net: Network namespace containing the bound_dev_if net_dev. + */ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; @@ -71,11 +82,14 @@ struct rdma_dev_addr { unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; + struct net *net; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. + * + * The dev_addr->net field must be initialized. */ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, u16 *vlan_id); @@ -90,7 +104,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has - * been invoked. + * been invoked. The net field of the addr struct must be valid. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. -- cgit v1.2.3 From 4be74b42a6d05a74a21362010cd3920fa17f63c7 Mon Sep 17 00:00:00 2001 From: Haggai Eran Date: Thu, 22 Oct 2015 15:20:09 +0300 Subject: IB/cma: Separate port allocation to network namespaces Keep a struct for each network namespace containing the IDRs for the RDMA CM port spaces. The struct is created dynamically using the generic_net mechanism. This patch is internal infrastructure work for the following patches. In this patch, init_net is statically used as the network namespace for the new port-space API. Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Guy Shapiro Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 94 ++++++++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 80a8b469a9a8..ac03c32ca7f1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -44,6 +44,8 @@ #include #include +#include +#include #include #include #include @@ -110,22 +112,33 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(tcp_ps); -static DEFINE_IDR(udp_ps); -static DEFINE_IDR(ipoib_ps); -static DEFINE_IDR(ib_ps); +static int cma_pernet_id; -static struct idr *cma_idr(enum rdma_port_space ps) +struct cma_pernet { + struct idr tcp_ps; + struct idr udp_ps; + struct idr ipoib_ps; + struct idr ib_ps; +}; + +static struct cma_pernet *cma_pernet(struct net *net) +{ + return net_generic(net, cma_pernet_id); +} + +static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps) { + struct cma_pernet *pernet = cma_pernet(net); + switch (ps) { case RDMA_PS_TCP: - return &tcp_ps; + return &pernet->tcp_ps; case RDMA_PS_UDP: - return &udp_ps; + return &pernet->udp_ps; case RDMA_PS_IPOIB: - return &ipoib_ps; + return &pernet->ipoib_ps; case RDMA_PS_IB: - return &ib_ps; + return &pernet->ib_ps; default: return NULL; } @@ -145,24 +158,25 @@ struct rdma_bind_list { unsigned short port; }; -static int cma_ps_alloc(enum rdma_port_space ps, +static int cma_ps_alloc(struct net *net, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); } -static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum) +static struct rdma_bind_list *cma_ps_find(struct net *net, + enum rdma_port_space ps, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); return idr_find(idr, snum); } -static void cma_ps_remove(enum rdma_port_space ps, int snum) +static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum) { - struct idr *idr = cma_idr(ps); + struct idr *idr = cma_pernet_idr(net, ps); idr_remove(idr, snum); } @@ -1325,7 +1339,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, } } - bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id), + bind_list = cma_ps_find(&init_net, + rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); if (IS_ERR(id_priv) && *net_dev) { @@ -1403,7 +1418,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { - cma_ps_remove(bind_list->ps, bind_list->port); + cma_ps_remove(&init_net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); @@ -2693,7 +2708,7 @@ static int cma_alloc_port(enum rdma_port_space ps, if (!bind_list) return -ENOMEM; - ret = cma_ps_alloc(ps, bind_list, snum); + ret = cma_ps_alloc(&init_net, ps, bind_list, snum); if (ret < 0) goto err; @@ -2718,7 +2733,7 @@ static int cma_alloc_any_port(enum rdma_port_space ps, rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && - !cma_ps_find(ps, (unsigned short)rover)) { + !cma_ps_find(&init_net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid @@ -2784,7 +2799,7 @@ static int cma_use_port(enum rdma_port_space ps, if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - bind_list = cma_ps_find(ps, snum); + bind_list = cma_ps_find(&init_net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { @@ -4004,6 +4019,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = { .module = THIS_MODULE }, }; +static int cma_init_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_init(&pernet->tcp_ps); + idr_init(&pernet->udp_ps); + idr_init(&pernet->ipoib_ps); + idr_init(&pernet->ib_ps); + + return 0; +} + +static void cma_exit_net(struct net *net) +{ + struct cma_pernet *pernet = cma_pernet(net); + + idr_destroy(&pernet->tcp_ps); + idr_destroy(&pernet->udp_ps); + idr_destroy(&pernet->ipoib_ps); + idr_destroy(&pernet->ib_ps); +} + +static struct pernet_operations cma_pernet_operations = { + .init = cma_init_net, + .exit = cma_exit_net, + .id = &cma_pernet_id, + .size = sizeof(struct cma_pernet), +}; + static int __init cma_init(void) { int ret; @@ -4012,6 +4056,10 @@ static int __init cma_init(void) if (!cma_wq) return -ENOMEM; + ret = register_pernet_subsys(&cma_pernet_operations); + if (ret) + goto err_wq; + ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); @@ -4029,6 +4077,7 @@ err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); +err_wq: destroy_workqueue(cma_wq); return ret; } @@ -4040,11 +4089,8 @@ static void __exit cma_cleanup(void) unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); destroy_workqueue(cma_wq); - idr_destroy(&tcp_ps); - idr_destroy(&udp_ps); - idr_destroy(&ipoib_ps); - idr_destroy(&ib_ps); } module_init(cma_init); -- cgit v1.2.3 From fa20105e09e97e81aadf02f722c31195e4a75c84 Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Thu, 22 Oct 2015 15:20:10 +0300 Subject: IB/cma: Add support for network namespaces Add support for network namespaces in the ib_cma module. This is accomplished by: 1. Adding network namespace parameter for rdma_create_id. This parameter is used to populate the network namespace field in rdma_id_private. rdma_create_id keeps a reference on the network namespace. 2. Using the network namespace from the rdma_id instead of init_net inside of ib_cma, when listening on an ID and when looking for an ID for an incoming request. 3. Decrementing the reference count for the appropriate network namespace when calling rdma_destroy_id. In order to preserve the current behavior init_net is passed when calling from other modules. Signed-off-by: Guy Shapiro Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 46 +++++++++++++--------- drivers/infiniband/core/ucma.c | 3 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 4 +- include/rdma/rdma_cm.h | 6 ++- net/9p/trans_rdma.c | 4 +- net/rds/ib.c | 2 +- net/rds/ib_cm.c | 2 +- net/rds/iw.c | 2 +- net/rds/iw_cm.c | 2 +- net/rds/rdma_transport.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 +- net/sunrpc/xprtrdma/verbs.c | 3 +- 14 files changed, 52 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ac03c32ca7f1..7e93eb1f33eb 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -600,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { @@ -624,7 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); - id_priv->id.route.addr.dev_addr.net = &init_net; + id_priv->id.route.addr.dev_addr.net = get_net(net); return &id_priv->id; } @@ -1278,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv, cma_protocol_roce(&id_priv->id); return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), &init_net) && + (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->ifindex); } @@ -1339,7 +1340,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, } } - bind_list = cma_ps_find(&init_net, + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); @@ -1411,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; + struct net *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; @@ -1418,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { - cma_ps_remove(&init_net, bind_list->ps, bind_list->port); + cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); @@ -1477,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); @@ -1607,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; @@ -1662,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; @@ -1901,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.event_handler, + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { @@ -2029,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; + struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; @@ -2708,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps, if (!bind_list) return -ENOMEM; - ret = cma_ps_alloc(&init_net, ps, bind_list, snum); + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, + snum); if (ret < 0) goto err; @@ -2727,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps, static unsigned int last_used_port; int low, high, remaining; unsigned int rover; + struct net *net = id_priv->id.route.addr.dev_addr.net; - inet_get_local_port_range(&init_net, &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && - !cma_ps_find(&init_net, ps, (unsigned short)rover)) { + !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid @@ -2799,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps, if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - bind_list = cma_ps_find(&init_net, ps, snum); + bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { @@ -2991,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if IS_ENABLED(CONFIG_IPV6) - else if (addr->sa_family == AF_INET6) - id_priv->afonly = init_net.ipv6.sysctl.bindv6only; + else if (addr->sa_family == AF_INET6) { + struct net *net = id_priv->id.route.addr.dev_addr.net; + + id_priv->afonly = net->ipv6.sysctl.bindv6only; + } #endif } ret = cma_get_port(id_priv); @@ -3797,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->ifindex) && + (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); @@ -3822,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; - if (dev_net(ndev) != &init_net) - return NOTIFY_DONE; - if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3a342dfaac15..e80c107450ab 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -472,7 +472,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = rdma_create_id(&init_net, ucma_event_handler, ctx, cmd.ps, + qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 85132d867bc8..f3122372d49f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1017,7 +1017,7 @@ int iser_connect(struct iser_conn *iser_conn, ib_conn->beacon.wr_id = ISER_BEACON_WRID; ib_conn->beacon.opcode = IB_WR_SEND; - ib_conn->cma_id = rdma_create_id(iser_cma_handler, + ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b58ac25ce19d..7d41c9d6d6bf 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3096,7 +3096,7 @@ isert_setup_id(struct isert_np *isert_np) sa = (struct sockaddr *)&np->np_sockaddr; isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); - id = rdma_create_id(isert_cma_handler, isert_np, + id = rdma_create_id(&init_net, isert_cma_handler, isert_np, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index f4b6c33ac318..5015a9e830bd 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -128,7 +128,9 @@ extern kib_tunables_t kiblnd_tunables; IBLND_CREDIT_HIGHWATER_V1 : \ *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ -#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt) +#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \ + cb, dev, \ + ps, qpt) static inline int kiblnd_concurrent_sends_v1(void) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c92522c192d2..463ec0ccbe51 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -160,13 +160,17 @@ struct rdma_cm_id { /** * rdma_create_id - Create an RDMA identifier. * + * @net: The network namespace in which to create the new id. * @event_handler: User callback invoked to report events associated with the * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. * @qp_type: type of queue pair associated with the id. + * + * The id holds a reference on the network namespace until it is destroyed. */ -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ba1210253f5e..52b4a2f993f2 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, - IB_QPT_RC); + rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index 2d3f2ab475df..cd64ef9a4748 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -317,7 +317,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 9043f5c04787..f5a98068faf0 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -565,7 +565,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/iw.c b/net/rds/iw.c index 3df0295c6659..576f1825fc55 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a6553a6fb2bc..aea4c911bc76 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b9b40af5345b..9c1fed81bf0f 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, - IB_QPT_RC); + cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb80c265..4a41122d586f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (!cma_xprt) return ERR_PTR(-ENOMEM); - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, - IB_QPT_RC); + listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81e8d31f3abc..6c06ba088fea 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); + id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", -- cgit v1.2.3 From 95893dde99d9d14f8a6ac99ea3103792a8da5f25 Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Thu, 22 Oct 2015 15:20:11 +0300 Subject: IB/ucma: Take the network namespace from the process Add support for network namespaces from user space. This is done by passing the network namespace of the process instead of init_net. Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Guy Shapiro Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index e80c107450ab..8b5a934e1133 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -472,8 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(&init_net, ucma_event_handler, ctx, cmd.ps, - qp_type); + ctx->cm_id = rdma_create_id(current->nsproxy->net_ns, + ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; -- cgit v1.2.3 From 4c67e2bfc8b7121d51434362fa7c2d012f8bcf1b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:24 +0300 Subject: IB/core: Introduce new fast registration API The new fast registration verb ib_map_mr_sg receives a scatterlist and converts it to a page list under the verbs API thus hiding the specific HW mapping details away from the consumer. The provider drivers are provided with a generic helper ib_sg_to_pages that converts a scatterlist into a vector of page addresses. The drivers can still perform any HW specific page address setting by passing a set_page function pointer which will be invoked for each page address. This allows drivers to avoid keeping a shadow page vectors and convert them to HW specific translations by doing extra copies. This API will allow ULPs to remove the duplicated code of constructing a page vector from a given sg list. The send work request ib_reg_wr also shrinks as it will contain only mr, key and access flags in addition. Signed-off-by: Sagi Grimberg Tested-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 107 ++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 44 +++++++++++++++++ 2 files changed, 151 insertions(+) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 46d97f09fbae..5673df061213 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1496,3 +1496,110 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; } EXPORT_SYMBOL(ib_check_mr_status); + +/** + * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list + * and set it the memory region. + * @mr: memory region + * @sg: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @page_size: page vector desired page size + * + * Constraints: + * - The first sg element is allowed to have an offset. + * - Each sg element must be aligned to page_size (or physically + * contiguous to the previous element). In case an sg element has a + * non contiguous offset, the mapping prefix will not include it. + * - The last sg element is allowed to have length less than page_size. + * - If sg_nents total byte length exceeds the mr max_num_sge * page_size + * then only max_num_sg entries will be mapped. + * + * Returns the number of sg elements that were mapped to the memory region. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents, + unsigned int page_size) +{ + if (unlikely(!mr->device->map_mr_sg)) + return -ENOSYS; + + mr->page_size = page_size; + + return mr->device->map_mr_sg(mr, sg, sg_nents); +} +EXPORT_SYMBOL(ib_map_mr_sg); + +/** + * ib_sg_to_pages() - Convert the largest prefix of a sg list + * to a page vector + * @mr: memory region + * @sgl: dma mapped scatterlist + * @sg_nents: number of entries in sg + * @set_page: driver page assignment function pointer + * + * Core service helper for drivers to covert the largest + * prefix of given sg list to a page vector. The sg list + * prefix converted is the prefix that meet the requirements + * of ib_map_mr_sg. + * + * Returns the number of sg elements that were assigned to + * a page vector. + */ +int ib_sg_to_pages(struct ib_mr *mr, + struct scatterlist *sgl, + int sg_nents, + int (*set_page)(struct ib_mr *, u64)) +{ + struct scatterlist *sg; + u64 last_end_dma_addr = 0, last_page_addr = 0; + unsigned int last_page_off = 0; + u64 page_mask = ~((u64)mr->page_size - 1); + int i; + + mr->iova = sg_dma_address(&sgl[0]); + mr->length = 0; + + for_each_sg(sgl, sg, sg_nents, i) { + u64 dma_addr = sg_dma_address(sg); + unsigned int dma_len = sg_dma_len(sg); + u64 end_dma_addr = dma_addr + dma_len; + u64 page_addr = dma_addr & page_mask; + + if (i && page_addr != dma_addr) { + if (last_end_dma_addr != dma_addr) { + /* gap */ + goto done; + + } else if (last_page_off + dma_len <= mr->page_size) { + /* chunk this fragment with the last */ + mr->length += dma_len; + last_end_dma_addr += dma_len; + last_page_off += dma_len; + continue; + } else { + /* map starting from the next page */ + page_addr = last_page_addr + mr->page_size; + dma_len -= mr->page_size - last_page_off; + } + } + + do { + if (unlikely(set_page(mr, page_addr))) + goto done; + page_addr += mr->page_size; + } while (page_addr < end_dma_addr); + + mr->length += dma_len; + last_end_dma_addr = end_dma_addr; + last_page_addr = end_dma_addr & page_mask; + last_page_off = end_dma_addr & ~page_mask; + } + +done: + return i; +} +EXPORT_SYMBOL(ib_sg_to_pages); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 85103aff909b..065d37c13aa6 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -738,6 +738,7 @@ enum ib_wc_opcode { IB_WC_LSO, IB_WC_LOCAL_INV, IB_WC_FAST_REG_MR, + IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, /* @@ -1025,6 +1026,7 @@ enum ib_wr_opcode { IB_WR_RDMA_READ_WITH_INV, IB_WR_LOCAL_INV, IB_WR_FAST_REG_MR, + IB_WR_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, @@ -1157,6 +1159,18 @@ static inline struct ib_fast_reg_wr *fast_reg_wr(struct ib_send_wr *wr) return container_of(wr, struct ib_fast_reg_wr, wr); } +struct ib_reg_wr { + struct ib_send_wr wr; + struct ib_mr *mr; + u32 key; + int access; +}; + +static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) +{ + return container_of(wr, struct ib_reg_wr, wr); +} + struct ib_bind_mw_wr { struct ib_send_wr wr; struct ib_mw *mw; @@ -1369,6 +1383,9 @@ struct ib_mr { struct ib_uobject *uobject; u32 lkey; u32 rkey; + u64 iova; + u32 length; + unsigned int page_size; atomic_t usecnt; /* count number of MWs */ }; @@ -1753,6 +1770,9 @@ struct ib_device { struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); + int (*map_mr_sg)(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents); struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len); void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); @@ -3059,4 +3079,28 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); +int ib_map_mr_sg(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents, + unsigned int page_size); + +static inline int +ib_map_mr_sg_zbva(struct ib_mr *mr, + struct scatterlist *sg, + int sg_nents, + unsigned int page_size) +{ + int n; + + n = ib_map_mr_sg(mr, sg, sg_nents, page_size); + mr->iova = 0; + + return n; +} + +int ib_sg_to_pages(struct ib_mr *mr, + struct scatterlist *sgl, + int sg_nents, + int (*set_page)(struct ib_mr *, u64)); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From a7060009161344637da8cb4a389ead5fc156ea37 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:25 +0300 Subject: IB/mlx5: Remove dead fmr code Just function declarations - no need for those laying arround. If for some reason someone will want FMR support in mlx5, it should be easy enough to restore a few structs. Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 29f3ecdbe790..f789a3e6c215 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -364,20 +364,6 @@ enum { MLX5_FMR_BUSY, }; -struct mlx5_ib_fmr { - struct ib_fmr ibfmr; - struct mlx5_core_mr mr; - int access_flags; - int state; - /* protect fmr state - */ - spinlock_t lock; - u64 wrid; - struct ib_send_wr wr[2]; - u8 page_shift; - struct ib_fast_reg_page_list page_list; -}; - struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry @@ -462,11 +448,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } -static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) -{ - return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); -} - static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); @@ -582,12 +563,6 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len); void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); -struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, - struct ib_fmr_attr *fmr_attr); -int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int npages, u64 iova); -int mlx5_ib_unmap_fmr(struct list_head *fmr_list); -int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, -- cgit v1.2.3 From 8a187ee52b043f8201e7089e5e538974142722e0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:26 +0300 Subject: IB/mlx5: Support the new memory registration API Support the new memory registration API by allocating a private page list array in mlx5_ib_mr and populate it when mlx5_ib_map_mr_sg is invoked. Also, support IB_WR_REG_MR by setting the exact WQE as IB_WR_FAST_REG_MR, just take the needed information from different places: - page_size, iova, length, access flags (ib_mr) - page array (mlx5_ib_mr) - key (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 ++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 9 ++++ drivers/infiniband/hw/mlx5/mr.c | 93 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 83 ++++++++++++++++++++++++++++++++ 5 files changed, 189 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 2d0dbbf38ceb..206930096d56 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -109,6 +109,9 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; + case IB_WR_REG_MR: + return IB_WC_REG_MR; + case IB_WR_FAST_REG_MR: return IB_WC_FAST_REG_MR; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f1ccd40beae9..7e93044ea6ce 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1425,6 +1425,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; + dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f789a3e6c215..a29b28c31c44 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -319,6 +319,11 @@ enum mlx5_ib_mtt_access_flags { struct mlx5_ib_mr { struct ib_mr ibmr; + void *descs; + dma_addr_t desc_map; + int ndescs; + int max_descs; + int desc_size; struct mlx5_core_mr mmr; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; @@ -330,6 +335,7 @@ struct mlx5_ib_mr { struct mlx5_create_mkey_mbox_out out; struct mlx5_core_sig_ctx *sig; int live; + void *descs_alloc; }; struct mlx5_ib_fast_reg_page_list { @@ -560,6 +566,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len); void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b30d4ae0fb61..6177e8053888 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1153,6 +1153,52 @@ error: return err; } +static int +mlx5_alloc_priv_descs(struct ib_device *device, + struct mlx5_ib_mr *mr, + int ndescs, + int desc_size) +{ + int size = ndescs * desc_size; + int add_size; + int ret; + + add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->descs_alloc) + return -ENOMEM; + + mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); + + mr->desc_map = dma_map_single(device->dma_device, mr->descs, + size, DMA_TO_DEVICE); + if (dma_mapping_error(device->dma_device, mr->desc_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->descs_alloc); + + return ret; +} + +static void +mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +{ + if (mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + + dma_unmap_single(device->dma_device, mr->desc_map, + size, DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; + } +} + static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); @@ -1172,6 +1218,8 @@ static int clean_mr(struct mlx5_ib_mr *mr) mr->sig = NULL; } + mlx5_free_priv_descs(mr); + if (!umred) { err = destroy_mkey(dev, mr); if (err) { @@ -1261,6 +1309,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (mr_type == IB_MR_TYPE_MEM_REG) { access_mode = MLX5_ACCESS_MODE_MTT; in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(u64)); + if (err) + goto err_free_in; + + mr->desc_size = sizeof(u64); + mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; @@ -1317,6 +1373,7 @@ err_destroy_psv: mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); } + mlx5_free_priv_descs(mr); err_free_sig: kfree(mr->sig); err_free_in: @@ -1408,3 +1465,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, done: return ret; } + +static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int n; + + mr->ndescs = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, + mr->desc_size * mr->max_descs, + DMA_TO_DEVICE); + + return n; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9bad68820061..da2b46c2624a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -65,6 +65,7 @@ static const u32 mlx5_ib_opcode[] = { [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, @@ -1896,6 +1897,17 @@ static __be64 sig_mkey_mask(void) return cpu_to_be64(result); } +static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, + struct mlx5_ib_mr *mr) +{ + int ndescs = mr->ndescs; + + memset(umr, 0, sizeof(*umr)); + umr->flags = MLX5_UMR_CHECK_NOT_FREE; + umr->klm_octowords = get_klm_octo(ndescs); + umr->mkey_mask = frwr_mkey_mask(); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1987,6 +1999,22 @@ static u8 get_umr_flags(int acc) MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } +static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, + struct mlx5_ib_mr *mr, + u32 key, int access) +{ + int ndescs = ALIGN(mr->ndescs, 8) >> 1; + + memset(seg, 0, sizeof(*seg)); + seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT; + seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(mr->ibmr.iova); + seg->len = cpu_to_be64(mr->ibmr.length); + seg->xlt_oct_size = cpu_to_be32(ndescs); + seg->log2_page_size = ilog2(mr->ibmr.page_size); +} + static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, int li, int *writ) { @@ -2028,6 +2056,17 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w mlx5_mkey_variant(umrwr->mkey)); } +static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, + struct mlx5_ib_mr *mr, + struct mlx5_ib_pd *pd) +{ + int bcount = mr->desc_size * mr->ndescs; + + dseg->addr = cpu_to_be64(mr->desc_map); + dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); +} + static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, struct ib_send_wr *wr, struct mlx5_core_dev *mdev, @@ -2433,6 +2472,38 @@ static int set_psv_wr(struct ib_sig_domain *domain, return 0; } +static int set_reg_wr(struct mlx5_ib_qp *qp, + struct ib_reg_wr *wr, + void **seg, int *size) +{ + struct mlx5_ib_mr *mr = to_mmr(wr->mr); + struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Invalid IB_SEND_INLINE send flag\n"); + return -EINVAL; + } + + set_reg_umr_seg(*seg, mr); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_reg_mkey_seg(*seg, mr, wr->key, wr->access); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -2675,6 +2746,18 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_MR: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_REG_MR; + ctrl->imm = cpu_to_be32(reg_wr(wr)->key); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size); + if (err) { + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; mr = to_mmr(sig_handover_wr(wr)->sig_mr); -- cgit v1.2.3 From 1b2cd0fc673c0bf9a37c6456267a7246fdd6a816 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:27 +0300 Subject: IB/mlx4: Support the new memory registration API Support the new memory registration API by allocating a private page list array in mlx4_ib_mr and populate it when mlx4_ib_map_mr_sg is invoked. Also, support IB_WR_REG_MR by setting the exact WQE as IB_WR_FAST_REG_MR, just take the needed information from different places: - page_size, iova, length, access flags (ib_mr) - page array (mlx4_ib_mr) - key (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Tested-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 1 + drivers/infiniband/hw/mlx4/main.c | 1 + drivers/infiniband/hw/mlx4/mlx4_ib.h | 10 ++++ drivers/infiniband/hw/mlx4/mr.c | 101 ++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx4/qp.c | 25 +++++++++ 5 files changed, 132 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5fd49f9435f9..2ea4125b7903 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -819,6 +819,7 @@ repoll: break; case MLX4_OPCODE_FMR: wc->opcode = IB_WC_FAST_REG_MR; + /* TODO: wc->opcode = IB_WC_REG_MR; */ break; case MLX4_OPCODE_LOCAL_INVAL: wc->opcode = IB_WC_LOCAL_INV; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f63d5427bfc0..7e87599fbdcb 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2266,6 +2266,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; + ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 28a99e08030e..64c80e15a9f8 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -129,10 +129,17 @@ struct mlx4_ib_cq { struct list_head recv_qp_list; }; +#define MLX4_MR_PAGES_ALIGN 0x40 + struct mlx4_ib_mr { struct ib_mr ibmr; + __be64 *pages; + dma_addr_t page_map; + u32 npages; + u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; + void *pages_alloc; }; struct mlx4_ib_mw { @@ -714,6 +721,9 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len); void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 5bba176e9dfa..e5a32e4520e8 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) struct mlx4_ib_mr *mr; int err; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err; int n; - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -271,11 +271,59 @@ release_mpt_entry: return err; } +static int +mlx4_alloc_priv_pages(struct ib_device *device, + struct mlx4_ib_mr *mr, + int max_pages) +{ + int size = max_pages * sizeof(u64); + int add_size; + int ret; + + add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + + mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); + if (!mr->pages_alloc) + return -ENOMEM; + + mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); + + mr->page_map = dma_map_single(device->dma_device, mr->pages, + size, DMA_TO_DEVICE); + + if (dma_mapping_error(device->dma_device, mr->page_map)) { + ret = -ENOMEM; + goto err; + } + + return 0; +err: + kfree(mr->pages_alloc); + + return ret; +} + +static void +mlx4_free_priv_pages(struct mlx4_ib_mr *mr) +{ + if (mr->pages) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_pages * sizeof(u64); + + dma_unmap_single(device->dma_device, mr->page_map, + size, DMA_TO_DEVICE); + kfree(mr->pages_alloc); + mr->pages = NULL; + } +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); int ret; + mlx4_free_priv_pages(mr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); if (ret) return ret; @@ -362,7 +410,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, max_num_sg > MLX4_MAX_FAST_REG_PAGES) return ERR_PTR(-EINVAL); - mr = kmalloc(sizeof *mr, GFP_KERNEL); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -371,18 +419,25 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free; + err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg); + if (err) + goto err_free_mr; + + mr->max_pages = max_num_sg; + err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) - goto err_mr; + goto err_free_pl; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->umem = NULL; return &mr->ibmr; -err_mr: +err_free_pl: + mlx4_free_priv_pages(mr); +err_free_mr: (void) mlx4_mr_free(dev->dev, &mr->mmr); - err_free: kfree(mr); return ERR_PTR(err); @@ -528,3 +583,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr) return err; } + +static int mlx4_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + + if (unlikely(mr->npages == mr->max_pages)) + return -ENOMEM; + + mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT); + + return 0; +} + +int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct mlx4_ib_mr *mr = to_mmr(ibmr); + int rc; + + mr->npages = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page); + + ib_dma_sync_single_for_device(ibmr->device, mr->page_map, + sizeof(u64) * mr->max_pages, + DMA_TO_DEVICE); + + return rc; +} diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f2b2a61898f8..0067f4b4dc09 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -112,6 +112,7 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), @@ -2505,6 +2506,22 @@ static __be32 convert_access(int acc) cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } +static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, + struct ib_reg_wr *wr) +{ + struct mlx4_ib_mr *mr = to_mmr(wr->mr); + + fseg->flags = convert_access(wr->access); + fseg->mem_key = cpu_to_be32(wr->key); + fseg->buf_list = cpu_to_be64(mr->page_map); + fseg->start_addr = cpu_to_be64(mr->ibmr.iova); + fseg->reg_len = cpu_to_be64(mr->ibmr.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? */ + fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size)); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_fast_reg_wr *wr) { @@ -2866,6 +2883,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; + case IB_WR_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_reg_seg(wqe, reg_wr(wr)); + wqe += sizeof(struct mlx4_wqe_fmr_seg); + size += sizeof(struct mlx4_wqe_fmr_seg) / 16; + break; + case IB_WR_BIND_MW: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); -- cgit v1.2.3 From 2eaa1c5647a3fd21684120f63e414367055249c7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:28 +0300 Subject: RDMA/ocrdma: Support the new memory registration API Support the new memory registration API by allocating a private page list array in ocrdma_mr and populate it when ocrdma_map_mr_sg is invoked. Also, support IB_WR_REG_MR by duplicating IB_WR_FAST_REG_MR, but take the needed information from different places: - page_size, iova, length, access flags (ib_mr) - page array (ocrdma_mr) - key (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma.h | 2 + drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 90 +++++++++++++++++++++++++++++ drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 3 + 4 files changed, 96 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 8f047f996e22..ae80590aabdf 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -193,6 +193,8 @@ struct ocrdma_mr { struct ib_mr ibmr; struct ib_umem *umem; struct ocrdma_hw_mr hwmr; + u64 *pages; + u32 npages; }; struct ocrdma_stats { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index cb1af0fb4051..19a5ccd6e5fc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -180,6 +180,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; dev->ibdev.alloc_mr = ocrdma_alloc_mr; + dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index dd00219b7d15..49481dfaa493 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); + kfree(mr->pages); ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ @@ -2177,6 +2178,61 @@ static int get_encoded_page_size(int pg_sz) return i; } +static int ocrdma_build_reg(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ib_reg_wr *wr) +{ + u64 fbo; + struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); + struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr); + struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; + struct ocrdma_pbe *pbe; + u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + int num_pbes = 0, i; + + wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); + + hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + + if (wr->access & IB_ACCESS_LOCAL_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; + if (wr->access & IB_ACCESS_REMOTE_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; + if (wr->access & IB_ACCESS_REMOTE_READ) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; + hdr->lkey = wr->key; + hdr->total_len = mr->ibmr.length; + + fbo = mr->ibmr.iova - mr->pages[0]; + + fast_reg->va_hi = upper_32_bits(mr->ibmr.iova); + fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff); + fast_reg->fbo_hi = upper_32_bits(fbo); + fast_reg->fbo_lo = (u32) fbo & 0xffffffff; + fast_reg->num_sges = mr->npages; + fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size); + + pbe = pbl_tbl->va; + for (i = 0; i < mr->npages; i++) { + u64 buf_addr = mr->pages[i]; + + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + + return 0; +} static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, struct ib_send_wr *send_wr) @@ -2304,6 +2360,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: status = ocrdma_build_fr(qp, hdr, wr); break; + case IB_WR_REG_MR: + status = ocrdma_build_reg(qp, hdr, reg_wr(wr)); + break; default: status = -EINVAL; break; @@ -3054,6 +3113,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, if (!mr) return ERR_PTR(-ENOMEM); + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) { + status = -ENOMEM; + goto pl_err; + } + status = ocrdma_get_pbl_info(dev, mr, max_num_sg); if (status) goto pbl_err; @@ -3077,6 +3142,8 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); pbl_err: + kfree(mr->pages); +pl_err: kfree(mr); return ERR_PTR(-ENOMEM); } @@ -3263,3 +3330,26 @@ pbl_err: kfree(mr); return ERR_PTR(status); } + +static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + if (unlikely(mr->npages == mr->hwmr.num_pbes)) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + + return 0; +} + +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 308c16857a5d..1b2d3ac10203 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -125,6 +125,9 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int ocrdma_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device *ibdev, int page_list_len); -- cgit v1.2.3 From 14fb4171ab1c298101697fe844dbf062ff6f4adf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:29 +0300 Subject: RDMA/cxgb3: Support the new memory registration API Support the new memory registration API by allocating a private page list array in iwch_mr and populate it when iwch_map_mr_sg is invoked. Also, support IB_WR_REG_MR by duplicating build_fastreg just take the needed information from different places: - page_size, iova, length (ib_mr) - page array (iwch_mr) - key, access flags (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_provider.c | 33 ++++++++++++++++++++ drivers/infiniband/hw/cxgb3/iwch_provider.h | 2 ++ drivers/infiniband/hw/cxgb3/iwch_qp.c | 48 +++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 93308c45f298..f7b915378343 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -463,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) return -EINVAL; mhp = to_iwch_mr(ib_mr); + kfree(mhp->pages); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, @@ -821,6 +822,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, if (!mhp) goto err; + mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mhp->pages) { + ret = -ENOMEM; + goto pl_err; + } + mhp->rhp = rhp; ret = iwch_alloc_pbl(mhp, max_num_sg); if (ret) @@ -847,11 +854,36 @@ err3: err2: iwch_free_pbl(mhp); err1: + kfree(mhp->pages); +pl_err: kfree(mhp); err: return ERR_PTR(ret); } +static int iwch_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct iwch_mr *mhp = to_iwch_mr(ibmr); + + if (unlikely(mhp->npages == mhp->attr.pbl_size)) + return -ENOMEM; + + mhp->pages[mhp->npages++] = addr; + + return 0; +} + +static int iwch_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct iwch_mr *mhp = to_iwch_mr(ibmr); + + mhp->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); +} + static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( struct ib_device *device, int page_list_len) @@ -1450,6 +1482,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; + dev->ibdev.map_mr_sg = iwch_map_mr_sg; dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 87c14b0c5ac0..2ac85b86a680 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -77,6 +77,8 @@ struct iwch_mr { struct iwch_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *pages; + u32 npages; }; typedef struct iwch_mw iwch_mw_handle; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index bac0508fedd9..a09ea538e990 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -146,6 +146,49 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } +static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + struct iwch_mr *mhp = to_iwch_mr(wr->mr); + int i; + __be64 *p; + + if (mhp->npages > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->key); + wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); + wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(mhp->npages) | + V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < mhp->npages; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)mhp->pages[i]); + } + *flit_cnt = 5 + mhp->npages; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *send_wr, u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) { @@ -419,6 +462,11 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, &wr_cnt, &qhp->wq); break; + case IB_WR_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) t3_wr_flags |= T3_LOCAL_FENCE_FLAG; -- cgit v1.2.3 From 8376b86de7d35d43cf1a33a1f43bc015b5a095d9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:30 +0300 Subject: iw_cxgb4: Support the new memory registration API Support the new memory registration API by allocating a private page list array in c4iw_mr and populate it when c4iw_map_mr_sg is invoked. Also, support IB_WR_REG_MR by duplicating build_fastreg just take the needed information from different places: - page_size, iova, length (ib_mr) - page array (c4iw_mr) - key, access flags (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 7 ++++ drivers/infiniband/hw/cxgb4/mem.c | 38 +++++++++++++++++ drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 74 ++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index c7bb38c931a5..6a8e2696455f 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -386,6 +386,10 @@ struct c4iw_mr { struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; + u64 *mpl; + dma_addr_t mpl_addr; + u32 max_mpl_len; + u32 mpl_len; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) @@ -973,6 +977,9 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); int c4iw_dealloc_mw(struct ib_mw *mw); struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 140415d31bcc..1e46a260a0fa 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -863,6 +863,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, u32 mmid; u32 stag = 0; int ret = 0; + int length = roundup(max_num_sg * sizeof(u64), 32); if (mr_type != IB_MR_TYPE_MEM_REG || max_num_sg > t4_max_fr_depth(use_dsgl)) @@ -876,6 +877,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, goto err; } + mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev, + length, &mhp->mpl_addr, GFP_KERNEL); + if (!mhp->mpl) { + ret = -ENOMEM; + goto err_mpl; + } + mhp->max_mpl_len = length; + mhp->rhp = rhp; ret = alloc_pbl(mhp, max_num_sg); if (ret) @@ -905,11 +914,37 @@ err2: c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); err1: + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); +err_mpl: kfree(mhp); err: return ERR_PTR(ret); } +static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + + if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + return -ENOMEM; + + mhp->mpl[mhp->mpl_len++] = addr; + + return 0; +} + +int c4iw_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + + mhp->mpl_len = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); +} + struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, int page_list_len) { @@ -970,6 +1005,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->mpl) + dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev, + mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr); dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); if (mhp->attr.pbl_size) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index df3b3f1ad066..e292308e82bd 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -557,6 +557,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.bind_mw = c4iw_bind_mw; dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; + dev->ibdev.map_mr_sg = c4iw_map_mr_sg; dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; dev->ibdev.attach_mcast = c4iw_multicast_attach; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 1dc9f11a4243..aac75a068768 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -605,10 +605,76 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_reg_wr *wr, u8 *len16, u8 t5dev) +{ + struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); + struct fw_ri_immd *imdp; + __be64 *p; + int i; + int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32); + int rem; + + if (mhp->mpl_len > t4_max_fr_depth(use_dsgl)) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; + wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12; + wqe->fr.addr_type = FW_RI_VA_BASED_TO; + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access); + wqe->fr.len_hi = 0; + wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length); + wqe->fr.stag = cpu_to_be32(wr->key); + wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & + 0xffffffff); + + if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + struct fw_ri_dsgl *sglp; + + for (i = 0; i < mhp->mpl_len; i++) + mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]); + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(mhp->mpl_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < mhp->mpl_len; i++) { + *p = cpu_to_be64((u64)mhp->mpl[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); + } + return 0; +} + static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *send_wr, u8 *len16, u8 t5dev) { struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); + struct fw_ri_immd *imdp; __be64 *p; int i; @@ -815,6 +881,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qhp->rhp->rdev.lldi.adapter_type) ? 1 : 0); break; + case IB_WR_REG_MR: + fw_opcode = FW_RI_FR_NSMR_WR; + swsqe->opcode = FW_RI_FAST_REGISTER; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); + break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; -- cgit v1.2.3 From 38071a461f0a87a86ece011356bdac991795ce04 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:31 +0300 Subject: IB/qib: Support the new memory registration API Support the new memory registration API by allocating a private page list array in qib_mr and populate it when qib_map_mr_sg is invoked. Also, support IB_WR_REG_MR by duplicating qib_fastreg_mr just take the needed information from different places: - page_size, iova, length (ib_mr) - page array (qib_mr) - key, access flags (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_keys.c | 56 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/qib/qib_mr.c | 32 ++++++++++++++++++++ drivers/infiniband/hw/qib/qib_verbs.c | 9 +++++- drivers/infiniband/hw/qib/qib_verbs.h | 8 +++++ 4 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index eaf139a33b2e..95b8b9110fc6 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -390,3 +390,59 @@ bail: spin_unlock_irqrestore(&rkt->lock, flags); return ret; } + +/* + * Initialize the memory region specified by the work request. + */ +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr) +{ + struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct qib_mr *mr = to_imr(wr->mr); + struct qib_mregion *mrg; + u32 key = wr->key; + unsigned i, n, m; + int ret = -EINVAL; + unsigned long flags; + u64 *page_list; + size_t ps; + + spin_lock_irqsave(&rkt->lock, flags); + if (pd->user || key == 0) + goto bail; + + mrg = rcu_dereference_protected( + rkt->table[(key >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); + if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd)) + goto bail; + + if (mr->npages > mrg->max_segs) + goto bail; + + ps = mr->ibmr.page_size; + if (mr->ibmr.length > ps * mr->npages) + goto bail; + + mrg->user_base = mr->ibmr.iova; + mrg->iova = mr->ibmr.iova; + mrg->lkey = key; + mrg->length = mr->ibmr.length; + mrg->access_flags = wr->access; + page_list = mr->pages; + m = 0; + n = 0; + for (i = 0; i < mr->npages; i++) { + mrg->map[m]->segs[n].vaddr = (void *) page_list[i]; + mrg->map[m]->segs[n].length = ps; + if (++n == QIB_SEGSZ) { + m++; + n = 0; + } + } + + ret = 0; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index 19220dcb9a3b..dcabf1b2e263 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr) int ret = 0; unsigned long timeout; + kfree(mr->pages); qib_free_lkey(&mr->mr); qib_put_mr(&mr->mr); /* will set completion if last */ @@ -340,7 +341,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd, if (IS_ERR(mr)) return (struct ib_mr *)mr; + mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); + if (!mr->pages) + goto err; + return &mr->ibmr; + +err: + qib_dereg_mr(&mr->ibmr); + return ERR_PTR(-ENOMEM); +} + +static int qib_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct qib_mr *mr = to_imr(ibmr); + + if (unlikely(mr->npages == mr->mr.max_segs)) + return -ENOMEM; + + mr->pages[mr->npages++] = addr; + + return 0; +} + +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct qib_mr *mr = to_imr(ibmr); + + mr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page); } struct ib_fast_reg_page_list * diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a6b0b098ff30..a1e53d7b662b 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -362,7 +362,10 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ - if (wr->opcode == IB_WR_FAST_REG_MR) { + if (wr->opcode == IB_WR_REG_MR) { + if (qib_reg_mr(qp, reg_wr(wr))) + goto bail_inval; + } else if (wr->opcode == IB_WR_FAST_REG_MR) { if (qib_fast_reg_mr(qp, wr)) goto bail_inval; } else if (qp->ibqp.qp_type == IB_QPT_UC) { @@ -401,6 +404,9 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, if (qp->ibqp.qp_type != IB_QPT_UC && qp->ibqp.qp_type != IB_QPT_RC) memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_REG_MR) + memcpy(&wqe->reg_wr, reg_wr(wr), + sizeof(wqe->reg_wr)); else if (wr->opcode == IB_WR_FAST_REG_MR) memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), sizeof(wqe->fast_reg_wr)); @@ -2260,6 +2266,7 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->reg_user_mr = qib_reg_user_mr; ibdev->dereg_mr = qib_dereg_mr; ibdev->alloc_mr = qib_alloc_mr; + ibdev->map_mr_sg = qib_map_mr_sg; ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; ibdev->alloc_fmr = qib_alloc_fmr; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 8aa16851a5e6..648f6c64f00d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -330,6 +330,8 @@ struct qib_mr { struct ib_mr ibmr; struct ib_umem *umem; struct qib_mregion mr; /* must be last */ + u64 *pages; + u32 npages; }; /* @@ -341,6 +343,7 @@ struct qib_swqe { union { struct ib_send_wr wr; /* don't use wr.sg_list */ struct ib_ud_wr ud_wr; + struct ib_reg_wr reg_wr; struct ib_fast_reg_wr fast_reg_wr; struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; @@ -1044,12 +1047,17 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_entries); +int qib_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents); + struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( struct ib_device *ibdev, int page_list_len); void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); +int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr); struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); -- cgit v1.2.3 From 0ba24dd39a3f401dd12d129a65d2e2e7fe7f96eb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:32 +0300 Subject: RDMA/nes: Support the new memory registration API Support the new memory registration API by allocating a private page list array in nes_mr and populate it when nes_map_mr_sg is invoked. Also, support IB_WR_REG_MR by duplicating IB_WR_FAST_REG_MR handling and take the needed information from different places: - page_size, iova, length (ib_mr) - page array (nes_mr) - key, access flags (ib_reg_wr) The IB_WR_FAST_REG_MR handlers will be removed later when all the ULPs will be converted. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_verbs.c | 117 +++++++++++++++++++++++++++++++++- drivers/infiniband/hw/nes/nes_verbs.h | 4 ++ 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index f71b37b75f82..dc8a03b0ca0d 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -51,6 +51,7 @@ atomic_t qps_created; atomic_t sw_qps_destroyed; static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); +static int nes_dereg_mr(struct ib_mr *ib_mr); /** * nes_alloc_mw @@ -443,9 +444,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd, } else { kfree(nesmr); nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } + + nesmr->pages = pci_alloc_consistent(nesdev->pcidev, + max_num_sg * sizeof(u64), + &nesmr->paddr); + if (!nesmr->paddr) + goto err; + + nesmr->max_pages = max_num_sg; + return ibmr; + +err: + nes_dereg_mr(ibmr); + + return ERR_PTR(-ENOMEM); +} + +static int nes_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct nes_mr *nesmr = to_nesmr(ibmr); + + if (unlikely(nesmr->npages == nesmr->max_pages)) + return -ENOMEM; + + nesmr->pages[nesmr->npages++] = cpu_to_le64(addr); + + return 0; +} + +static int nes_map_mr_sg(struct ib_mr *ibmr, + struct scatterlist *sg, + int sg_nents) +{ + struct nes_mr *nesmr = to_nesmr(ibmr); + + nesmr->npages = 0; + + return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); } /* @@ -2683,6 +2721,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) u16 major_code; u16 minor_code; + + if (nesmr->pages) + pci_free_consistent(nesdev->pcidev, + nesmr->max_pages * sizeof(u64), + nesmr->pages, + nesmr->paddr); + if (nesmr->region) { ib_umem_release(nesmr->region); } @@ -3513,6 +3558,75 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, wqe_misc); break; } + case IB_WR_REG_MR: + { + struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr); + int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size); + int flags = reg_wr(ib_wr)->access; + + if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { + nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); + err = -EINVAL; + break; + } + wqe_misc = NES_IWARP_SQ_OP_FAST_REG; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, + mr->ibmr.iova); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, + mr->ibmr.length); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, + reg_wr(ib_wr)->key); + + if (page_shift == 12) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; + } else if (page_shift == 21) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; + } else { + nes_debug(NES_DBG_IW_TX, "Invalid page shift," + " ib_wr=%u, max=1\n", ib_wr->num_sge); + err = -EINVAL; + break; + } + + /* Set access_flags */ + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; + if (flags & IB_ACCESS_LOCAL_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE; + + if (flags & IB_ACCESS_REMOTE_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE; + + if (flags & IB_ACCESS_REMOTE_READ) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ; + + if (flags & IB_ACCESS_MW_BIND) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; + + /* Fill in PBL info: */ + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, + mr->paddr); + + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, + mr->npages * 8); + + nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " + "length: %d, rkey: %0x, pgl_paddr: %llx, " + "page_list_len: %u, wqe_misc: %x\n", + (unsigned long long) mr->ibmr.iova, + mr->ibmr.length, + reg_wr(ib_wr)->key, + (unsigned long long) mr->paddr, + mr->npages, + wqe_misc); + break; + } default: /* error */ err = -EINVAL; @@ -3940,6 +4054,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.bind_mw = nes_bind_mw; nesibdev->ibdev.alloc_mr = nes_alloc_mr; + nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 309b31c31ae1..a204b677af22 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -79,6 +79,10 @@ struct nes_mr { u16 pbls_used; u8 mode; u8 pbl_4k; + __le64 *pages; + dma_addr_t paddr; + u32 max_pages; + u32 npages; }; struct nes_hw_pb { -- cgit v1.2.3 From 39405885005a8b01e3523d3351ea74ae3b965842 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:33 +0300 Subject: IB/iser: Port to new fast registration API Remove fastreg page list allocation as the page vector is now private to the provider. Instead of constructing the page list and fast_req work request, call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.h | 10 ++---- drivers/infiniband/ulp/iser/iser_memory.c | 54 ++++++++++++++----------------- drivers/infiniband/ulp/iser/iser_verbs.c | 16 +-------- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2484bee993ec..8a5998e6a407 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -230,7 +230,7 @@ enum iser_data_dir { */ struct iser_data_buf { struct scatterlist *sg; - unsigned int size; + int size; unsigned long data_len; unsigned int dma_nents; }; @@ -297,7 +297,7 @@ struct iser_tx_desc { u8 wr_idx; union iser_wr { struct ib_send_wr send; - struct ib_fast_reg_wr fast_reg; + struct ib_reg_wr fast_reg; struct ib_sig_handover_wr sig; } wrs[ISER_MAX_WRS]; struct iser_mem_reg data_reg; @@ -412,7 +412,6 @@ struct iser_device { * * @mr: memory region * @fmr_pool: pool of fmrs - * @frpl: fast reg page list used by frwrs * @page_vec: fast reg page list used by fmr pool * @mr_valid: is mr valid indicator */ @@ -421,10 +420,7 @@ struct iser_reg_resources { struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; }; - union { - struct ib_fast_reg_page_list *frpl; - struct iser_page_vec *page_vec; - }; + struct iser_page_vec *page_vec; u8 mr_valid:1; }; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index b29fda3e8e74..ea765fb9664d 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -472,7 +472,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, sig_reg->sge.addr = 0; sig_reg->sge.length = scsi_transfer_length(iser_task->sc); - iser_dbg("sig reg: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n", + iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n", sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr, sig_reg->sge.length); err: @@ -484,47 +484,41 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_reg_resources *rsc, struct iser_mem_reg *reg) { - struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn; - struct iser_device *device = ib_conn->device; - struct ib_mr *mr = rsc->mr; - struct ib_fast_reg_page_list *frpl = rsc->frpl; struct iser_tx_desc *tx_desc = &iser_task->desc; - struct ib_fast_reg_wr *wr; - int offset, size, plen; - - plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, - &offset, &size); - if (plen * SIZE_4K < size) { - iser_err("fast reg page_list too short to hold this SG\n"); - return -EINVAL; - } + struct ib_mr *mr = rsc->mr; + struct ib_reg_wr *wr; + int n; if (!rsc->mr_valid) iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); - wr = fast_reg_wr(iser_tx_next_wr(tx_desc)); - wr->wr.opcode = IB_WR_FAST_REG_MR; + n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); + if (unlikely(n != mem->size)) { + iser_err("failed to map sg (%d/%d)\n", + n, mem->size); + return n < 0 ? n : -EINVAL; + } + + wr = reg_wr(iser_tx_next_wr(tx_desc)); + wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_id = ISER_FASTREG_LI_WRID; wr->wr.send_flags = 0; - wr->iova_start = frpl->page_list[0] + offset; - wr->page_list = frpl; - wr->page_list_len = plen; - wr->page_shift = SHIFT_4K; - wr->length = size; - wr->rkey = mr->rkey; - wr->access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); + wr->wr.num_sge = 0; + wr->mr = mr; + wr->key = mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + rsc->mr_valid = 0; reg->sge.lkey = mr->lkey; reg->rkey = mr->rkey; - reg->sge.addr = frpl->page_list[0] + offset; - reg->sge.length = size; + reg->sge.addr = mr->iova; + reg->sge.length = mr->length; - iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," - " length=0x%x\n", reg->sge.lkey, reg->rkey, - reg->sge.addr, reg->sge.length); + iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n", + reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length); return 0; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e7f3b204239b..a93070210109 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -293,35 +293,21 @@ iser_alloc_reg_res(struct ib_device *ib_device, { int ret; - res->frpl = ib_alloc_fast_reg_page_list(ib_device, size); - if (IS_ERR(res->frpl)) { - ret = PTR_ERR(res->frpl); - iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", - ret); - return PTR_ERR(res->frpl); - } - res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); if (IS_ERR(res->mr)) { ret = PTR_ERR(res->mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - goto fast_reg_mr_failure; + return ret; } res->mr_valid = 1; return 0; - -fast_reg_mr_failure: - ib_free_fast_reg_page_list(res->frpl); - - return ret; } static void iser_free_reg_res(struct iser_reg_resources *rsc) { ib_dereg_mr(rsc->mr); - ib_free_fast_reg_page_list(rsc->frpl); } static int -- cgit v1.2.3 From 16c2d702f2c76892f47944a53d9e1ce42023046d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:34 +0300 Subject: iser-target: Port to new memory registration API Remove fastreg page list allocation as the page vector is now private to the provider. Instead of constructing the page list and fast_req work request, call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/isert/ib_isert.c | 130 +++++++------------------------- drivers/infiniband/ulp/isert/ib_isert.h | 2 - 2 files changed, 28 insertions(+), 104 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 1b4d13d323b6..dfbbbb28090b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -473,10 +473,8 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) list_for_each_entry_safe(fr_desc, tmp, &isert_conn->fr_pool, list) { list_del(&fr_desc->list); - ib_free_fast_reg_page_list(fr_desc->data_frpl); ib_dereg_mr(fr_desc->data_mr); if (fr_desc->pi_ctx) { - ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); ib_dereg_mr(fr_desc->pi_ctx->prot_mr); ib_dereg_mr(fr_desc->pi_ctx->sig_mr); kfree(fr_desc->pi_ctx); @@ -504,22 +502,13 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, return -ENOMEM; } - pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(pi_ctx->prot_frpl)) { - isert_err("Failed to allocate prot frpl err=%ld\n", - PTR_ERR(pi_ctx->prot_frpl)); - ret = PTR_ERR(pi_ctx->prot_frpl); - goto err_pi_ctx; - } - pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(pi_ctx->prot_mr)) { isert_err("Failed to allocate prot frmr err=%ld\n", PTR_ERR(pi_ctx->prot_mr)); ret = PTR_ERR(pi_ctx->prot_mr); - goto err_prot_frpl; + goto err_pi_ctx; } desc->ind |= ISERT_PROT_KEY_VALID; @@ -539,8 +528,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc, err_prot_mr: ib_dereg_mr(pi_ctx->prot_mr); -err_prot_frpl: - ib_free_fast_reg_page_list(pi_ctx->prot_frpl); err_pi_ctx: kfree(pi_ctx); @@ -551,34 +538,18 @@ static int isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, struct fast_reg_descriptor *fr_desc) { - int ret; - - fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc->data_frpl)) { - isert_err("Failed to allocate data frpl err=%ld\n", - PTR_ERR(fr_desc->data_frpl)); - return PTR_ERR(fr_desc->data_frpl); - } - fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc->data_mr)) { isert_err("Failed to allocate data frmr err=%ld\n", PTR_ERR(fr_desc->data_mr)); - ret = PTR_ERR(fr_desc->data_mr); - goto err_data_frpl; + return PTR_ERR(fr_desc->data_mr); } fr_desc->ind |= ISERT_DATA_KEY_VALID; isert_dbg("Created fr_desc %p\n", fr_desc); return 0; - -err_data_frpl: - ib_free_fast_reg_page_list(fr_desc->data_frpl); - - return ret; } static int @@ -2534,45 +2505,6 @@ unmap_cmd: return ret; } -static int -isert_map_fr_pagelist(struct ib_device *ib_dev, - struct scatterlist *sg_start, int sg_nents, u64 *fr_pl) -{ - u64 start_addr, end_addr, page, chunk_start = 0; - struct scatterlist *tmp_sg; - int i = 0, new_chunk, last_ent, n_pages; - - n_pages = 0; - new_chunk = 1; - last_ent = sg_nents - 1; - for_each_sg(sg_start, tmp_sg, sg_nents, i) { - start_addr = ib_sg_dma_address(ib_dev, tmp_sg); - if (new_chunk) - chunk_start = start_addr; - end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg); - - isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n", - i, (unsigned long long)tmp_sg->dma_address, - tmp_sg->length); - - if ((end_addr & ~PAGE_MASK) && i < last_ent) { - new_chunk = 0; - continue; - } - new_chunk = 1; - - page = chunk_start & PAGE_MASK; - do { - fr_pl[n_pages++] = page; - isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n", - n_pages - 1, page); - page += PAGE_SIZE; - } while (page < end_addr); - } - - return n_pages; -} - static inline void isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) { @@ -2598,11 +2530,9 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, struct isert_device *device = isert_conn->device; struct ib_device *ib_dev = device->ib_device; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; - struct ib_fast_reg_wr fr_wr; + struct ib_reg_wr reg_wr; struct ib_send_wr inv_wr, *bad_wr, *wr = NULL; - int ret, pagelist_len; - u32 page_off; + int ret, n; if (mem->dma_nents == 1) { sge->lkey = device->pd->local_dma_lkey; @@ -2613,45 +2543,41 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, return 0; } - if (ind == ISERT_DATA_KEY_VALID) { + if (ind == ISERT_DATA_KEY_VALID) /* Registering data buffer */ mr = fr_desc->data_mr; - frpl = fr_desc->data_frpl; - } else { + else /* Registering protection buffer */ mr = fr_desc->pi_ctx->prot_mr; - frpl = fr_desc->pi_ctx->prot_frpl; - } - - page_off = mem->offset % PAGE_SIZE; - - isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", - fr_desc, mem->nents, mem->offset); - - pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents, - &frpl->page_list[0]); if (!(fr_desc->ind & ind)) { isert_inv_rkey(&inv_wr, mr); wr = &inv_wr; } - /* Prepare FASTREG WR */ - memset(&fr_wr, 0, sizeof(fr_wr)); - fr_wr.wr.wr_id = ISER_FASTREG_LI_WRID; - fr_wr.wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.iova_start = frpl->page_list[0] + page_off; - fr_wr.page_list = frpl; - fr_wr.page_list_len = pagelist_len; - fr_wr.page_shift = PAGE_SHIFT; - fr_wr.length = mem->len; - fr_wr.rkey = mr->rkey; - fr_wr.access_flags = IB_ACCESS_LOCAL_WRITE; + n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE); + if (unlikely(n != mem->nents)) { + isert_err("failed to map mr sg (%d/%d)\n", + n, mem->nents); + return n < 0 ? n : -EINVAL; + } + + isert_dbg("Use fr_desc %p sg_nents %d offset %u\n", + fr_desc, mem->nents, mem->offset); + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID; + reg_wr.wr.send_flags = 0; + reg_wr.wr.num_sge = 0; + reg_wr.mr = mr; + reg_wr.key = mr->lkey; + reg_wr.access = IB_ACCESS_LOCAL_WRITE; if (!wr) - wr = &fr_wr.wr; + wr = ®_wr.wr; else - wr->next = &fr_wr.wr; + wr->next = ®_wr.wr; ret = ib_post_send(isert_conn->qp, wr, &bad_wr); if (ret) { @@ -2661,8 +2587,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn, fr_desc->ind &= ~ind; sge->lkey = mr->lkey; - sge->addr = frpl->page_list[0] + page_off; - sge->length = mem->len; + sge->addr = mr->iova; + sge->length = mr->length; isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", sge->addr, sge->length, sge->lkey); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 360461819452..3d7fbc47c343 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -84,14 +84,12 @@ enum isert_indicator { struct pi_context { struct ib_mr *prot_mr; - struct ib_fast_reg_page_list *prot_frpl; struct ib_mr *sig_mr; }; struct fast_reg_descriptor { struct list_head list; struct ib_mr *data_mr; - struct ib_fast_reg_page_list *data_frpl; u8 ind; struct pi_context *pi_ctx; }; -- cgit v1.2.3 From 4143f34e01e9cdf1882f98c54d9073e4de8c28fb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:35 +0300 Subject: xprtrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/frwr_ops.c | 118 +++++++++++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- 2 files changed, 69 insertions(+), 52 deletions(-) diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0d2f46f600b6..a1434447b0d6 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); - if (IS_ERR(f->fr_pgl)) + + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); + if (!f->sg) goto out_list_err; + + sg_init_table(f->sg, depth); + return 0; out_mr_err: @@ -163,9 +167,9 @@ out_mr_err: return rc; out_list_err: - rc = PTR_ERR(f->fr_pgl); - dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", - __func__, rc); + rc = -ENOMEM; + dprintk("RPC: %s: sg allocation failure\n", + __func__); ib_dereg_mr(f->fr_mr); return rc; } @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) if (rc) dprintk("RPC: %s: ib_dereg_mr status %i\n", __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + kfree(r->r.frmr.sg); } static int @@ -312,14 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; struct ib_send_wr *bad_wr; + int rc, i, n, dma_nents; u8 key; - int len, pageoff; - int i, rc; - int seg_len; - u64 pa; - int page_no; mw = seg1->rl_mw; seg1->rl_mw = NULL; @@ -332,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); frmr = &mw->r.frmr; frmr->fr_state = FRMR_IS_VALID; + mr = frmr->fr_mr; - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; - for (page_no = i = 0; i < nsegs;) { - rpcrdma_map_one(device, seg, direction); - pa = seg->mr_dma; - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - frmr->fr_pgl->page_list[page_no++] = pa; - pa += PAGE_SIZE; - } - len += seg->mr_len; + for (i = 0; i < nsegs;) { + if (seg->mr_page) + sg_set_page(&frmr->sg[i], + seg->mr_page, + seg->mr_len, + offset_in_page(seg->mr_offset)); + else + sg_set_buf(&frmr->sg[i], seg->mr_offset, + seg->mr_len); + ++seg; ++i; + /* Check for holes */ if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", - __func__, mw, i, len); - - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.page_list = frmr->fr_pgl; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.page_list_len = page_no; - fastreg_wr.length = len; - fastreg_wr.access_flags = writing ? - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : - IB_ACCESS_REMOTE_READ; - mr = frmr->fr_mr; + frmr->sg_nents = i; + + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); + if (!dma_nents) { + pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", + __func__, frmr->sg, frmr->sg_nents); + return -ENOMEM; + } + + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? n : -EINVAL; + goto out_senderr; + } + + dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", + __func__, mw, frmr->sg_nents, mr->length); + key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.rkey = mr->rkey; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = (uintptr_t)mw; + reg_wr.wr.num_sge = 0; + reg_wr.wr.send_flags = 0; + reg_wr.mr = mr; + reg_wr.key = mr->rkey; + reg_wr.access = writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, ®_wr.wr, &bad_wr); if (rc) goto out_senderr; + seg1->mr_dir = direction; seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = i; - seg1->mr_len = len; - return i; + seg1->mr_base = mr->iova; + seg1->mr_nsegs = frmr->sg_nents; + seg1->mr_len = mr->length; + + return frmr->sg_nents; out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - while (i--) - rpcrdma_unmap_one(device, --seg); + ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction); __frwr_queue_recovery(mw); return rc; } @@ -403,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw = seg1->rl_mw; + struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; dprintk("RPC: %s: FRMR %p\n", __func__, mw); seg1->rl_mw = NULL; - mw->r.frmr.fr_state = FRMR_IS_INVALID; + frmr->fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia->ri_device, seg++); + ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c09414e6f91b..c82abf44e39d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -193,7 +193,8 @@ enum rpcrdma_frmr_state { }; struct rpcrdma_frmr { - struct ib_fast_reg_page_list *fr_pgl; + struct scatterlist *sg; + int sg_nents; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; struct work_struct fr_work; -- cgit v1.2.3 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 +-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 76 ++++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 +++++--------- 3 files changed, 55 insertions(+), 61 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..1e4438ea2380 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge { }; struct svc_rdma_fastreg_mr { struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; + struct scatterlist *sg; + int sg_nents; unsigned long access_flags; - unsigned long map_len; enum dma_data_direction direction; struct list_head frmr_list; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7be42d0da19e..cb0991345816 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -220,12 +220,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, { struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; u8 key; - int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); - int ret, read, pno; + int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; @@ -234,16 +234,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; - pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); + read = min_t(int, nents << PAGE_SHIFT, rs_length); - frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = pages_needed << PAGE_SHIFT; - frmr->page_list_len = pages_needed; + frmr->sg_nents = nents; - for (pno = 0; pno < pages_needed; pno++) { + for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; @@ -251,17 +249,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, head->arg.len += len; if (!pg_off) head->count++; + + sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], + len, pg_off); + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; - frmr->page_list->page_list[pno] = - ib_dma_map_page(xprt->sc_cm_id->device, - head->arg.pages[pg_no], 0, - PAGE_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[pno]); - if (ret) - goto err; - atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; @@ -277,28 +270,42 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, + frmr->direction); + if (!dma_nents) { + pr_err("svcrdma: failed to dma map sg %p\n", + frmr->sg); + return -ENOMEM; + } + atomic_inc(&xprt->sc_dma_used); + + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", + frmr->mr, n, frmr->sg_nents); + return n < 0 ? n : -EINVAL; + } + /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); - ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; - ctxt->sge[0].length = read; + ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.iova_start = (unsigned long)frmr->kva; - fastreg_wr.page_list = frmr->page_list; - fastreg_wr.page_list_len = frmr->page_list_len; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.length = frmr->map_len; - fastreg_wr.access_flags = frmr->access_flags; - fastreg_wr.rkey = frmr->mr->lkey; - fastreg_wr.wr.next = &read_wr.wr; + /* Prepare REG WR */ + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = 0; + reg_wr.wr.send_flags = IB_SEND_SIGNALED; + reg_wr.wr.num_sge = 0; + reg_wr.mr = frmr->mr; + reg_wr.key = frmr->mr->lkey; + reg_wr.access = frmr->access_flags; + reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); @@ -324,7 +331,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr.wr); + ret = svc_rdma_send(xprt, ®_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -338,7 +345,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - svc_rdma_unmap_dma(ctxt); + ib_dma_unmap_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4a41122d586f..a266e870d870 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) { struct ib_mr *mr; - struct ib_fast_reg_page_list *pl; + struct scatterlist *sg; struct svc_rdma_fastreg_mr *frmr; u32 num_sg; @@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (IS_ERR(mr)) goto err_free_frmr; - pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - num_sg); - if (IS_ERR(pl)) + sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL); + if (!sg) goto err_free_mr; + sg_init_table(sg, RPCSVC_MAXPAGES); + frmr->mr = mr; - frmr->page_list = pl; + frmr->sg = sg; INIT_LIST_HEAD(&frmr->frmr_list); return frmr; @@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) frmr = list_entry(xprt->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); + kfree(frmr->sg); ib_dereg_mr(frmr->mr); - ib_free_fast_reg_page_list(frmr->page_list); kfree(frmr); } } @@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); - frmr->map_len = 0; - frmr->page_list_len = 0; + frmr->sg_nents = 0; } spin_unlock_bh(&rdma->sc_frmr_q_lock); if (frmr) @@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) return rdma_alloc_frmr(rdma); } -static void frmr_unmap_dma(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - int page_no; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - dma_addr_t addr = frmr->page_list->page_list[page_no]; - if (ib_dma_mapping_error(frmr->mr->device, addr)) - continue; - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); - } -} - void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, struct svc_rdma_fastreg_mr *frmr) { if (frmr) { - frmr_unmap_dma(rdma, frmr); + ib_dma_unmap_sg(rdma->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); + atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); -- cgit v1.2.3 From 9ddc87374af90ee694bcc8d3412c6f2b51df1fea Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:37 +0300 Subject: RDS/IW: Convert to new memory registration API Get rid of fast_reg page list and its construction. Instead, just pass the RDS sg list to ib_map_mr_sg and post the new ib_reg_wr. This is done both for server IW RDMA_READ registration and the client remote key registration. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Acked-by: Santosh Shilimkar Signed-off-by: Doug Ledford --- net/rds/iw.h | 5 +-- net/rds/iw_rdma.c | 128 +++++++++++++++++++----------------------------------- net/rds/iw_send.c | 57 ++++++++++++------------ 3 files changed, 75 insertions(+), 115 deletions(-) diff --git a/net/rds/iw.h b/net/rds/iw.h index fe858e5dd8d1..5af01d1758b3 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -74,13 +74,12 @@ struct rds_iw_send_work { struct rm_rdma_op *s_op; struct rds_iw_mapping *s_mapping; struct ib_mr *s_mr; - struct ib_fast_reg_page_list *s_page_list; unsigned char s_remap_count; union { struct ib_send_wr s_send_wr; struct ib_rdma_wr s_rdma_wr; - struct ib_fast_reg_wr s_fast_reg_wr; + struct ib_reg_wr s_reg_wr; }; struct ib_sge s_sge[RDS_IW_MAX_SGE]; unsigned long s_queued; @@ -199,7 +198,7 @@ struct rds_iw_device { /* Magic WR_ID for ACKs */ #define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) -#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) +#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL) #define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) struct rds_iw_statistics { diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index f8a612cc69e6..47bd68451ff7 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -47,7 +47,6 @@ struct rds_iw_mr { struct rdma_cm_id *cm_id; struct ib_mr *mr; - struct ib_fast_reg_page_list *page_list; struct rds_iw_mapping mapping; unsigned char remap_count; @@ -77,8 +76,8 @@ struct rds_iw_mr_pool { static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); static void rds_iw_mr_pool_flush_worker(struct work_struct *work); -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr, struct scatterlist *sg, unsigned int nents); static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); @@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, sg->bytes = 0; } -static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, - struct rds_iw_scatterlist *sg) +static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, + struct rds_iw_scatterlist *sg) { struct ib_device *dev = rds_iwdev->dev; - u64 *dma_pages = NULL; - int i, j, ret; + int i, ret; WARN_ON(sg->dma_len); sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); if (unlikely(!sg->dma_len)) { printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); - return ERR_PTR(-EBUSY); + return -EBUSY; } sg->bytes = 0; @@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, if (sg->dma_npages > fastreg_message_size) goto out_unmap; - dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC); - if (!dma_pages) { - ret = -ENOMEM; - goto out_unmap; - } - for (i = j = 0; i < sg->dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); - u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); - u64 end_addr; - end_addr = dma_addr + dma_len; - dma_addr &= ~PAGE_MASK; - for (; dma_addr < end_addr; dma_addr += PAGE_SIZE) - dma_pages[j++] = dma_addr; - BUG_ON(j > sg->dma_npages); - } - - return dma_pages; + return 0; out_unmap: ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); sg->dma_len = 0; - kfree(dma_pages); - return ERR_PTR(ret); + return ret; } @@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev) INIT_LIST_HEAD(&ibmr->mapping.m_list); ibmr->mapping.m_mr = ibmr; - err = rds_iw_init_fastreg(pool, ibmr); + err = rds_iw_init_reg(pool, ibmr); if (err) goto out_no_cigar; @@ -622,7 +603,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, ibmr->cm_id = cm_id; ibmr->device = rds_iwdev; - ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); + ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents); if (ret == 0) *key_ret = ibmr->mr->rkey; else @@ -638,7 +619,7 @@ out: } /* - * iWARP fastreg handling + * iWARP reg handling * * The life cycle of a fastreg registration is a bit different from * FMRs. @@ -650,7 +631,7 @@ out: * This creates a bit of a problem for us, as we do not have the destination * IP in GET_MR, so the connection must be setup prior to the GET_MR call for * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit - * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request + * will try to queue a LOCAL_INV (if needed) and a REG_MR work request * before queuing the SEND. When completions for these arrive, they are * dispatched to the MR has a bit set showing that RDMa can be performed. * @@ -659,11 +640,10 @@ out: * The expectation there is that this invalidation step includes ALL * PREVIOUSLY FREED MRs. */ -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr) +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) { struct rds_iw_device *rds_iwdev = pool->device; - struct ib_fast_reg_page_list *page_list = NULL; struct ib_mr *mr; int err; @@ -676,56 +656,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, return err; } - /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages - * is not filled in. - */ - page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size); - if (IS_ERR(page_list)) { - err = PTR_ERR(page_list); - - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err); - ib_dereg_mr(mr); - return err; - } - - ibmr->page_list = page_list; ibmr->mr = mr; return 0; } -static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) +static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping) { struct rds_iw_mr *ibmr = mapping->m_mr; - struct ib_fast_reg_wr f_wr; + struct rds_iw_scatterlist *m_sg = &mapping->m_sg; + struct ib_reg_wr reg_wr; struct ib_send_wr *failed_wr; - int ret; + int ret, n; + + n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE); + if (unlikely(n != m_sg->len)) + return n < 0 ? n : -EINVAL; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = RDS_IW_REG_WR_ID; + reg_wr.wr.num_sge = 0; + reg_wr.mr = ibmr->mr; + reg_wr.key = mapping->m_rkey; + reg_wr.access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; /* - * Perform a WR for the fast_reg_mr. Each individual page + * Perform a WR for the reg_mr. Each individual page * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. The key used is a rolling 8bit + * inside the reg_mr WR. The key used is a rolling 8bit * counter, which should guarantee uniqueness. */ ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); mapping->m_rkey = ibmr->mr->rkey; - memset(&f_wr, 0, sizeof(f_wr)); - f_wr.wr.wr_id = RDS_IW_FAST_REG_WR_ID; - f_wr.wr.opcode = IB_WR_FAST_REG_MR; - f_wr.length = mapping->m_sg.bytes; - f_wr.rkey = mapping->m_rkey; - f_wr.page_list = ibmr->page_list; - f_wr.page_list_len = mapping->m_sg.dma_len; - f_wr.page_shift = PAGE_SHIFT; - f_wr.access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - f_wr.iova_start = 0; - f_wr.wr.send_flags = IB_SEND_SIGNALED; - - failed_wr = &f_wr.wr; - ret = ib_post_send(ibmr->cm_id->qp, &f_wr.wr, &failed_wr); - BUG_ON(failed_wr != &f_wr.wr); + failed_wr = ®_wr.wr; + ret = ib_post_send(ibmr->cm_id->qp, ®_wr.wr, &failed_wr); + BUG_ON(failed_wr != ®_wr.wr); if (ret) printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); @@ -757,21 +725,20 @@ out: return ret; } -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr, - struct scatterlist *sg, - unsigned int sg_len) +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr, + struct scatterlist *sg, + unsigned int sg_len) { struct rds_iw_device *rds_iwdev = pool->device; struct rds_iw_mapping *mapping = &ibmr->mapping; u64 *dma_pages; - int i, ret = 0; + int ret = 0; rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); - dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); - if (IS_ERR(dma_pages)) { - ret = PTR_ERR(dma_pages); + ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); + if (ret) { dma_pages = NULL; goto out; } @@ -781,10 +748,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, goto out; } - for (i = 0; i < mapping->m_sg.dma_npages; ++i) - ibmr->page_list->page_list[i] = dma_pages[i]; - - ret = rds_iw_rdma_build_fastreg(mapping); + ret = rds_iw_rdma_reg_mr(mapping); if (ret) goto out; @@ -870,8 +834,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr) { - if (ibmr->page_list) - ib_free_fast_reg_page_list(ibmr->page_list); if (ibmr->mr) ib_dereg_mr(ibmr->mr); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index f6e23c515b44..e20bd503f4bd 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); break; } - - send->s_page_list = ib_alloc_fast_reg_page_list( - ic->i_cm_id->device, fastreg_message_size); - if (IS_ERR(send->s_page_list)) { - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n"); - break; - } } } @@ -177,8 +170,6 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic) for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { BUG_ON(!send->s_mr); ib_dereg_mr(send->s_mr); - BUG_ON(!send->s_page_list); - ib_free_fast_reg_page_list(send->s_page_list); if (send->s_send_wr.opcode == 0xdead) continue; if (send->s_rm) @@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) continue; } - if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { + if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) { ic->i_fastreg_posted = 1; continue; } @@ -252,7 +243,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) if (send->s_rm) rds_iw_send_unmap_rm(ic, send, wc.status); break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: @@ -770,24 +761,26 @@ out: return ret; } -static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) +static int rds_iw_build_send_reg(struct rds_iw_send_work *send, + struct scatterlist *sg, + int sg_nents) { - BUG_ON(nent > send->s_page_list->max_page_list_len); - /* - * Perform a WR for the fast_reg_mr. Each individual page - * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. - */ - send->s_fast_reg_wr.wr.opcode = IB_WR_FAST_REG_MR; - send->s_fast_reg_wr.length = len; - send->s_fast_reg_wr.rkey = send->s_mr->rkey; - send->s_fast_reg_wr.page_list = send->s_page_list; - send->s_fast_reg_wr.page_list_len = nent; - send->s_fast_reg_wr.page_shift = PAGE_SHIFT; - send->s_fast_reg_wr.access_flags = IB_ACCESS_REMOTE_WRITE; - send->s_fast_reg_wr.iova_start = sg_addr; + int n; + + n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE); + if (unlikely(n != sg_nents)) + return n < 0 ? n : -EINVAL; + + send->s_reg_wr.wr.opcode = IB_WR_REG_MR; + send->s_reg_wr.wr.wr_id = 0; + send->s_reg_wr.wr.num_sge = 0; + send->s_reg_wr.mr = send->s_mr; + send->s_reg_wr.key = send->s_mr->rkey; + send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE; ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); + + return 0; } int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) @@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) int sent; int ret; int num_sge; + int sg_nents; rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); @@ -861,6 +855,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat = &op->op_sg[0]; sent = 0; num_sge = op->op_count; + sg_nents = 0; for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { send->s_rdma_wr.wr.send_flags = 0; @@ -904,7 +899,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) len = ib_sg_dma_len(ic->i_cm_id->device, scat); if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) - send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); + sg_nents++; else { send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); send->s_sge[j].length = len; @@ -951,8 +946,12 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * fastreg_mr (or possibly a dma_mr) */ if (!op->op_write) { - rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], - op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); + ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos], + &op->op_sg[0], sg_nents); + if (ret) { + printk(KERN_WARNING "RDS/IW: failed to reg send mem\n"); + goto out; + } work_alloc++; } -- cgit v1.2.3 From 26630e8a09b72e8783e8963781d39e7edb2a60b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:38 +0300 Subject: IB/srp: Split srp_map_sg This is a preparation patch for the new registration API conversion. It splits srp_map_sg per registration strategy (srp_map_sg[fmr|fr|dma]. On its own it adds some code duplication, but it makes the API switch easier to comprehend. Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 157 ++++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 235f9b8b27f1..ef1575d11a4b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1286,6 +1286,17 @@ static int srp_map_finish_fmr(struct srp_map_state *state, if (state->fmr.next >= state->fmr.end) return -ENOMEM; + WARN_ON_ONCE(!dev->use_fmr); + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && target->global_mr) { + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->global_mr->rkey); + goto reset_state; + } + fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, state->npages, io_addr); if (IS_ERR(fmr)) @@ -1297,6 +1308,10 @@ static int srp_map_finish_fmr(struct srp_map_state *state, srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, state->dma_len, fmr->fmr->rkey); +reset_state: + state->npages = 0; + state->dma_len = 0; + return 0; } @@ -1309,10 +1324,22 @@ static int srp_map_finish_fr(struct srp_map_state *state, struct ib_fast_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; + int err; if (state->fr.next >= state->fr.end) return -ENOMEM; + WARN_ON_ONCE(!dev->use_fast_reg); + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && target->global_mr) { + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->global_mr->rkey); + goto reset_state; + } + desc = srp_fr_pool_get(ch->fr_pool); if (!desc) return -ENOMEM; @@ -1342,7 +1369,15 @@ static int srp_map_finish_fr(struct srp_map_state *state, srp_map_desc(state, state->base_dma_addr, state->dma_len, desc->mr->rkey); - return ib_post_send(ch->qp, &wr.wr, &bad_wr); + err = ib_post_send(ch->qp, &wr.wr, &bad_wr); + if (err) + return err; + +reset_state: + state->npages = 0; + state->dma_len = 0; + + return 0; } static int srp_finish_mapping(struct srp_map_state *state, @@ -1350,26 +1385,9 @@ static int srp_finish_mapping(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - int ret = 0; - WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr); - - if (state->npages == 0) - return 0; - - if (state->npages == 1 && target->global_mr) - srp_map_desc(state, state->base_dma_addr, state->dma_len, - target->global_mr->rkey); - else - ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) : - srp_map_finish_fmr(state, ch); - - if (ret == 0) { - state->npages = 0; - state->dma_len = 0; - } - - return ret; + return dev->use_fast_reg ? srp_map_finish_fr(state, ch) : + srp_map_finish_fmr(state, ch); } static int srp_map_sg_entry(struct srp_map_state *state, @@ -1415,47 +1433,79 @@ static int srp_map_sg_entry(struct srp_map_state *state, return ret; } -static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct srp_request *req, struct scatterlist *scat, - int count) +static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) { - struct srp_target_port *target = ch->target; - struct srp_device *dev = target->srp_host->srp_dev; struct scatterlist *sg; int i, ret; - state->desc = req->indirect_desc; - state->pages = req->map_page; - if (dev->use_fast_reg) { - state->fr.next = req->fr_list; - state->fr.end = req->fr_list + target->cmd_sg_cnt; - } else if (dev->use_fmr) { - state->fmr.next = req->fmr_list; - state->fmr.end = req->fmr_list + target->cmd_sg_cnt; + state->desc = req->indirect_desc; + state->pages = req->map_page; + state->fmr.next = req->fmr_list; + state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; + + for_each_sg(scat, sg, count, i) { + ret = srp_map_sg_entry(state, ch, sg, i); + if (ret) + return ret; } - if (dev->use_fast_reg || dev->use_fmr) { - for_each_sg(scat, sg, count, i) { - ret = srp_map_sg_entry(state, ch, sg, i); - if (ret) - goto out; - } - ret = srp_finish_mapping(state, ch); + ret = srp_finish_mapping(state, ch); + if (ret) + return ret; + + req->nmdesc = state->nmdesc; + + return 0; +} + +static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + struct scatterlist *sg; + int i, ret; + + state->desc = req->indirect_desc; + state->pages = req->map_page; + state->fmr.next = req->fmr_list; + state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; + + for_each_sg(scat, sg, count, i) { + ret = srp_map_sg_entry(state, ch, sg, i); if (ret) - goto out; - } else { - for_each_sg(scat, sg, count, i) { - srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), - ib_sg_dma_len(dev->dev, sg), - target->global_mr->rkey); - } + return ret; } + ret = srp_finish_mapping(state, ch); + if (ret) + return ret; + req->nmdesc = state->nmdesc; - ret = 0; -out: - return ret; + return 0; +} + +static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, + struct srp_request *req, struct scatterlist *scat, + int count) +{ + struct srp_target_port *target = ch->target; + struct srp_device *dev = target->srp_host->srp_dev; + struct scatterlist *sg; + int i; + + state->desc = req->indirect_desc; + for_each_sg(scat, sg, count, i) { + srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), + ib_sg_dma_len(dev->dev, sg), + target->global_mr->rkey); + } + + req->nmdesc = state->nmdesc; + + return 0; } /* @@ -1563,7 +1613,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); - srp_map_sg(&state, ch, req, scat, count); + if (dev->use_fast_reg) + srp_map_sg_fr(&state, ch, req, scat, count); + else if (dev->use_fmr) + srp_map_sg_fmr(&state, ch, req, scat, count); + else + srp_map_sg_dma(&state, ch, req, scat, count); /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. If this -- cgit v1.2.3 From f7f7aab1a5c0a495ae9a2d604badd1e3f9f20795 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:39 +0300 Subject: IB/srp: Convert to new registration API Instead of constructing a page list, call ib_map_mr_sg and post a new ib_reg_wr. srp_map_finish_fr now returns the number of sg elements registered. Remove srp_finish_mapping since no one is calling it. Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 125 ++++++++++++++++++------------------ drivers/infiniband/ulp/srp/ib_srp.h | 11 +++- 2 files changed, 71 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index ef1575d11a4b..f2c8dcaf96a7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -340,8 +340,6 @@ static void srp_destroy_fr_pool(struct srp_fr_pool *pool) return; for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { - if (d->frpl) - ib_free_fast_reg_page_list(d->frpl); if (d->mr) ib_dereg_mr(d->mr); } @@ -362,7 +360,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, struct srp_fr_pool *pool; struct srp_fr_desc *d; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; int i, ret = -EINVAL; if (pool_size <= 0) @@ -385,12 +382,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, goto destroy_pool; } d->mr = mr; - frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); - if (IS_ERR(frpl)) { - ret = PTR_ERR(frpl); - goto destroy_pool; - } - d->frpl = frpl; list_add_tail(&d->entry, &pool->free_list); } @@ -1321,23 +1312,24 @@ static int srp_map_finish_fr(struct srp_map_state *state, struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; struct ib_send_wr *bad_wr; - struct ib_fast_reg_wr wr; + struct ib_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; - int err; + int n, err; if (state->fr.next >= state->fr.end) return -ENOMEM; WARN_ON_ONCE(!dev->use_fast_reg); - if (state->npages == 0) + if (state->sg_nents == 0) return 0; - if (state->npages == 1 && target->global_mr) { - srp_map_desc(state, state->base_dma_addr, state->dma_len, + if (state->sg_nents == 1 && target->global_mr) { + srp_map_desc(state, sg_dma_address(state->sg), + sg_dma_len(state->sg), target->global_mr->rkey); - goto reset_state; + return 1; } desc = srp_fr_pool_get(ch->fr_pool); @@ -1347,37 +1339,33 @@ static int srp_map_finish_fr(struct srp_map_state *state, rkey = ib_inc_rkey(desc->mr->rkey); ib_update_fast_reg_key(desc->mr, rkey); - memcpy(desc->frpl->page_list, state->pages, - sizeof(state->pages[0]) * state->npages); + n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents, + dev->mr_page_size); + if (unlikely(n < 0)) + return n; - memset(&wr, 0, sizeof(wr)); - wr.wr.opcode = IB_WR_FAST_REG_MR; + wr.wr.next = NULL; + wr.wr.opcode = IB_WR_REG_MR; wr.wr.wr_id = FAST_REG_WR_ID_MASK; - wr.iova_start = state->base_dma_addr; - wr.page_list = desc->frpl; - wr.page_list_len = state->npages; - wr.page_shift = ilog2(dev->mr_page_size); - wr.length = state->dma_len; - wr.access_flags = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - wr.rkey = desc->mr->lkey; + wr.wr.num_sge = 0; + wr.wr.send_flags = 0; + wr.mr = desc->mr; + wr.key = desc->mr->rkey; + wr.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); *state->fr.next++ = desc; state->nmdesc++; - srp_map_desc(state, state->base_dma_addr, state->dma_len, - desc->mr->rkey); + srp_map_desc(state, desc->mr->iova, + desc->mr->length, desc->mr->rkey); err = ib_post_send(ch->qp, &wr.wr, &bad_wr); - if (err) + if (unlikely(err)) return err; -reset_state: - state->npages = 0; - state->dma_len = 0; - - return 0; + return n; } static int srp_finish_mapping(struct srp_map_state *state, @@ -1407,7 +1395,7 @@ static int srp_map_sg_entry(struct srp_map_state *state, while (dma_len) { unsigned offset = dma_addr & ~dev->mr_page_mask; if (state->npages == dev->max_pages_per_mr || offset != 0) { - ret = srp_finish_mapping(state, ch); + ret = srp_map_finish_fmr(state, ch); if (ret) return ret; } @@ -1429,7 +1417,7 @@ static int srp_map_sg_entry(struct srp_map_state *state, */ ret = 0; if (len != dev->mr_page_size) - ret = srp_finish_mapping(state, ch); + ret = srp_map_finish_fmr(state, ch); return ret; } @@ -1451,7 +1439,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, return ret; } - ret = srp_finish_mapping(state, ch); + ret = srp_map_finish_fmr(state, ch); if (ret) return ret; @@ -1464,23 +1452,23 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, struct srp_request *req, struct scatterlist *scat, int count) { - struct scatterlist *sg; - int i, ret; - state->desc = req->indirect_desc; - state->pages = req->map_page; - state->fmr.next = req->fmr_list; - state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt; + state->fr.next = req->fr_list; + state->fr.end = req->fr_list + ch->target->cmd_sg_cnt; + state->sg = scat; + state->sg_nents = scsi_sg_count(req->scmnd); - for_each_sg(scat, sg, count, i) { - ret = srp_map_sg_entry(state, ch, sg, i); - if (ret) - return ret; - } + while (state->sg_nents) { + int i, n; - ret = srp_finish_mapping(state, ch); - if (ret) - return ret; + n = srp_map_finish_fr(state, ch); + if (unlikely(n < 0)) + return n; + + state->sg_nents -= n; + for (i = 0; i < n; i++) + state->sg = sg_next(state->sg); + } req->nmdesc = state->nmdesc; @@ -1524,6 +1512,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, struct srp_map_state state; struct srp_direct_buf idb_desc; u64 idb_pages[1]; + struct scatterlist idb_sg[1]; int ret; memset(&state, 0, sizeof(state)); @@ -1531,20 +1520,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, state.gen.next = next_mr; state.gen.end = end_mr; state.desc = &idb_desc; - state.pages = idb_pages; - state.pages[0] = (req->indirect_dma_addr & - dev->mr_page_mask); - state.npages = 1; state.base_dma_addr = req->indirect_dma_addr; state.dma_len = idb_len; - ret = srp_finish_mapping(&state, ch); - if (ret < 0) - goto out; + + if (dev->use_fast_reg) { + state.sg = idb_sg; + state.sg_nents = 1; + sg_set_buf(idb_sg, req->indirect_desc, idb_len); + idb_sg->dma_address = req->indirect_dma_addr; /* hack! */ + ret = srp_map_finish_fr(&state, ch); + if (ret < 0) + return ret; + } else if (dev->use_fmr) { + state.pages = idb_pages; + state.pages[0] = (req->indirect_dma_addr & + dev->mr_page_mask); + state.npages = 1; + ret = srp_map_finish_fmr(&state, ch); + if (ret < 0) + return ret; + } else { + return -EINVAL; + } *idb_rkey = idb_desc.key; -out: - return ret; + return 0; } static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 3608f2e4819c..87a2a919dc43 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -242,7 +242,6 @@ struct srp_iu { struct srp_fr_desc { struct list_head entry; struct ib_mr *mr; - struct ib_fast_reg_page_list *frpl; }; /** @@ -294,11 +293,17 @@ struct srp_map_state { } gen; }; struct srp_direct_buf *desc; - u64 *pages; + union { + u64 *pages; + struct scatterlist *sg; + }; dma_addr_t base_dma_addr; u32 dma_len; u32 total_len; - unsigned int npages; + union { + unsigned int npages; + int sg_nents; + }; unsigned int nmdesc; unsigned int ndesc; }; -- cgit v1.2.3 From 51c2b8e2ac872cd90a12c5e606eeb0daf1a91e25 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:40 +0300 Subject: IB/srp: Remove srp_finish_mapping No callers left, remove it. Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f2c8dcaf96a7..cf54e239f842 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1368,16 +1368,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, return n; } -static int srp_finish_mapping(struct srp_map_state *state, - struct srp_rdma_ch *ch) -{ - struct srp_target_port *target = ch->target; - struct srp_device *dev = target->srp_host->srp_dev; - - return dev->use_fast_reg ? srp_map_finish_fr(state, ch) : - srp_map_finish_fmr(state, ch); -} - static int srp_map_sg_entry(struct srp_map_state *state, struct srp_rdma_ch *ch, struct scatterlist *sg, int sg_index) -- cgit v1.2.3 From 9a21be531cacecce6c897faacd66ed4c7dbbe88b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:41 +0300 Subject: IB/srp: Dont allocate a page vector when using fast_reg The new fast registration API does not reuqire a page vector so we can't avoid allocating it. Signed-off-by: Sagi Grimberg Tested-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cf54e239f842..32f79624dd28 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -840,11 +840,12 @@ static void srp_free_req_data(struct srp_target_port *target, for (i = 0; i < target->req_ring_size; ++i) { req = &ch->req_ring[i]; - if (dev->use_fast_reg) + if (dev->use_fast_reg) { kfree(req->fr_list); - else + } else { kfree(req->fmr_list); - kfree(req->map_page); + kfree(req->map_page); + } if (req->indirect_dma_addr) { ib_dma_unmap_single(ibdev, req->indirect_dma_addr, target->indirect_size, @@ -878,14 +879,15 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch) GFP_KERNEL); if (!mr_list) goto out; - if (srp_dev->use_fast_reg) + if (srp_dev->use_fast_reg) { req->fr_list = mr_list; - else + } else { req->fmr_list = mr_list; - req->map_page = kmalloc(srp_dev->max_pages_per_mr * - sizeof(void *), GFP_KERNEL); - if (!req->map_page) - goto out; + req->map_page = kmalloc(srp_dev->max_pages_per_mr * + sizeof(void *), GFP_KERNEL); + if (!req->map_page) + goto out; + } req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); if (!req->indirect_desc) goto out; -- cgit v1.2.3 From dd01e66a6c532a8cd183cbc02ebaef99f186345f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:42 +0300 Subject: IB/mlx5: Remove old FRWR API support No ULP uses it anymore, go ahead and remove it. Keep only the local invalidate part of the handlers. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 -- drivers/infiniband/hw/mlx5/main.c | 2 - drivers/infiniband/hw/mlx5/mlx5_ib.h | 14 ------ drivers/infiniband/hw/mlx5/mr.c | 42 ---------------- drivers/infiniband/hw/mlx5/qp.c | 97 ++++-------------------------------- 5 files changed, 9 insertions(+), 149 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 206930096d56..3dfd287256d6 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -112,9 +112,6 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) case IB_WR_REG_MR: return IB_WC_REG_MR; - case IB_WR_FAST_REG_MR: - return IB_WC_FAST_REG_MR; - default: pr_warn("unknown completion status\n"); return 0; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e93044ea6ce..bdd60a69be2d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1426,8 +1426,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.process_mad = mlx5_ib_process_mad; dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; - dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; - dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a29b28c31c44..633347260b79 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -338,12 +338,6 @@ struct mlx5_ib_mr { void *descs_alloc; }; -struct mlx5_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; -}; - struct mlx5_ib_umr_context { enum ib_wc_status status; struct completion done; @@ -494,11 +488,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx5_ib_mr, ibmr); } -static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); -} - struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; @@ -569,9 +558,6 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6177e8053888..ec8993a7b3be 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1383,48 +1383,6 @@ err_free: return ERR_PTR(err); } -struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof(u64); - - mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - struct mlx5_ib_dev *dev = to_mdev(page_list->device); - int size = page_list->max_page_list_len * sizeof(u64); - - dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index da2b46c2624a..307bdbca8938 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -64,7 +64,6 @@ static const u32 mlx5_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, - [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_REG_MR] = MLX5_OPCODE_UMR, [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, @@ -1908,20 +1907,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = frwr_mkey_mask(); } -static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr, int li) +static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) { memset(umr, 0, sizeof(*umr)); - - if (li) { - umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; - return; - } - - umr->flags = (1 << 5); /* fail if not free */ - umr->klm_octowords = get_klm_octo(fast_reg_wr(wr)->page_list_len); - umr->mkey_mask = frwr_mkey_mask(); + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; } static __be64 get_umr_reg_mr_mask(void) @@ -2015,24 +2005,10 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, seg->log2_page_size = ilog2(mr->ibmr.page_size); } -static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, - int li, int *writ) +static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) { memset(seg, 0, sizeof(*seg)); - if (li) { - seg->status = MLX5_MKEY_STATUS_FREE; - return; - } - - seg->flags = get_umr_flags(fast_reg_wr(wr)->access_flags) | - MLX5_ACCESS_MODE_MTT; - *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); - seg->qpn_mkey7_0 = cpu_to_be32((fast_reg_wr(wr)->rkey & 0xff) | 0xffffff00); - seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - seg->start_addr = cpu_to_be64(fast_reg_wr(wr)->iova_start); - seg->len = cpu_to_be64(fast_reg_wr(wr)->length); - seg->xlt_oct_size = cpu_to_be32((fast_reg_wr(wr)->page_list_len + 1) / 2); - seg->log2_page_size = fast_reg_wr(wr)->page_shift; + seg->status = MLX5_MKEY_STATUS_FREE; } static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) @@ -2067,24 +2043,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } -static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, - struct ib_send_wr *wr, - struct mlx5_core_dev *mdev, - struct mlx5_ib_pd *pd, - int writ) -{ - struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(fast_reg_wr(wr)->page_list); - u64 *page_list = fast_reg_wr(wr)->page_list->page_list; - u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0); - int i; - - for (i = 0; i < fast_reg_wr(wr)->page_list_len; i++) - mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); - dseg->addr = cpu_to_be64(mfrpl->map); - dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * fast_reg_wr(wr)->page_list_len, 64)); - dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); -} - static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { @@ -2504,36 +2462,18 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return 0; } -static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, - struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) { - int writ = 0; - int li; - - li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; - if (unlikely(wr->send_flags & IB_SEND_INLINE)) - return -EINVAL; - - set_frwr_umr_segment(*seg, wr, li); + set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_mkey_segment(*seg, wr, li, &writ); + set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - if (!li) { - if (unlikely(fast_reg_wr(wr)->page_list_len > - fast_reg_wr(wr)->page_list->max_page_list_len)) - return -ENOMEM; - - set_frwr_pages(*seg, wr, mdev, pd, writ); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - } - return 0; } static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) @@ -2649,7 +2589,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, { struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; @@ -2724,25 +2663,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - num_sge = 0; - break; - - case IB_WR_FAST_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; - ctrl->imm = cpu_to_be32(fast_reg_wr(wr)->rkey); - err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } + set_linv_wr(qp, &seg, &size); num_sge = 0; break; -- cgit v1.2.3 From e761c67fbf20d60175750ce2994822f30f2edf6a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:43 +0300 Subject: IB/mlx4: Remove old FRWR API support No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 3 +-- drivers/infiniband/hw/mlx4/main.c | 2 -- drivers/infiniband/hw/mlx4/mlx4_ib.h | 15 ----------- drivers/infiniband/hw/mlx4/mr.c | 48 ------------------------------------ drivers/infiniband/hw/mlx4/qp.c | 31 ----------------------- 5 files changed, 1 insertion(+), 98 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 2ea4125b7903..b88fc8f5ab18 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -818,8 +818,7 @@ repoll: wc->opcode = IB_WC_LSO; break; case MLX4_OPCODE_FMR: - wc->opcode = IB_WC_FAST_REG_MR; - /* TODO: wc->opcode = IB_WC_REG_MR; */ + wc->opcode = IB_WC_REG_MR; break; case MLX4_OPCODE_LOCAL_INVAL: wc->opcode = IB_WC_LOCAL_INV; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7e87599fbdcb..f567160a4a56 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2267,8 +2267,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; - ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; - ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 64c80e15a9f8..1caa11edac03 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -147,12 +147,6 @@ struct mlx4_ib_mw { struct mlx4_mw mmw; }; -struct mlx4_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - __be64 *mapped_page_list; - dma_addr_t map; -}; - struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; @@ -653,11 +647,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) return container_of(ibmw, struct mlx4_ib_mw, ibmw); } -static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) -{ - return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); -} - static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); @@ -724,10 +713,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len); -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); - int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e5a32e4520e8..4d1e1c632603 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -443,54 +443,6 @@ err_free: return ERR_PTR(err); } -struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, - int page_list_len) -{ - struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct mlx4_ib_fast_reg_page_list *mfrpl; - int size = page_list_len * sizeof (u64); - - if (page_list_len > MLX4_MAX_FAST_REG_PAGES) - return ERR_PTR(-EINVAL); - - mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); - if (!mfrpl) - return ERR_PTR(-ENOMEM); - - mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); - if (!mfrpl->ibfrpl.page_list) - goto err_free; - - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> - pdev->dev, - size, &mfrpl->map, - GFP_KERNEL); - if (!mfrpl->mapped_page_list) - goto err_free; - - WARN_ON(mfrpl->map & 0x3f); - - return &mfrpl->ibfrpl; - -err_free: - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); - return ERR_PTR(-ENOMEM); -} - -void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - struct mlx4_ib_dev *dev = to_mdev(page_list->device); - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); - int size = page_list->max_page_list_len * sizeof (u64); - - dma_free_coherent(&dev->dev->persist->pdev->dev, size, - mfrpl->mapped_page_list, - mfrpl->map); - kfree(mfrpl->ibfrpl.page_list); - kfree(mfrpl); -} - struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0067f4b4dc09..a2e4ca56da44 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -111,7 +111,6 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), - [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), @@ -2522,28 +2521,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, fseg->reserved[1] = 0; } -static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, - struct ib_fast_reg_wr *wr) -{ - struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->page_list); - int i; - - for (i = 0; i < wr->page_list_len; ++i) - mfrpl->mapped_page_list[i] = - cpu_to_be64(wr->page_list->page_list[i] | - MLX4_MTT_FLAG_PRESENT); - - fseg->flags = convert_access(wr->access_flags); - fseg->mem_key = cpu_to_be32(wr->rkey); - fseg->buf_list = cpu_to_be64(mfrpl->map); - fseg->start_addr = cpu_to_be64(wr->iova_start); - fseg->reg_len = cpu_to_be64(wr->length); - fseg->offset = 0; /* XXX -- is this just for ZBVA? */ - fseg->page_size = cpu_to_be32(wr->page_shift); - fseg->reserved[0] = 0; - fseg->reserved[1] = 0; -} - static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_bind_mw_wr *wr) { @@ -2875,14 +2852,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; break; - case IB_WR_FAST_REG_MR: - ctrl->srcrb_flags |= - cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); - set_fmr_seg(wqe, fast_reg_wr(wr)); - wqe += sizeof (struct mlx4_wqe_fmr_seg); - size += sizeof (struct mlx4_wqe_fmr_seg) / 16; - break; - case IB_WR_REG_MR: ctrl->srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); -- cgit v1.2.3 From 191cfed565c0435aeced5f11f6cf9b03ec380d5b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:44 +0300 Subject: RDMA/ocrdma: Remove old FRWR API No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 - drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 104 +--------------------------- drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 4 -- 3 files changed, 1 insertion(+), 109 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 19a5ccd6e5fc..62b7009daa6c 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -181,8 +181,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.alloc_mr = ocrdma_alloc_mr; dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; - dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; - dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; /* mandatory to support user space verbs consumer. */ dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 49481dfaa493..583001bcfb8f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2133,41 +2133,6 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, ext_rw->len = hdr->total_len; } -static void build_frmr_pbes(struct ib_fast_reg_wr *wr, - struct ocrdma_pbl *pbl_tbl, - struct ocrdma_hw_mr *hwmr) -{ - int i; - u64 buf_addr = 0; - int num_pbes; - struct ocrdma_pbe *pbe; - - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - num_pbes = 0; - - /* go through the OS phy regions & fill hw pbe entries into pbls. */ - for (i = 0; i < wr->page_list_len; i++) { - /* number of pbes can be more for one OS buf, when - * buffers are of different sizes. - * split the ib_buf to one or more pbes. - */ - buf_addr = wr->page_list->page_list[i]; - pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); - pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); - num_pbes += 1; - pbe++; - - /* if the pbl is full storing the pbes, - * move to next pbl. - */ - if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - } - } - return; -} - static int get_encoded_page_size(int pg_sz) { /* Max size is 256M 4096 << 16 */ @@ -2234,50 +2199,6 @@ static int ocrdma_build_reg(struct ocrdma_qp *qp, return 0; } -static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, - struct ib_send_wr *send_wr) -{ - u64 fbo; - struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); - struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); - struct ocrdma_mr *mr; - struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); - u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); - - wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); - - if (wr->page_list_len > dev->attr.max_pages_per_frmr) - return -EINVAL; - - hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); - hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); - - if (wr->page_list_len == 0) - BUG(); - if (wr->access_flags & IB_ACCESS_LOCAL_WRITE) - hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; - if (wr->access_flags & IB_ACCESS_REMOTE_WRITE) - hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; - if (wr->access_flags & IB_ACCESS_REMOTE_READ) - hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; - hdr->lkey = wr->rkey; - hdr->total_len = wr->length; - - fbo = wr->iova_start - (wr->page_list->page_list[0] & PAGE_MASK); - - fast_reg->va_hi = upper_32_bits(wr->iova_start); - fast_reg->va_lo = (u32) (wr->iova_start & 0xffffffff); - fast_reg->fbo_hi = upper_32_bits(fbo); - fast_reg->fbo_lo = (u32) fbo & 0xffffffff; - fast_reg->num_sges = wr->page_list_len; - fast_reg->size_sge = - get_encoded_page_size(1 << wr->page_shift); - mr = (struct ocrdma_mr *) (unsigned long) - dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; - build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); - return 0; -} - static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); @@ -2357,9 +2278,6 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; hdr->lkey = wr->ex.invalidate_rkey; break; - case IB_WR_FAST_REG_MR: - status = ocrdma_build_fr(qp, hdr, wr); - break; case IB_WR_REG_MR: status = ocrdma_build_reg(qp, hdr, reg_wr(wr)); break; @@ -2627,7 +2545,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, ibwc->opcode = IB_WC_SEND; break; case OCRDMA_FR_MR: - ibwc->opcode = IB_WC_FAST_REG_MR; + ibwc->opcode = IB_WC_REG_MR; break; case OCRDMA_LKEY_INV: ibwc->opcode = IB_WC_LOCAL_INV; @@ -3148,26 +3066,6 @@ pl_err: return ERR_PTR(-ENOMEM); } -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len) -{ - struct ib_fast_reg_page_list *frmr_list; - int size; - - size = sizeof(*frmr_list) + (page_list_len * sizeof(u64)); - frmr_list = kzalloc(size, GFP_KERNEL); - if (!frmr_list) - return ERR_PTR(-ENOMEM); - frmr_list->page_list = (u64 *)(frmr_list + 1); - return frmr_list; -} - -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) -{ - kfree(page_list); -} - #define MAX_KERNEL_PBE_SIZE 65536 static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, int buf_cnt, u32 *pbe_size) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 1b2d3ac10203..a2f3b4dc20b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -128,9 +128,5 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); -struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device - *ibdev, - int page_list_len); -void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); #endif /* __OCRDMA_VERBS_H__ */ -- cgit v1.2.3 From 94e585cb7467a3e4cecb7267cd8303d2b693a8b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:45 +0300 Subject: RDMA/cxgb3: Remove old FRWR API No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb3/iwch_cq.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 24 --------------- drivers/infiniband/hw/cxgb3/iwch_qp.c | 47 ----------------------------- 3 files changed, 1 insertion(+), 72 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index cf5474ae68ff..cfe404925a39 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -123,7 +123,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->opcode = IB_WC_LOCAL_INV; break; case T3_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index f7b915378343..c34725ca0bb4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -884,28 +884,6 @@ static int iwch_map_mr_sg(struct ib_mr *ibmr, return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page); } -static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len) -{ - struct ib_fast_reg_page_list *page_list; - - page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), - GFP_KERNEL); - if (!page_list) - return ERR_PTR(-ENOMEM); - - page_list->page_list = (u64 *)(page_list + 1); - page_list->max_page_list_len = page_list_len; - - return page_list; -} - -static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) -{ - kfree(page_list); -} - static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; @@ -1483,8 +1461,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.alloc_mr = iwch_alloc_mr; dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index a09ea538e990..d0548fc6395e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -189,48 +189,6 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, return 0; } -static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *send_wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) -{ - struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); - int i; - __be64 *p; - - if (wr->page_list_len > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->rkey); - wqe->fastreg.len = cpu_to_be32(wr->length); - wqe->fastreg.va_base_hi = cpu_to_be32(wr->iova_start >> 32); - wqe->fastreg.va_base_lo_fbo = cpu_to_be32(wr->iova_start & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(wr->page_list_len) | - V_FR_PAGE_SIZE(wr->page_shift-12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access_flags))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < wr->page_list_len; i++, p++) { - - /* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + wr->page_list_len - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)wr->page_list->page_list[i]); - } - *flit_cnt = 5 + wr->page_list_len; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { @@ -457,11 +415,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; - case IB_WR_FAST_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; case IB_WR_REG_MR: t3_wr_opcode = T3_WR_FASTREG; err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, -- cgit v1.2.3 From d3cfd002e6586d715515c2f293cf04cf72b33002 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:46 +0300 Subject: iw_cxgb4: Remove old FRWR API No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 -------- drivers/infiniband/hw/cxgb4/mem.c | 45 -------------------- drivers/infiniband/hw/cxgb4/provider.c | 2 - drivers/infiniband/hw/cxgb4/qp.c | 77 ---------------------------------- 5 files changed, 1 insertion(+), 143 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 92d518382a9f..de9cd6901752 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -752,7 +752,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->opcode = IB_WC_LOCAL_INV; break; case FW_RI_FAST_REGISTER: - wc->opcode = IB_WC_FAST_REG_MR; + wc->opcode = IB_WC_REG_MR; break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 6a8e2696455f..00e55faa086a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -409,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) return container_of(ibmw, struct c4iw_mw, ibmw); } -struct c4iw_fr_page_list { - struct ib_fast_reg_page_list ibpl; - DEFINE_DMA_UNMAP_ADDR(mapping); - dma_addr_t dma_addr; - struct c4iw_dev *dev; - int pll_len; -}; - -static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( - struct ib_fast_reg_page_list *ibpl) -{ - return container_of(ibpl, struct c4iw_fr_page_list, ibpl); -} - struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; @@ -970,10 +956,6 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); void c4iw_qp_rem_ref(struct ib_qp *qp); -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list); -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( - struct ib_device *device, - int page_list_len); struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 1e46a260a0fa..e1629ab58db7 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -945,51 +945,6 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page); } -struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, - int page_list_len) -{ - struct c4iw_fr_page_list *c4pl; - struct c4iw_dev *dev = to_c4iw_dev(device); - dma_addr_t dma_addr; - int pll_len = roundup(page_list_len * sizeof(u64), 32); - - c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); - if (!c4pl) - return ERR_PTR(-ENOMEM); - - c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, - pll_len, &dma_addr, - GFP_KERNEL); - if (!c4pl->ibpl.page_list) { - kfree(c4pl); - return ERR_PTR(-ENOMEM); - } - dma_unmap_addr_set(c4pl, mapping, dma_addr); - c4pl->dma_addr = dma_addr; - c4pl->dev = dev; - c4pl->pll_len = pll_len; - - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); - - return &c4pl->ibpl; -} - -void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) -{ - struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); - - PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", - __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, - &c4pl->dma_addr); - - dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->pll_len, - c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); - kfree(c4pl); -} - int c4iw_dereg_mr(struct ib_mr *ib_mr) { struct c4iw_dev *rhp; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e292308e82bd..0a7d99818b17 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -558,8 +558,6 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; - dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; dev->ibdev.attach_mcast = c4iw_multicast_attach; dev->ibdev.detach_mcast = c4iw_multicast_detach; dev->ibdev.process_mad = c4iw_process_mad; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aac75a068768..aa515afee724 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -670,75 +670,6 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *send_wr, u8 *len16, u8 t5dev) -{ - struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); - - struct fw_ri_immd *imdp; - __be64 *p; - int i; - int pbllen = roundup(wr->page_list_len * sizeof(u64), 32); - int rem; - - if (wr->page_list_len > t4_max_fr_depth(use_dsgl)) - return -EINVAL; - - wqe->fr.qpbinde_to_dcacpu = 0; - wqe->fr.pgsz_shift = wr->page_shift - 12; - wqe->fr.addr_type = FW_RI_VA_BASED_TO; - wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access_flags); - wqe->fr.len_hi = 0; - wqe->fr.len_lo = cpu_to_be32(wr->length); - wqe->fr.stag = cpu_to_be32(wr->rkey); - wqe->fr.va_hi = cpu_to_be32(wr->iova_start >> 32); - wqe->fr.va_lo_fbo = cpu_to_be32(wr->iova_start & 0xffffffff); - - if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->page_list); - struct fw_ri_dsgl *sglp; - - for (i = 0; i < wr->page_list_len; i++) { - wr->page_list->page_list[i] = (__force u64) - cpu_to_be64((u64)wr->page_list->page_list[i]); - } - - sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); - sglp->op = FW_RI_DATA_DSGL; - sglp->r1 = 0; - sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); - sglp->len0 = cpu_to_be32(pbllen); - - *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); - } else { - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - rem = pbllen; - for (i = 0; i < wr->page_list_len; i++) { - *p = cpu_to_be64((u64)wr->page_list->page_list[i]); - rem -= sizeof(*p); - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - BUG_ON(rem < 0); - while (rem) { - *p = 0; - rem -= sizeof(*p); - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) - + pbllen, 16); - } - return 0; -} - static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { @@ -873,14 +804,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_FAST_REG_MR: - fw_opcode = FW_RI_FR_NSMR_WR; - swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, - !is_t4( - qhp->rhp->rdev.lldi.adapter_type) ? - 1 : 0); - break; case IB_WR_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; -- cgit v1.2.3 From b8533eccc8eadabc559ed25e4b848c71a2433c18 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:47 +0300 Subject: IB/qib: Remove old FRWR API No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_keys.c | 56 ----------------------------------- drivers/infiniband/hw/qib/qib_mr.c | 32 +------------------- drivers/infiniband/hw/qib/qib_verbs.c | 8 ----- drivers/infiniband/hw/qib/qib_verbs.h | 7 ----- 4 files changed, 1 insertion(+), 102 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c index 95b8b9110fc6..d725c565518d 100644 --- a/drivers/infiniband/hw/qib/qib_keys.c +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -335,62 +335,6 @@ bail: return 0; } -/* - * Initialize the memory region specified by the work reqeust. - */ -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *send_wr) -{ - struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr); - struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct qib_pd *pd = to_ipd(qp->ibqp.pd); - struct qib_mregion *mr; - u32 rkey = wr->rkey; - unsigned i, n, m; - int ret = -EINVAL; - unsigned long flags; - u64 *page_list; - size_t ps; - - spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || rkey == 0) - goto bail; - - mr = rcu_dereference_protected( - rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], - lockdep_is_held(&rkt->lock)); - if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) - goto bail; - - if (wr->page_list_len > mr->max_segs) - goto bail; - - ps = 1UL << wr->page_shift; - if (wr->length > ps * wr->page_list_len) - goto bail; - - mr->user_base = wr->iova_start; - mr->iova = wr->iova_start; - mr->lkey = rkey; - mr->length = wr->length; - mr->access_flags = wr->access_flags; - page_list = wr->page_list->page_list; - m = 0; - n = 0; - for (i = 0; i < wr->page_list_len; i++) { - mr->map[m]->segs[n].vaddr = (void *) page_list[i]; - mr->map[m]->segs[n].length = ps; - if (++n == QIB_SEGSZ) { - m++; - n = 0; - } - } - - ret = 0; -bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; -} - /* * Initialize the memory region specified by the work request. */ diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index dcabf1b2e263..294f5c706be9 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -324,7 +324,7 @@ out: /* * Allocate a memory region usable with the - * IB_WR_FAST_REG_MR send work request. + * IB_WR_REG_MR send work request. * * Return the memory region on success, otherwise return an errno. */ @@ -375,36 +375,6 @@ int qib_map_mr_sg(struct ib_mr *ibmr, return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page); } -struct ib_fast_reg_page_list * -qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) -{ - unsigned size = page_list_len * sizeof(u64); - struct ib_fast_reg_page_list *pl; - - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - - pl = kzalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); - - pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; - - return pl; - -err_free: - kfree(pl); - return ERR_PTR(-ENOMEM); -} - -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) -{ - kfree(pl->page_list); - kfree(pl); -} - /** * qib_alloc_fmr - allocate a fast memory region * @pd: the protection domain for this memory region diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a1e53d7b662b..de6cb6fcda8d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -365,9 +365,6 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, if (wr->opcode == IB_WR_REG_MR) { if (qib_reg_mr(qp, reg_wr(wr))) goto bail_inval; - } else if (wr->opcode == IB_WR_FAST_REG_MR) { - if (qib_fast_reg_mr(qp, wr)) - goto bail_inval; } else if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) goto bail_inval; @@ -407,9 +404,6 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, else if (wr->opcode == IB_WR_REG_MR) memcpy(&wqe->reg_wr, reg_wr(wr), sizeof(wqe->reg_wr)); - else if (wr->opcode == IB_WR_FAST_REG_MR) - memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), - sizeof(wqe->fast_reg_wr)); else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE || wr->opcode == IB_WR_RDMA_READ) @@ -2267,8 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->dereg_mr = qib_dereg_mr; ibdev->alloc_mr = qib_alloc_mr; ibdev->map_mr_sg = qib_map_mr_sg; - ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; - ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; ibdev->alloc_fmr = qib_alloc_fmr; ibdev->map_phys_fmr = qib_map_phys_fmr; ibdev->unmap_fmr = qib_unmap_fmr; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 648f6c64f00d..2baf5ad251ed 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -344,7 +344,6 @@ struct qib_swqe { struct ib_send_wr wr; /* don't use wr.sg_list */ struct ib_ud_wr ud_wr; struct ib_reg_wr reg_wr; - struct ib_fast_reg_wr fast_reg_wr; struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; }; @@ -1051,12 +1050,6 @@ int qib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents); -struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( - struct ib_device *ibdev, int page_list_len); - -void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); - -int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr); struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, -- cgit v1.2.3 From 13d3e895faf68ab5f1981298198897a20b35827a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:48 +0300 Subject: RDMA/nes: Remove old FRWR API No ULP uses it anymore, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_hw.h | 6 -- drivers/infiniband/hw/nes/nes_verbs.c | 162 +--------------------------------- 2 files changed, 1 insertion(+), 167 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index d748e4b31b8d..c9080208aad2 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl { dma_addr_t paddr; }; -struct nes_ib_fast_reg_page_list { - struct ib_fast_reg_page_list ibfrpl; - struct nes_fast_mr_wqe_pbl nes_wqe_pbl; - u64 pbl; -}; - struct nes_listener { struct work_struct work; struct workqueue_struct *wq; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index dc8a03b0ca0d..137880a19ebe 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -486,76 +486,6 @@ static int nes_map_mr_sg(struct ib_mr *ibmr, return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page); } -/* - * nes_alloc_fast_reg_page_list - */ -static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( - struct ib_device *ibdev, - int page_list_len) -{ - struct nes_vnic *nesvnic = to_nesvnic(ibdev); - struct nes_device *nesdev = nesvnic->nesdev; - struct ib_fast_reg_page_list *pifrpl; - struct nes_ib_fast_reg_page_list *pnesfrpl; - - if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) - return ERR_PTR(-E2BIG); - /* - * Allocate the ib_fast_reg_page_list structure, the - * nes_fast_bpl structure, and the PLB table. - */ - pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) + - page_list_len * sizeof(u64), GFP_KERNEL); - - if (!pnesfrpl) - return ERR_PTR(-ENOMEM); - - pifrpl = &pnesfrpl->ibfrpl; - pifrpl->page_list = &pnesfrpl->pbl; - pifrpl->max_page_list_len = page_list_len; - /* - * Allocate the WQE PBL - */ - pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev, - page_list_len * sizeof(u64), - &pnesfrpl->nes_wqe_pbl.paddr); - - if (!pnesfrpl->nes_wqe_pbl.kva) { - kfree(pnesfrpl); - return ERR_PTR(-ENOMEM); - } - nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " - "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " - "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, - pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); - - return pifrpl; -} - -/* - * nes_free_fast_reg_page_list - */ -static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) -{ - struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_ib_fast_reg_page_list *pnesfrpl; - - pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); - /* - * Free the WQE PBL. - */ - pci_free_consistent(nesdev->pcidev, - pifrpl->max_page_list_len * sizeof(u64), - pnesfrpl->nes_wqe_pbl.kva, - pnesfrpl->nes_wqe_pbl.paddr); - /* - * Free the PBL structure - */ - kfree(pnesfrpl); -} - /** * nes_query_device */ @@ -3470,94 +3400,6 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, ib_wr->ex.invalidate_rkey); break; - case IB_WR_FAST_REG_MR: - { - int i; - struct ib_fast_reg_wr *fwr = fast_reg_wr(ib_wr); - int flags = fwr->access_flags; - struct nes_ib_fast_reg_page_list *pnesfrpl = - container_of(fwr->page_list, - struct nes_ib_fast_reg_page_list, - ibfrpl); - u64 *src_page_list = pnesfrpl->ibfrpl.page_list; - u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; - - if (fwr->page_list_len > - (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { - nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); - err = -EINVAL; - break; - } - wqe_misc = NES_IWARP_SQ_OP_FAST_REG; - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, - fwr->iova_start); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, - fwr->length); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, - fwr->rkey); - /* Set page size: */ - if (fwr->page_shift == 12) { - wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; - } else if (fwr->page_shift == 21) { - wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; - } else { - nes_debug(NES_DBG_IW_TX, "Invalid page shift," - " ib_wr=%u, max=1\n", ib_wr->num_sge); - err = -EINVAL; - break; - } - /* Set access_flags */ - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; - if (flags & IB_ACCESS_LOCAL_WRITE) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE; - - if (flags & IB_ACCESS_REMOTE_WRITE) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE; - - if (flags & IB_ACCESS_REMOTE_READ) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ; - - if (flags & IB_ACCESS_MW_BIND) - wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; - - /* Fill in PBL info: */ - if (fwr->page_list_len > - pnesfrpl->ibfrpl.max_page_list_len) { - nes_debug(NES_DBG_IW_TX, "Invalid page list length," - " ib_wr=%p, value=%u, max=%u\n", - ib_wr, fwr->page_list_len, - pnesfrpl->ibfrpl.max_page_list_len); - err = -EINVAL; - break; - } - - set_wqe_64bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, - pnesfrpl->nes_wqe_pbl.paddr); - - set_wqe_32bit_value(wqe->wqe_words, - NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, - fwr->page_list_len * 8); - - for (i = 0; i < fwr->page_list_len; i++) - dst_page_list[i] = cpu_to_le64(src_page_list[i]); - - nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " - "length: %d, rkey: %0x, pgl_paddr: %llx, " - "page_list_len: %u, wqe_misc: %x\n", - (unsigned long long) fwr->iova_start, - fwr->length, - fwr->rkey, - (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, - fwr->page_list_len, - wqe_misc); - break; - } case IB_WR_REG_MR: { struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr); @@ -3866,7 +3708,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: - entry->opcode = IB_WC_FAST_REG_MR; + entry->opcode = IB_WC_REG_MR; break; } @@ -4055,8 +3897,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.alloc_mr = nes_alloc_mr; nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; - nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; nesibdev->ibdev.attach_mcast = nes_multicast_attach; nesibdev->ibdev.detach_mcast = nes_multicast_detach; -- cgit v1.2.3 From 349abd059ed195c75b5f787102a333a4dae10775 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 29 Oct 2015 17:33:31 +0200 Subject: IB/hfi1: Remove fast registration from the code The driver does not support it anyway, and the support should be added to a generic layer shared by both hfi1, qib and softroce drivers. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/staging/rdma/hfi1/keys.c | 55 --------------------------------------- drivers/staging/rdma/hfi1/mr.c | 33 ++--------------------- drivers/staging/rdma/hfi1/verbs.c | 9 +------ drivers/staging/rdma/hfi1/verbs.h | 8 ------ 4 files changed, 3 insertions(+), 102 deletions(-) diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c index 82c21b1c0263..cb4e6087dfdb 100644 --- a/drivers/staging/rdma/hfi1/keys.c +++ b/drivers/staging/rdma/hfi1/keys.c @@ -354,58 +354,3 @@ bail: rcu_read_unlock(); return 0; } - -/* - * Initialize the memory region specified by the work request. - */ -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr) -{ - struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; - struct hfi1_pd *pd = to_ipd(qp->ibqp.pd); - struct hfi1_mregion *mr; - u32 rkey = wr->rkey; - unsigned i, n, m; - int ret = -EINVAL; - unsigned long flags; - u64 *page_list; - size_t ps; - - spin_lock_irqsave(&rkt->lock, flags); - if (pd->user || rkey == 0) - goto bail; - - mr = rcu_dereference_protected( - rkt->table[(rkey >> (32 - hfi1_lkey_table_size))], - lockdep_is_held(&rkt->lock)); - if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) - goto bail; - - if (wr->page_list_len > mr->max_segs) - goto bail; - - ps = 1UL << wr->page_shift; - if (wr->length > ps * wr->page_list_len) - goto bail; - - mr->user_base = wr->iova_start; - mr->iova = wr->iova_start; - mr->lkey = rkey; - mr->length = wr->length; - mr->access_flags = wr->access_flags; - page_list = wr->page_list->page_list; - m = 0; - n = 0; - for (i = 0; i < wr->page_list_len; i++) { - mr->map[m]->segs[n].vaddr = (void *) page_list[i]; - mr->map[m]->segs[n].length = ps; - if (++n == HFI1_SEGSZ) { - m++; - n = 0; - } - } - - ret = 0; -bail: - spin_unlock_irqrestore(&rkt->lock, flags); - return ret; -} diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c index bd64e4f986f9..402bd6414176 100644 --- a/drivers/staging/rdma/hfi1/mr.c +++ b/drivers/staging/rdma/hfi1/mr.c @@ -344,9 +344,10 @@ out: /* * Allocate a memory region usable with the - * IB_WR_FAST_REG_MR send work request. + * IB_WR_REG_MR send work request. * * Return the memory region on success, otherwise return an errno. + * FIXME: IB_WR_REG_MR is not supported */ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -364,36 +365,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, return &mr->ibmr; } -struct ib_fast_reg_page_list * -hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) -{ - unsigned size = page_list_len * sizeof(u64); - struct ib_fast_reg_page_list *pl; - - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - - pl = kzalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); - - pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; - - return pl; - -err_free: - kfree(pl); - return ERR_PTR(-ENOMEM); -} - -void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) -{ - kfree(pl->page_list); - kfree(pl); -} - /** * hfi1_alloc_fmr - allocate a fast memory region * @pd: the protection domain for this memory region diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 981e6c1b79a3..6e2da7ee6d2f 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -380,9 +380,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) * undefined operations. * Make sure buffer is large enough to hold the result for atomics. */ - if (wr->opcode == IB_WR_FAST_REG_MR) { - return -EINVAL; - } else if (qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_UC) { if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) return -EINVAL; } else if (qp->ibqp.qp_type != IB_QPT_RC) { @@ -417,9 +415,6 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr) if (qp->ibqp.qp_type != IB_QPT_UC && qp->ibqp.qp_type != IB_QPT_RC) memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_FAST_REG_MR) - memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), - sizeof(wqe->fast_reg_wr)); else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE || wr->opcode == IB_WR_RDMA_READ) @@ -2065,8 +2060,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->reg_user_mr = hfi1_reg_user_mr; ibdev->dereg_mr = hfi1_dereg_mr; ibdev->alloc_mr = hfi1_alloc_mr; - ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list; - ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list; ibdev->alloc_fmr = hfi1_alloc_fmr; ibdev->map_phys_fmr = hfi1_map_phys_fmr; ibdev->unmap_fmr = hfi1_unmap_fmr; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index cf5a3c956284..159ec08bfcd8 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -353,7 +353,6 @@ struct hfi1_swqe { struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; struct ib_ud_wr ud_wr; - struct ib_fast_reg_wr fast_reg_wr; }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ @@ -1026,13 +1025,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_entries); -struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list( - struct ib_device *ibdev, int page_list_len); - -void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); - -int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_fast_reg_wr *wr); - struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); -- cgit v1.2.3 From 5eae15927b3bccca6497506c8da38895da029b98 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 29 Oct 2015 17:33:32 +0200 Subject: IB/ipath: Remove fast registration from the code The driver does not support it anyway, and the support should be added to a generic layer shared by both hfi1, qib and softroce drivers. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/staging/rdma/ipath/ipath_verbs.c | 3 --- drivers/staging/rdma/ipath/ipath_verbs.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c index 15633ec1843f..29e91796fb10 100644 --- a/drivers/staging/rdma/ipath/ipath_verbs.c +++ b/drivers/staging/rdma/ipath/ipath_verbs.c @@ -399,9 +399,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) if (qp->ibqp.qp_type != IB_QPT_UC && qp->ibqp.qp_type != IB_QPT_RC) memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); - else if (wr->opcode == IB_WR_FAST_REG_MR) - memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr), - sizeof(wqe->fast_reg_wr)); else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || wr->opcode == IB_WR_RDMA_WRITE || wr->opcode == IB_WR_RDMA_READ) diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h index ed102a26ec08..0a90a56870ab 100644 --- a/drivers/staging/rdma/ipath/ipath_verbs.h +++ b/drivers/staging/rdma/ipath/ipath_verbs.h @@ -280,7 +280,6 @@ struct ipath_swqe { union { struct ib_send_wr wr; /* don't use wr.sg_list */ struct ib_ud_wr ud_wr; - struct ib_fast_reg_wr fast_reg_wr; struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; }; -- cgit v1.2.3 From 39bfc271bd687be2c8e396e976c0fb9a97963400 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:49 +0300 Subject: IB/core: Remove old fast registration API No callers and no providers left, go ahead and remove it. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 25 ------------------- include/rdma/ib_verbs.h | 54 ----------------------------------------- 2 files changed, 79 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5673df061213..e2e53f9d7a22 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1280,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, } EXPORT_SYMBOL(ib_alloc_mr); -struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device, - int max_page_list_len) -{ - struct ib_fast_reg_page_list *page_list; - - if (!device->alloc_fast_reg_page_list) - return ERR_PTR(-ENOSYS); - - page_list = device->alloc_fast_reg_page_list(device, max_page_list_len); - - if (!IS_ERR(page_list)) { - page_list->device = device; - page_list->max_page_list_len = max_page_list_len; - } - - return page_list; -} -EXPORT_SYMBOL(ib_alloc_fast_reg_page_list); - -void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) -{ - page_list->device->free_fast_reg_page_list(page_list); -} -EXPORT_SYMBOL(ib_free_fast_reg_page_list); - /* Memory windows */ struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 065d37c13aa6..324e9bf8e66c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -737,7 +737,6 @@ enum ib_wc_opcode { IB_WC_BIND_MW, IB_WC_LSO, IB_WC_LOCAL_INV, - IB_WC_FAST_REG_MR, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, @@ -1025,7 +1024,6 @@ enum ib_wr_opcode { IB_WR_SEND_WITH_INV, IB_WR_RDMA_READ_WITH_INV, IB_WR_LOCAL_INV, - IB_WR_FAST_REG_MR, IB_WR_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, @@ -1064,12 +1062,6 @@ struct ib_sge { u32 lkey; }; -struct ib_fast_reg_page_list { - struct ib_device *device; - u64 *page_list; - unsigned int max_page_list_len; -}; - /** * struct ib_mw_bind_info - Parameters for a memory window bind operation. * @mr: A memory region to bind the memory window to. @@ -1143,22 +1135,6 @@ static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) return container_of(wr, struct ib_ud_wr, wr); } -struct ib_fast_reg_wr { - struct ib_send_wr wr; - u64 iova_start; - struct ib_fast_reg_page_list *page_list; - unsigned int page_shift; - unsigned int page_list_len; - u32 length; - int access_flags; - u32 rkey; -}; - -static inline struct ib_fast_reg_wr *fast_reg_wr(struct ib_send_wr *wr) -{ - return container_of(wr, struct ib_fast_reg_wr, wr); -} - struct ib_reg_wr { struct ib_send_wr wr; struct ib_mr *mr; @@ -1773,9 +1749,6 @@ struct ib_device { int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents); - struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, - int page_list_len); - void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); int (*rereg_phys_mr)(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, @@ -2884,33 +2857,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); -/** - * ib_alloc_fast_reg_page_list - Allocates a page list array - * @device - ib device pointer. - * @page_list_len - size of the page list array to be allocated. - * - * This allocates and returns a struct ib_fast_reg_page_list * and a - * page_list array that is at least page_list_len in size. The actual - * size is returned in max_page_list_len. The caller is responsible - * for initializing the contents of the page_list array before posting - * a send work request with the IB_WC_FAST_REG_MR opcode. - * - * The page_list array entries must be translated using one of the - * ib_dma_*() functions just like the addresses passed to - * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct - * ib_fast_reg_page_list must not be modified by the caller until the - * IB_WC_FAST_REG_MR work request completes. - */ -struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list( - struct ib_device *device, int page_list_len); - -/** - * ib_free_fast_reg_page_list - Deallocates a previously allocated - * page list array. - * @page_list - struct ib_fast_reg_page_list pointer to be deallocated. - */ -void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); - /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. -- cgit v1.2.3 From db7489e07669073970358b6cacf6a9dd8dc9275e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 3 Aug 2015 10:01:52 -0700 Subject: IB/core, cma: Make __attribute_const__ declarations sparse-friendly Move the __attribute_const__ declarations such that sparse understands that these apply to the function itself and not to the return type. This avoids that sparse reports error messages like the following: drivers/infiniband/core/verbs.c:73:12: error: symbol 'ib_event_msg' redeclared with different type (originally declared at include/rdma/ib_verbs.h:470) - different modifiers Fixes: 2b1b5b601230 ("IB/core, cma: Nice log-friendly string helpers") Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/verbs.c | 4 ++-- include/rdma/ib_verbs.h | 4 ++-- include/rdma/rdma_cm.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7e93eb1f33eb..944cd90417bc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -88,7 +88,7 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -const char *rdma_event_msg(enum rdma_cm_event_type event) +const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e2e53f9d7a22..043a60ee6836 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -73,7 +73,7 @@ static const char * const ib_events[] = { [IB_EVENT_GID_CHANGE] = "GID changed", }; -const char *ib_event_msg(enum ib_event_type event) +const char *__attribute_const__ ib_event_msg(enum ib_event_type event) { size_t index = event; @@ -107,7 +107,7 @@ static const char * const wc_statuses[] = { [IB_WC_GENERAL_ERR] = "general error", }; -const char *ib_wc_status_msg(enum ib_wc_status status) +const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) { size_t index = status; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 324e9bf8e66c..9a68a19532ba 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -476,7 +476,7 @@ enum ib_event_type { IB_EVENT_GID_CHANGE, }; -__attribute_const__ const char *ib_event_msg(enum ib_event_type event); +const char *__attribute_const__ ib_event_msg(enum ib_event_type event); struct ib_event { struct ib_device *device; @@ -726,7 +726,7 @@ enum ib_wc_status { IB_WC_GENERAL_ERR }; -__attribute_const__ const char *ib_wc_status_msg(enum ib_wc_status status); +const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status); enum ib_wc_opcode { IB_WC_SEND, diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 463ec0ccbe51..afe44fde72a5 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -62,7 +62,7 @@ enum rdma_cm_event_type { RDMA_CM_EVENT_TIMEWAIT_EXIT }; -__attribute_const__ const char *rdma_event_msg(enum rdma_cm_event_type event); +const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); enum rdma_port_space { RDMA_PS_SDP = 0x0001, -- cgit v1.2.3