From 6dddd93938b3651cfeba7158ac179b4e6d3c1553 Mon Sep 17 00:00:00 2001
From: Yonatan Nachum
Date: Sun, 19 Feb 2023 08:13:28 +0000
Subject: RDMA/efa: Add data polling capability feature bit

Add a feature bit to the existing device caps field. EFA supports data
polling of 128-byte blocks. The flag indicates that the NIC guarantees
that a 128-byte-aligned block is written in order, i.e. observing the
last 8 bits of the block means the prior 127 bytes are also written. It
is useful for "last data polling" acceleration techniques.

Link: https://lore.kernel.org/r/20230219081328.10419-1-mrgolin@amazon.com
Reviewed-by: Yehuda Yitschak
Reviewed-by: Yossi Leybovich
Signed-off-by: Yonatan Nachum
Signed-off-by: Michael Margolin
Acked-by: Gal Pressman
Signed-off-by: Jason Gunthorpe
---
 include/uapi/rdma/efa-abi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index 163ac79556d6..74406b4817ce 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */
 /*
- * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

 #ifndef EFA_ABI_USER_H
@@ -120,6 +120,7 @@ enum {
 EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1,
 EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2,
 EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3,
+ EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4,
 };

 struct efa_ibv_ex_query_device_resp {
-- cgit v1.2.3


From 6e2a3a324aab9d76e6b864eb6ebc60b197b5141a Mon Sep 17 00:00:00 2001
From: Or Har-Toov
Date: Sun, 19 Mar 2023 14:59:30 +0200
Subject: net/mlx5: Expose bits for enabling out-of-order by default

Add the HW bits needed for enabling out-of-order by default and for
using go_back_n when out-of-order is not needed.

Signed-off-by: Or Har-Toov
Signed-off-by: Leon Romanovsky
Link: https://lore.kernel.org/r/75d6dfe263989a05c08c43406132b336ea12d00a.1679230449.git.leon@kernel.org
Signed-off-by: Leon Romanovsky
---
 include/linux/mlx5/mlx5_ifc.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 66d76e97a087..62039b147432 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1077,7 +1077,9 @@ struct mlx5_ifc_roce_cap_bits {
 u8 sw_r_roce_src_udp_port[0x1];
 u8 fl_rc_qp_when_roce_disabled[0x1];
 u8 fl_rc_qp_when_roce_enabled[0x1];
- u8 reserved_at_7[0x17];
+ u8 reserved_at_7[0x1];
+ u8 qp_ooo_transmit_default[0x1];
+ u8 reserved_at_9[0x15];
 u8 qp_ts_format[0x2];

 u8 reserved_at_20[0x60];
@@ -1493,7 +1495,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 u8 reserved_at_b0[0x1];
 u8 uplink_follow[0x1];
 u8 ts_cqe_to_dest_cqn[0x1];
- u8 reserved_at_b3[0x7];
+ u8 reserved_at_b3[0x6];
+ u8 go_back_n[0x1];
 u8 shampo[0x1];
 u8 reserved_at_bb[0x5];

@@ -3261,7 +3264,8 @@ struct mlx5_ifc_qpc_bits {
 u8 log_rq_stride[0x3];
 u8 no_sq[0x1];
 u8 log_sq_size[0x4];
- u8 reserved_at_55[0x3];
+ u8 reserved_at_55[0x1];
+ u8 retry_mode[0x2];
 u8 ts_format[0x2];
 u8 reserved_at_5a[0x1];
 u8 rlky[0x1];
-- cgit v1.2.3


From d54bd5abf4d26e1b6722238f75e36069ea91def9 Mon Sep 17 00:00:00 2001
From: Selvin Xavier
Date: Wed, 15 Mar 2023 01:16:55 -0700
Subject: RDMA/bnxt_re: Add resize_cq support

Add resize_cq verb support for user space CQs. The resize operation for
kernel CQs is not supported yet.

The driver should free the current CQ only after the user library has
polled all completions and switched to the new CQ. So after resize_cq
returns from the driver, the user library polls the existing completions
and stores them as temporary data. Once the library has reaped all
completions in the current CQ, it invokes ibv_cmd_poll_cq to inform the
driver that the resize_cq operation has completed. Add a check for user
CQs in the driver's poll_cq and complete the resize operation for user
CQs there. Update uverbs_cmd_mask with poll_cq to support this.

Signed-off-by: Selvin Xavier
Link: https://lore.kernel.org/r/1678868215-23626-1-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 109 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 +
 drivers/infiniband/hw/bnxt_re/main.c | 2 +
 drivers/infiniband/hw/bnxt_re/qplib_fp.c | 44 +++++++++++++
 drivers/infiniband/hw/bnxt_re/qplib_fp.h | 5 ++
 include/uapi/rdma/bnxt_re-abi.h | 4 ++
 6 files changed, 167 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 989edc789633..e86afecfbe46 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -2912,6 +2912,106 @@ fail:
 return rc;
 }

+static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
+{
+ struct bnxt_re_dev *rdev = cq->rdev;
+
+ bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
+
+ cq->qplib_cq.max_wqe = cq->resize_cqe;
+ if (cq->resize_umem) {
+ ib_umem_release(cq->umem);
+ cq->umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ cq->resize_cqe = 0;
+ }
+}
+
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ struct bnxt_qplib_sg_info sg_info = {};
+ struct bnxt_qplib_dpi *orig_dpi = NULL;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx = NULL;
+ struct bnxt_re_resize_cq_req req;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int rc, entries;
+
+ cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+ dev_attr = &rdev->dev_attr;
+ if (!ibcq->uobject) {
+ ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (cq->resize_umem) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
+ cq->qplib_cq.id);
+ return -EBUSY;
+ }
+
+ /* Check the requested cq depth out of supported depth */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
+ cq->qplib_cq.id, cqe);
+ return -EINVAL;
+ }
+
+ entries = roundup_pow_of_two(cqe + 1);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
+ ib_uctx);
+ /* uverbs consumer */
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->resize_umem)) {
+ rc = PTR_ERR(cq->resize_umem);
+ cq->resize_umem = NULL;
+ ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! 
rc = %d\n", + __func__, rc); + goto fail; + } + cq->resize_cqe = entries; + memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info)); + orig_dpi = cq->qplib_cq.dpi; + + cq->qplib_cq.sg_info.umem = cq->resize_umem; + cq->qplib_cq.sg_info.pgsize = PAGE_SIZE; + cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT; + cq->qplib_cq.dpi = &uctx->dpi; + + rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries); + if (rc) { + ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!", + cq->qplib_cq.id); + goto fail; + } + + cq->ib_cq.cqe = cq->resize_cqe; + + return 0; + +fail: + if (cq->resize_umem) { + ib_umem_release(cq->resize_umem); + cq->resize_umem = NULL; + cq->resize_cqe = 0; + memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info)); + cq->qplib_cq.dpi = orig_dpi; + } + return rc; +} + static u8 __req_to_ib_wc_status(u8 qstatus) { switch (qstatus) { @@ -3425,6 +3525,15 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) struct bnxt_re_sqp_entries *sqp_entry = NULL; unsigned long flags; + /* User CQ; the only processing we do is to + * complete any pending CQ resize operation. + */ + if (cq->umem) { + if (cq->resize_umem) + bnxt_re_resize_cq_complete(cq); + return 0; + } + spin_lock_irqsave(&cq->cq_lock, flags); budget = min_t(u32, num_entries, cq->max_cql); num_entries = budget; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 94326267f9bb..31f7e34040f7 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -104,6 +104,8 @@ struct bnxt_re_cq { #define MAX_CQL_PER_POLL 1024 u32 max_cql; struct ib_umem *umem; + struct ib_umem *resize_umem; + int resize_cqe; }; struct bnxt_re_mr { @@ -191,6 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); +int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c5867e78f231..48bbba7df22e 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -553,6 +553,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, .req_notify_cq = bnxt_re_req_notify_cq, + .resize_cq = bnxt_re_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq), INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd), @@ -584,6 +585,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) return ret; dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX); + ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ); return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 96e581ced50e..1d769a3106f6 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2100,6 +2100,50 @@ exit: return rc; } +void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res, + struct bnxt_qplib_cq *cq) +{ + bnxt_qplib_free_hwq(res, &cq->hwq); + memcpy(&cq->hwq, 
&cq->resize_hwq, sizeof(cq->hwq));
+}
+
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_resize_cq_resp resp = {};
+ struct cmdq_resize_cq req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u32 pg_sz, lvl, new_sz;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, RESIZE_CQ, cmd_flags);
+ hwq_attr.sginfo = &cq->sg_info;
+ hwq_attr.res = res;
+ hwq_attr.depth = new_cqes;
+ hwq_attr.stride = sizeof(struct cq_base);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
+ pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
+ lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
+ CMDQ_RESIZE_CQ_LVL_MASK;
+ new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
+ CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
+ req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
+ req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void *)&resp, NULL, 0);
+ return rc;
+}
+
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 {
 struct bnxt_qplib_rcfw *rcfw = res->rcfw;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 037501952543..d74d5ead2e32 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -400,6 +400,7 @@ struct bnxt_qplib_cq {
 u16 count;
 u16 period;
 struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_hwq resize_hwq;
 u32 cnq_hw_ring_id;
 struct bnxt_qplib_nq *nq;
 bool resize_in_progress;
@@ -532,6 +533,10 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
 int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 struct bnxt_qplib_swqe *wqe);
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes);
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 int num, struct bnxt_qplib_qp **qp);
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index b1de99bf56ce..c4e90775da0c 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -96,6 +96,10 @@ struct bnxt_re_cq_resp {
 __u32 rsvd;
 };

+struct bnxt_re_resize_cq_req {
+ __aligned_u64 cq_va;
+};
+
 struct bnxt_re_qp_req {
 __aligned_u64 qpsva;
 __aligned_u64 qprva;
-- cgit v1.2.3


From 77f7eb9f3416aace703971156133926e44e2195b Mon Sep 17 00:00:00 2001
From: Patrisious Haddad
Date: Thu, 23 Mar 2023 12:13:51 +0200
Subject: net/mlx5: Introduce other vport query for Q-counters

These new fields in the QUERY_Q_COUNTER command allow us to access
another vport's counters during the query command, which is especially
useful for querying representor vports.

In addition, add the required caps to check whether this capability is
actually supported.
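As an illustration only, a minimal sketch of how a consumer could use
the new fields once the caps are set (the helper name and error
handling are hypothetical; MLX5_CAP_GEN(), MLX5_SET() and
mlx5_cmd_exec() are the standard mlx5 command helpers):

  static int query_other_vport_q_counter(struct mlx5_core_dev *dev, u16 vport,
                                         u8 counter_set_id, void *out, int outlen)
  {
          u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};

          /* The new capability bits live in cmd_hca_cap, read via MLX5_CAP_GEN() */
          if (!MLX5_CAP_GEN(dev, q_counter_other_vport))
                  return -EOPNOTSUPP;

          MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
          MLX5_SET(query_q_counter_in, in, other_vport, 1);
          MLX5_SET(query_q_counter_in, in, vport_number, vport);
          MLX5_SET(query_q_counter_in, in, counter_set_id, counter_set_id);

          return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
  }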
Signed-off-by: Patrisious Haddad
Reviewed-by: Michael Guralnik
Link: https://lore.kernel.org/r/75c73a4a0e60f18c37b35a4a11ca2e2415e4a6f3.1679566038.git.leon@kernel.org
Signed-off-by: Leon Romanovsky
---
 include/linux/mlx5/mlx5_ifc.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 62039b147432..e4306cd87cd7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1729,7 +1729,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 u8 log_max_transport_domain[0x5];
 u8 reserved_at_328[0x3];
 u8 log_max_pd[0x5];
- u8 reserved_at_330[0xb];
+ u8 reserved_at_330[0x9];
+ u8 q_counter_aggregation[0x1];
+ u8 q_counter_other_vport[0x1];
 u8 log_max_xrcd[0x5];

 u8 nic_receive_steering_discard[0x1];
@@ -5603,10 +5605,15 @@ struct mlx5_ifc_query_q_counter_in_bits {
 u8 reserved_at_20[0x10];
 u8 op_mod[0x10];

- u8 reserved_at_40[0x80];
+ u8 other_vport[0x1];
+ u8 reserved_at_41[0xf];
+ u8 vport_number[0x10];
+
+ u8 reserved_at_60[0x60];

 u8 clear[0x1];
- u8 reserved_at_c1[0x1f];
+ u8 aggregate[0x1];
+ u8 reserved_at_c2[0x1e];

 u8 reserved_at_e0[0x18];
 u8 counter_set_id[0x8];
-- cgit v1.2.3


From 8d7c7c0eeb74281c846ef9231ce20536c79a99b4 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Fri, 14 Apr 2023 10:58:29 -0300
Subject: RDMA: Add ib_virt_dma_to_page()

Make it clearer what is going on by adding a function to go back from
the "virtual" dma_addr to a kva and another to a struct page. This is
used in the ib_uses_virt_dma() style drivers (siw, rxe, hfi, qib).

Call them instead of naked casts and virt_to_page() when working with
dma_addr values encoded by the various ib_map functions.

This also fixes the virt_to_page() casting problem Linus Walleij has
been chasing.
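For reference, a small sketch of the resulting pattern ("kbuf" is an
illustrative kernel buffer, not code from this patch): in
ib_uses_virt_dma() mode the "mapping" simply encodes the kva into the
u64 dma_addr, and the new helpers undo that encoding:

  u64 dma_addr = (uintptr_t)kbuf;                    /* how virt_dma "maps" a kva */
  void *kva = ib_virt_dma_to_ptr(dma_addr);          /* back to the kernel pointer */
  struct page *page = ib_virt_dma_to_page(dma_addr); /* virt_to_page() on that kva */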
Cc: Linus Walleij Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/0-v2-05ea785520ed+10-ib_virt_page_jgg@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_mr.c | 16 ++++++++-------- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +- drivers/infiniband/sw/siw/siw_qp_rx.c | 6 +++--- drivers/infiniband/sw/siw/siw_qp_tx.c | 19 ++++++------------- drivers/infiniband/sw/siw/siw_verbs.c | 4 ++-- include/rdma/ib_verbs.h | 25 +++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 1e17f8086d59..0e538fafcc20 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -210,10 +210,10 @@ err1: return err; } -static int rxe_set_page(struct ib_mr *ibmr, u64 iova) +static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr) { struct rxe_mr *mr = to_rmr(ibmr); - struct page *page = virt_to_page(iova & mr->page_mask); + struct page *page = ib_virt_dma_to_page(dma_addr); bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT); int err; @@ -279,16 +279,16 @@ static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr, return 0; } -static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr, +static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr, unsigned int length, enum rxe_mr_copy_dir dir) { - unsigned int page_offset = iova & (PAGE_SIZE - 1); + unsigned int page_offset = dma_addr & (PAGE_SIZE - 1); unsigned int bytes; struct page *page; u8 *va; while (length) { - page = virt_to_page(iova & mr->page_mask); + page = ib_virt_dma_to_page(dma_addr); bytes = min_t(unsigned int, length, PAGE_SIZE - page_offset); va = kmap_local_page(page); @@ -300,7 +300,7 @@ static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 iova, void *addr, kunmap_local(va); page_offset = 0; - iova += bytes; + dma_addr += bytes; addr += bytes; length -= bytes; } @@ -488,7 +488,7 @@ int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, if (mr->ibmr.type == IB_MR_TYPE_DMA) { page_offset = iova & (PAGE_SIZE - 1); - page = virt_to_page(iova & PAGE_MASK); + page = ib_virt_dma_to_page(iova); } else { unsigned long index; int err; @@ -545,7 +545,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) if (mr->ibmr.type == IB_MR_TYPE_DMA) { page_offset = iova & (PAGE_SIZE - 1); - page = virt_to_page(iova & PAGE_MASK); + page = ib_virt_dma_to_page(iova); } else { unsigned long index; int err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4e2db7c2e4ed..12819153bda7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -760,7 +760,7 @@ static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe, int i; for (i = 0; i < ibwr->num_sge; i++, sge++) { - memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); + memcpy(p, ib_virt_dma_to_page(sge->addr), sge->length); p += sge->length; } } diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index fd721cc19682..58bbf738e4e5 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -139,7 +139,7 @@ static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, break; bytes = min(bytes, len); - if (siw_rx_kva(srx, (void *)(uintptr_t)buf_addr, bytes) == + if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf_addr), bytes) == bytes) { copied += bytes; offset += bytes; @@ -487,7 +487,7 @@ int 
siw_proc_send(struct siw_qp *qp) mem_p = *mem; if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(uintptr_t)(sge->laddr + frx->sge_off), + ib_virt_dma_to_ptr(sge->laddr + frx->sge_off), sge_bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, @@ -852,7 +852,7 @@ int siw_proc_rresp(struct siw_qp *qp) if (mem_p->mem_obj == NULL) rv = siw_rx_kva(srx, - (void *)(uintptr_t)(sge->laddr + wqe->processed), + ib_virt_dma_to_ptr(sge->laddr + wqe->processed), bytes); else if (!mem_p->is_pbl) rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 6bb9e9e81ff4..4b292e0504f1 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); if (paddr) - return virt_to_page((void *)(uintptr_t)paddr); + return ib_virt_dma_to_page(paddr); return NULL; } @@ -56,8 +56,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) if (!mem->mem_obj) { /* Kernel client using kva */ - memcpy(paddr, - (const void *)(uintptr_t)sge->laddr, bytes); + memcpy(paddr, ib_virt_dma_to_ptr(sge->laddr), bytes); } else if (c_tx->in_syscall) { if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), bytes)) @@ -477,7 +476,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * or memory region with assigned kernel buffer */ iov[seg].iov_base = - (void *)(uintptr_t)(sge->laddr + sge_off); + ib_virt_dma_to_ptr(sge->laddr + sge_off); iov[seg].iov_len = sge_len; if (do_crc) @@ -537,19 +536,13 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) * Cast to an uintptr_t to preserve all 64 bits * in sge->laddr. */ - uintptr_t va = (uintptr_t)(sge->laddr + sge_off); + u64 va = sge->laddr + sge_off; - /* - * virt_to_page() takes a (void *) pointer - * so cast to a (void *) meaning it will be 64 - * bits on a 64 bit platform and 32 bits on a - * 32 bit platform. 
- */ - page_array[seg] = virt_to_page((void *)(va & PAGE_MASK)); + page_array[seg] = ib_virt_dma_to_page(va); if (do_crc) crypto_shash_update( c_tx->mpa_crc_hd, - (void *)va, + ib_virt_dma_to_ptr(va), plen); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 906fde1a2a0d..398ec13db624 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -660,7 +660,7 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, bytes = -EINVAL; break; } - memcpy(kbuf, (void *)(uintptr_t)core_sge->addr, + memcpy(kbuf, ib_virt_dma_to_ptr(core_sge->addr), core_sge->length); kbuf += core_sge->length; @@ -1523,7 +1523,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, } siw_dbg_mem(mem, "sge[%d], size %u, addr 0x%p, total %lu\n", - i, pble->size, (void *)(uintptr_t)pble->addr, + i, pble->size, ib_virt_dma_to_ptr(pble->addr), pbl_size); } rv = ib_sg_to_pages(base_mr, sl, num_sle, sg_off, siw_set_pbl_page); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 949cf4ffc536..1e7774ac808f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4035,6 +4035,31 @@ static inline bool ib_dma_pci_p2p_dma_supported(struct ib_device *dev) return dma_pci_p2pdma_supported(dev->dma_device); } +/** + * ib_virt_dma_to_ptr - Convert a dma_addr to a kernel pointer + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() devices to get back to the kernel pointer after + * going through the dma_addr marshalling. + */ +static inline void *ib_virt_dma_to_ptr(u64 dma_addr) +{ + /* virt_dma mode maps the kvs's directly into the dma addr */ + return (void *)(uintptr_t)dma_addr; +} + +/** + * ib_virt_dma_to_page - Convert a dma_addr to a struct page + * @dma_addr: The DMA address + * + * Used by ib_uses_virt_dma() device to get back to the struct page after going + * through the dma_addr marshalling. + */ +static inline struct page *ib_virt_dma_to_page(u64 dma_addr) +{ + return virt_to_page(ib_virt_dma_to_ptr(dma_addr)); +} + /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created -- cgit v1.2.3 From ccbbfe0682f2fff1e157413c30092dd27c50e20e Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Mon, 10 Apr 2023 16:07:52 +0300 Subject: net/mlx5: Update relaxed ordering read HCA capabilities Rename existing HCA capability relaxed_ordering_read to relaxed_ordering_read_pci_enabled. This is in accordance with recent PRM change to better describe the capability, as it's set only if both the device supports relaxed ordering (RO) read and RO is enabled in PCI config space. In addition, add new HCA capability relaxed_ordering_read which is set if the device supports RO read, regardless of RO in PCI config space. This will be used in the following patch to allow RO in VFs and VMs. 
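The resulting mkey logic, roughly (a sketch that mirrors the
set_mkc_access_pd_addr_fields() hunk below, not the full patch):

  /* RO read on an mkey still requires RO to be enabled in PCI config
   * space, which is exactly what the renamed capability expresses.
   */
  if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) &&
      pcie_relaxed_ordering_enabled(dev->mdev->pdev))
          MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);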
Signed-off-by: Avihai Horon Reviewed-by: Shay Drory Link: https://lore.kernel.org/r/caa0002fd8135086357dfcc368e2f5cc73b08480.1681131553.git.leon@kernel.org Reviewed-by: Jacob Keller Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 5 +++-- drivers/infiniband/hw/mlx5/umr.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bd0a818ba1cd..6a3a8e00bfaa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -70,7 +70,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, if (acc & IB_ACCESS_RELAXED_ORDERING) { if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + if (MLX5_CAP_GEN(dev->mdev, + relaxed_ordering_read_pci_enabled) && pcie_relaxed_ordering_enabled(dev->mdev->pdev)) MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); } @@ -791,7 +792,7 @@ static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev, ret |= IB_ACCESS_RELAXED_ORDERING; if ((access_flags & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) ret |= IB_ACCESS_RELAXED_ORDERING; diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h index c9d0021381a2..e12ecd7e079c 100644 --- a/drivers/infiniband/hw/mlx5/umr.h +++ b/drivers/infiniband/hw/mlx5/umr.h @@ -62,7 +62,7 @@ static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev, return false; if ((diffs & IB_ACCESS_RELAXED_ORDERING) && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 993af4c12d90..3c765a1f91a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -41,7 +41,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) { bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev); bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write); - bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read); + bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read_pci_enabled); MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read); MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_write); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index e4306cd87cd7..b54339a1b1c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1511,7 +1511,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_eq_sz[0x8]; u8 relaxed_ordering_write[0x1]; - u8 relaxed_ordering_read[0x1]; + u8 relaxed_ordering_read_pci_enabled[0x1]; u8 log_max_mkey[0x6]; u8 reserved_at_f0[0x6]; u8 terminate_scatter_list_mkey[0x1]; @@ -1727,7 +1727,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_320[0x3]; u8 log_max_transport_domain[0x5]; - u8 reserved_at_328[0x3]; + u8 reserved_at_328[0x2]; + u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; u8 reserved_at_330[0x9]; u8 q_counter_aggregation[0x1]; -- cgit v1.2.3 From 
3e358ea8614ddfbc59ca7a3f5dff5dde2b350b2c Mon Sep 17 00:00:00 2001
From: Mark Bloch
Date: Thu, 13 Apr 2023 12:23:09 +0300
Subject: RDMA/mlx5: Fix flow counter query via DEVX

The commit cited in the "fixes" tag added bulk support for flow
counters, but it didn't account for the fact that it's also possible to
query a counter using a non-base id if the counter was allocated as
bulk.

When a user performs a query, validate that the flow counter id given
in the mailbox is inside the valid range, taking the bulk value into
account.

Fixes: 208d70f562e5 ("IB/mlx5: Support flow counters offset for bulk counters")
Signed-off-by: Mark Bloch
Reviewed-by: Maor Gottlieb
Link: https://lore.kernel.org/r/79d7fbe291690128e44672418934256254d93115.1681377114.git.leon@kernel.org
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/hw/mlx5/devx.c | 31 ++++++++++++++++++++++++++-----
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 2 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 07037b829c7e..db5fb196c728 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -666,7 +666,21 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
 obj_id;

 case MLX5_IB_OBJECT_DEVX_OBJ:
- return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+ {
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ struct devx_obj *devx_uobj = uobj->object;
+
+ if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER &&
+ devx_uobj->flow_counter_bulk_size) {
+ u64 end;
+
+ end = devx_uobj->obj_id +
+ devx_uobj->flow_counter_bulk_size;
+ return devx_uobj->obj_id <= obj_id && end > obj_id;
+ }
+
+ return devx_uobj->obj_id == obj_id;
+ }

 default:
 return false;
@@ -1517,10 +1531,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 goto obj_free;

 if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
- u8 bulk = MLX5_GET(alloc_flow_counter_in,
- cmd_in,
- flow_counter_bulk);
- obj->flow_counter_bulk_size = 128UL * bulk;
+ u32 bulk = MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk_log_size);
+
+ if (bulk)
+ bulk = 1 << bulk;
+ else
+ bulk = 128UL * MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk);
+ obj->flow_counter_bulk_size = bulk;
 }

 uobj->object = obj;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b54339a1b1c6..3976e6266bcc 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9283,7 +9283,8 @@ struct mlx5_ifc_alloc_flow_counter_in_bits {
 u8 reserved_at_20[0x10];
 u8 op_mod[0x10];

- u8 reserved_at_40[0x38];
+ u8 reserved_at_40[0x33];
+ u8 flow_counter_bulk_log_size[0x5];
 u8 flow_counter_bulk[0x8];
 };
-- cgit v1.2.3


From 531094dc7164718d28ebb581d729807d7e846363 Mon Sep 17 00:00:00 2001
From: Yonatan Nachum
Date: Tue, 4 Apr 2023 15:43:13 +0000
Subject: RDMA/efa: Add rdma write capability to device caps

Add an rdma write capability that is propagated from the device to
rdma-core. Enable MR creation with remote write permissions according
to this device capability.
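A sketch of the effect on MR registration (the kernel-side lines mirror
the efa_alloc_mr() hunk below; the userspace check is a hypothetical
consumer of the new uapi bit):

  /* Kernel side: remote write is permitted only when the device reports it */
  supp_access_flags = IB_ACCESS_LOCAL_WRITE |
          (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
          (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);

  /* Userspace side: gate remote-write MR registration on the queried cap bit */
  if (resp.device_caps & EFA_QUERY_DEVICE_CAPS_RDMA_WRITE)
          access |= IBV_ACCESS_REMOTE_WRITE;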
Link: https://lore.kernel.org/r/20230404154313.35194-1-ynachum@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Michael Margolin Signed-off-by: Yonatan Nachum Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 12 +++++-- drivers/infiniband/hw/efa/efa_io_defs.h | 42 +++++++++++++++++-------- drivers/infiniband/hw/efa/efa_verbs.c | 6 +++- include/uapi/rdma/efa-abi.h | 1 + 4 files changed, 44 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 3db791e6c030..4e93ef7f84ee 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -376,7 +376,9 @@ struct efa_admin_reg_mr_cmd { * 0 : local_write_enable - Local write permissions: * must be set for RQ buffers and buffers posted for * RDMA Read requests - * 1 : reserved1 - MBZ + * 1 : remote_write_enable - Remote write + * permissions: must be set to enable RDMA write to + * the region * 2 : remote_read_enable - Remote read permissions: * must be set to enable RDMA read from the region * 7:3 : reserved2 - MBZ @@ -620,7 +622,9 @@ struct efa_admin_feature_device_attr_desc { * modify QP command * 2 : data_polling_128 - If set, 128 bytes data * polling is supported - * 31:3 : reserved - MBZ + * 3 : rdma_write - If set, RDMA Write is supported + * on TX queues + * 31:4 : reserved - MBZ */ u32 device_caps; @@ -674,7 +678,7 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum size of LLQ in bytes */ u32 max_llq_size; - /* Maximum number of SGEs for a single RDMA read WQE */ + /* Maximum number of SGEs for a single RDMA read/write WQE */ u16 max_wr_rdma_sges; /* @@ -979,6 +983,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) +#define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1) #define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ @@ -994,6 +999,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3) /* create_eq_cmd */ #define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h index 17ba8984b11e..2d8eb96eaa81 100644 --- a/drivers/infiniband/hw/efa/efa_io_defs.h +++ b/drivers/infiniband/hw/efa/efa_io_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_IO_H_ @@ -23,6 +23,8 @@ enum efa_io_send_op_type { EFA_IO_SEND = 0, /* RDMA read */ EFA_IO_RDMA_READ = 1, + /* RDMA write */ + EFA_IO_RDMA_WRITE = 2, }; enum efa_io_comp_status { @@ -62,8 +64,7 @@ struct efa_io_tx_meta_desc { /* * control flags - * 3:0 : op_type - operation type: send/rdma/fast mem - * ops/etc + * 3:0 : op_type - enum efa_io_send_op_type * 4 : has_imm - immediate_data field carries valid * data. 
* 5 : inline_msg - inline mode - inline message data @@ -219,21 +220,22 @@ struct efa_io_cdesc_common { * 2:1 : q_type - enum efa_io_queue_type: send/recv * 3 : has_imm - indicates that immediate data is * present - for RX completions only - * 7:4 : reserved28 - MBZ + * 6:4 : op_type - enum efa_io_send_op_type + * 7 : reserved31 - MBZ */ u8 flags; /* local QP number */ u16 qp_num; - - /* Transferred length */ - u16 length; }; /* Tx completion descriptor */ struct efa_io_tx_cdesc { /* Common completion info */ struct efa_io_cdesc_common common; + + /* MBZ */ + u16 reserved16; }; /* Rx Completion Descriptor */ @@ -241,6 +243,9 @@ struct efa_io_rx_cdesc { /* Common completion info */ struct efa_io_cdesc_common common; + /* Transferred length bits[15:0] */ + u16 length; + /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */ u16 ah; @@ -250,16 +255,26 @@ struct efa_io_rx_cdesc { u32 imm; }; +/* Rx Completion Descriptor RDMA write info */ +struct efa_io_rx_cdesc_rdma_write { + /* Transferred length bits[31:16] */ + u16 length_hi; +}; + /* Extended Rx Completion Descriptor */ struct efa_io_rx_cdesc_ex { /* Base RX completion info */ - struct efa_io_rx_cdesc rx_cdesc_base; + struct efa_io_rx_cdesc base; - /* - * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is - * enabled. - */ - u8 src_addr[16]; + union { + struct efa_io_rx_cdesc_rdma_write rdma_write; + + /* + * Valid only in case of unknown AH (0xFFFF) and CQ + * set_src_addr is enabled. + */ + u8 src_addr[16]; + } u; }; /* tx_meta_desc */ @@ -285,5 +300,6 @@ struct efa_io_rx_cdesc_ex { #define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) #define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) #define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) +#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4) #endif /* _EFA_IO_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index c20136e9d3d1..8eca6c14d0cf 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -253,6 +253,9 @@ int efa_query_device(struct ib_device *ibdev, if (EFA_DEV_CAP(dev, DATA_POLLING_128)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128; + if (EFA_DEV_CAP(dev, RDMA_WRITE)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE; + if (dev->neqs) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; @@ -1572,7 +1575,8 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags, supp_access_flags = IB_ACCESS_LOCAL_WRITE | - (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0); + (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) | + (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0); access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 74406b4817ce..d94c32f28804 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -121,6 +121,7 @@ enum { EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3, EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128 = 1 << 4, + EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5, }; struct efa_ibv_ex_query_device_resp { -- cgit v1.2.3