diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:57:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:57:10 -0800 |
commit | 5d24ae67a961c51beb255a28c9c417d9710247c2 (patch) | |
tree | c23c71b2f17f4502554c80b84be476e4c08f7160 /drivers/infiniband/hw | |
parent | 938edb8a31b976c9a92eb0cd4ff481e93f76c1f1 (diff) | |
parent | f617e5ffe04fd46010b618c9eeadaa04588704c9 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a fairly typical cycle, with the usual sorts of driver
updates. Several series continue to come through which improve and
modernize various parts of the core code, and we finally are starting
to get the uAPI command interface cleaned up.
- Various driver fixes for bnxt_re, cxgb3/4, hfi1, hns, i40iw, mlx4,
mlx5, qib, rxe, usnic
- Rework the entire syscall flow for uverbs to be able to run over
ioctl(). Finally getting past the historic bad choice to use
write() for command execution
- More functional coverage with the mlx5 'devx' user API
- Start of the HFI1 series for 'TID RDMA'
- SRQ support in the hns driver
- Support for new IBTA defined 2x lane widths
- A big series to consolidate all the driver function pointers into a
big struct and have drivers provide a 'static const' version of the
struct instead of open coding initialization
- New 'advise_mr' uAPI to control device caching/loading of page
tables
- Support for inline data in SRPT
- Modernize how umad uses the driver core and creates cdev's and
sysfs files
- First steps toward removing 'uobject' from the view of the drivers"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (193 commits)
RDMA/srpt: Use kmem_cache_free() instead of kfree()
RDMA/mlx5: Signedness bug in UVERBS_HANDLER()
IB/uverbs: Signedness bug in UVERBS_HANDLER()
IB/mlx5: Allocate the per-port Q counter shared when DEVX is supported
IB/umad: Start using dev_groups of class
IB/umad: Use class_groups and let core create class file
IB/umad: Refactor code to use cdev_device_add()
IB/umad: Avoid destroying device while it is accessed
IB/umad: Simplify and avoid dynamic allocation of class
IB/mlx5: Fix wrong error unwind
IB/mlx4: Remove set but not used variable 'pd'
RDMA/iwcm: Don't copy past the end of dev_name() string
IB/mlx5: Fix long EEH recover time with NVMe offloads
IB/mlx5: Simplify netdev unbinding
IB/core: Move query port to ioctl
RDMA/nldev: Expose port_cap_flags2
IB/core: uverbs copy to struct or zero helper
IB/rxe: Reuse code which sets port state
IB/rxe: Make counters thread safe
IB/mlx5: Use the correct commands for UMEM and UCTX allocation
...
Diffstat (limited to 'drivers/infiniband/hw')
108 files changed, 3681 insertions, 1609 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 54fdd4cf5288..1e2515e2eb62 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -647,13 +647,14 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); return rc; @@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah) struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); @@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - if (ib_pd->uobject && + if (udata && !rdma_is_multicast_addr((struct in6_addr *) grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) @@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); goto fail; } /* Write AVID to shared page. */ - if (ib_pd->uobject) { + if (udata) { struct ib_ucontext *ib_uctx = ib_pd->uobject->context; struct bnxt_re_ucontext *uctx; unsigned long flag; @@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah); + &rdev->sqp_ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH for shadow QP"); @@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah /* Have DMAC same as SMAC */ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH for Shadow QP"); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index aa33e7b82c84..c4af72604b4f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, int bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 77f095e5fbe3..e7a997f2a537 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) ib_unregister_device(&rdev->ibdev); } +static const struct ib_device_ops bnxt_re_dev_ops = { + .add_gid = bnxt_re_add_gid, + .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats, + .alloc_mr = bnxt_re_alloc_mr, + .alloc_pd = bnxt_re_alloc_pd, + .alloc_ucontext = bnxt_re_alloc_ucontext, + .create_ah = bnxt_re_create_ah, + .create_cq = bnxt_re_create_cq, + .create_qp = bnxt_re_create_qp, + .create_srq = bnxt_re_create_srq, + .dealloc_pd = bnxt_re_dealloc_pd, + .dealloc_ucontext = bnxt_re_dealloc_ucontext, + .del_gid = bnxt_re_del_gid, + .dereg_mr = bnxt_re_dereg_mr, + .destroy_ah = bnxt_re_destroy_ah, + .destroy_cq = bnxt_re_destroy_cq, + .destroy_qp = bnxt_re_destroy_qp, + .destroy_srq = bnxt_re_destroy_srq, + .get_dev_fw_str = bnxt_re_query_fw_str, + .get_dma_mr = bnxt_re_get_dma_mr, + .get_hw_stats = bnxt_re_ib_get_hw_stats, + .get_link_layer = bnxt_re_get_link_layer, + .get_netdev = bnxt_re_get_netdev, + .get_port_immutable = bnxt_re_get_port_immutable, + .map_mr_sg = bnxt_re_map_mr_sg, + .mmap = bnxt_re_mmap, + .modify_ah = bnxt_re_modify_ah, + .modify_device = bnxt_re_modify_device, + .modify_qp = bnxt_re_modify_qp, + .modify_srq = bnxt_re_modify_srq, + .poll_cq = bnxt_re_poll_cq, + .post_recv = bnxt_re_post_recv, + .post_send = bnxt_re_post_send, + .post_srq_recv = bnxt_re_post_srq_recv, + .query_ah = bnxt_re_query_ah, + .query_device = bnxt_re_query_device, + .query_pkey = bnxt_re_query_pkey, + .query_port = bnxt_re_query_port, + .query_qp = bnxt_re_query_qp, + .query_srq = bnxt_re_query_srq, + .reg_user_mr = bnxt_re_reg_user_mr, + .req_notify_cq = bnxt_re_req_notify_cq, +}; + static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; @@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) (1ull << IB_USER_VERBS_CMD_DESTROY_AH); /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - /* Kernel verbs */ - ibdev->query_device = bnxt_re_query_device; - ibdev->modify_device = bnxt_re_modify_device; - - ibdev->query_port = bnxt_re_query_port; - ibdev->get_port_immutable = bnxt_re_get_port_immutable; - ibdev->get_dev_fw_str = bnxt_re_query_fw_str; - ibdev->query_pkey = bnxt_re_query_pkey; - ibdev->get_netdev = bnxt_re_get_netdev; - ibdev->add_gid = bnxt_re_add_gid; - ibdev->del_gid = bnxt_re_del_gid; - ibdev->get_link_layer = bnxt_re_get_link_layer; - - ibdev->alloc_pd = bnxt_re_alloc_pd; - ibdev->dealloc_pd = bnxt_re_dealloc_pd; - - ibdev->create_ah = bnxt_re_create_ah; - ibdev->modify_ah = bnxt_re_modify_ah; - ibdev->query_ah = bnxt_re_query_ah; - ibdev->destroy_ah = bnxt_re_destroy_ah; - - ibdev->create_srq = bnxt_re_create_srq; - ibdev->modify_srq = bnxt_re_modify_srq; - ibdev->query_srq = bnxt_re_query_srq; - ibdev->destroy_srq = bnxt_re_destroy_srq; - ibdev->post_srq_recv = bnxt_re_post_srq_recv; - - ibdev->create_qp = bnxt_re_create_qp; - ibdev->modify_qp = bnxt_re_modify_qp; - ibdev->query_qp = bnxt_re_query_qp; - ibdev->destroy_qp = bnxt_re_destroy_qp; - - ibdev->post_send = bnxt_re_post_send; - ibdev->post_recv = bnxt_re_post_recv; - - ibdev->create_cq = bnxt_re_create_cq; - ibdev->destroy_cq = bnxt_re_destroy_cq; - ibdev->poll_cq = bnxt_re_poll_cq; - ibdev->req_notify_cq = bnxt_re_req_notify_cq; - - ibdev->get_dma_mr = bnxt_re_get_dma_mr; - ibdev->dereg_mr = bnxt_re_dereg_mr; - ibdev->alloc_mr = bnxt_re_alloc_mr; - ibdev->map_mr_sg = bnxt_re_map_mr_sg; - - ibdev->reg_user_mr = bnxt_re_reg_user_mr; - ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; - ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; - ibdev->mmap = bnxt_re_mmap; - ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; - ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); return ib_register_device(ibdev, "bnxt_re%d", NULL); } @@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_ver_get_output resp = {0}; + struct hwrm_ver_get_input req = {0}; + struct bnxt_fw_msg fw_msg; + int rc = 0; + + memset(&fw_msg, 0, sizeof(fw_msg)); + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, + HWRM_VER_GET, -1, -1); + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; + req.hwrm_intf_min = HWRM_VERSION_MINOR; + req.hwrm_intf_upd = HWRM_VERSION_UPDATE; + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to query HW version, rc = 0x%x", rc); + return; + } + rdev->qplib_ctx.hwrm_intf_ver = + (u64)resp.hwrm_intf_major << 48 | + (u64)resp.hwrm_intf_minor << 32 | + (u64)resp.hwrm_intf_build << 16 | + resp.hwrm_intf_patch; +} + static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int rc; @@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); + bnxt_re_query_hwrm_intf_version(rdev); + /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + &rdev->qplib_ctx, BNXT_RE_MAX_QPC_COUNT); if (rc) { pr_err("Failed to allocate RCFW Channel: %#x\n", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index be4e33e9f962..326805461265 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u16 cbit; int rc; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; rc = wait_event_timeout(rcfw->waitq, !test_bit(cbit, rcfw->cmdq_bitmap), msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); @@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; u16 cbit; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (!test_bit(cbit, rcfw->cmdq_bitmap)) goto done; do { @@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, { struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr; struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + u32 cmdq_depth = rcfw->cmdq_depth; struct bnxt_qplib_crsq *crsqe; u32 sw_prod, cmdq_prod; unsigned long flags; @@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (is_block) cookie |= RCFW_CMD_IS_BLOCKING; @@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); - cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; + cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] + [get_cmdq_idx(sw_prod, cmdq_depth)]; if (!cmdqe) { dev_err(&rcfw->pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; crsqe = &rcfw->crsqe_tbl[cbit]; if (crsqe->resp && crsqe->resp->cookie == mcookie) { @@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { rcfw->pdev = pdev; @@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, "HW channel CREQ allocation failed\n"); goto fail; } - rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0, - &rcfw->cmdq.max_elements, - BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_CTX)) { + if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK) + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256; + else + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; + + rcfw->cmdq.max_elements = rcfw->cmdq_depth; + if (bnxt_qplib_alloc_init_hwq + (rcfw->pdev, &rcfw->cmdq, NULL, 0, + &rcfw->cmdq.max_elements, + BNXT_QPLIB_CMDQE_UNITS, 0, + bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), + HWQ_TYPE_CTX)) { dev_err(&rcfw->pdev->dev, "HW channel CMDQ allocation failed\n"); goto fail; @@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD * + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth * sizeof(unsigned long)); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); if (!rcfw->cmdq_bitmap) @@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); init.cmdq_size_cmdq_lvl = cpu_to_le16( - ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) & + ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) & CMDQ_INIT_CMDQ_SIZE_MASK) | ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) & CMDQ_INIT_CMDQ_LVL_MASK)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9a8687dc0a79..be0ef0e8c53e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -63,32 +63,60 @@ #define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */ +/* Cmdq contains a fix number of a 16-Byte slots */ +struct bnxt_qplib_cmdqe { + u8 data[16]; +}; + /* CMDQ elements */ -#define BNXT_QPLIB_CMDQE_MAX_CNT 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192 #define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe) -#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS) +#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS) + +static inline u32 bnxt_qplib_cmdqe_npages(u32 depth) +{ + u32 npages; + + npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE; + if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE) + npages++; + return npages; +} + +static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) +{ + return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE); +} + +static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_page_size(depth) / + BNXT_QPLIB_CMDQE_UNITS); +} -#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1) -#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1) +#define MAX_CMDQ_IDX(depth) ((depth) - 1) + +static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); +} -#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 -/* Cmdq contains a fix number of a 16-Byte slots */ -struct bnxt_qplib_cmdqe { - u8 data[16]; -}; +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val) +static inline u32 get_cmdq_pg(u32 val, u32 depth) { - return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG; + return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / + (bnxt_qplib_cmdqe_cnt_per_pg(depth)); } -static inline u32 get_cmdq_idx(u32 val) +static inline u32 get_cmdq_idx(u32 val, u32 depth) { - return val & MAX_CMDQ_IDX_PER_PG; + return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); } /* Crsq buf is 1024-Byte */ @@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw { struct bnxt_qplib_qp_node *qp_tbl; u64 oos_prev; u32 init_oos_stats; + u32 cmdq_depth; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); + struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, + int qp_tbl_sz); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e5c052da5a9..1e80aa7bbcce 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -177,6 +177,7 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ]; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; + u64 hwrm_intf_ver; }; struct bnxt_qplib_res { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 5216b5f844cc..be03b5738f71 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, } /* AH */ -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_create_ah req; @@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.dest_mac[2] = cpu_to_le16(temp16[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; @@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; @@ -544,7 +546,7 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.ah_cid = cpu_to_le32(ah->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 8079d7f5a008..39454b3f738d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index dcb4bba522ba..df4f7a3f043d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->sq) goto err3; - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev), depth * sizeof(union t3_wr), &(wq->dma_addr), GFP_KERNEL); if (!wq->queue) goto err4; - memset(wq->queue, 0, depth * sizeof(union t3_wr)); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; if (!kernel_domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ebbec02cebe0..b34b1a1bd94b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, * Kernel users need more wq space for fastreg WRs which can take * 2 WR fragments. */ - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL; if (!ucontext && wqsize < (rqsize + (2 * sqsize))) wqsize = roundup_pow_of_two(rqsize + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); @@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops iwch_dev_ops = { + .alloc_hw_stats = iwch_alloc_stats, + .alloc_mr = iwch_alloc_mr, + .alloc_mw = iwch_alloc_mw, + .alloc_pd = iwch_allocate_pd, + .alloc_ucontext = iwch_alloc_ucontext, + .create_cq = iwch_create_cq, + .create_qp = iwch_create_qp, + .dealloc_mw = iwch_dealloc_mw, + .dealloc_pd = iwch_deallocate_pd, + .dealloc_ucontext = iwch_dealloc_ucontext, + .dereg_mr = iwch_dereg_mr, + .destroy_cq = iwch_destroy_cq, + .destroy_qp = iwch_destroy_qp, + .get_dev_fw_str = get_dev_fw_ver_str, + .get_dma_mr = iwch_get_dma_mr, + .get_hw_stats = iwch_get_mib, + .get_port_immutable = iwch_port_immutable, + .map_mr_sg = iwch_map_mr_sg, + .mmap = iwch_mmap, + .modify_qp = iwch_ib_modify_qp, + .poll_cq = iwch_poll_cq, + .post_recv = iwch_post_receive, + .post_send = iwch_post_send, + .query_device = iwch_query_device, + .query_gid = iwch_query_gid, + .query_pkey = iwch_query_pkey, + .query_port = iwch_query_port, + .reg_user_mr = iwch_reg_user_mr, + .req_notify_cq = iwch_arm_cq, + .resize_cq = iwch_resize_cq, +}; + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - dev->ibdev.query_device = iwch_query_device; - dev->ibdev.query_port = iwch_query_port; - dev->ibdev.query_pkey = iwch_query_pkey; - dev->ibdev.query_gid = iwch_query_gid; - dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; - dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; - dev->ibdev.mmap = iwch_mmap; - dev->ibdev.alloc_pd = iwch_allocate_pd; - dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_qp = iwch_create_qp; - dev->ibdev.modify_qp = iwch_ib_modify_qp; - dev->ibdev.destroy_qp = iwch_destroy_qp; - dev->ibdev.create_cq = iwch_create_cq; - dev->ibdev.destroy_cq = iwch_destroy_cq; - dev->ibdev.resize_cq = iwch_resize_cq; - dev->ibdev.poll_cq = iwch_poll_cq; - dev->ibdev.get_dma_mr = iwch_get_dma_mr; - dev->ibdev.reg_user_mr = iwch_reg_user_mr; - dev->ibdev.dereg_mr = iwch_dereg_mr; - dev->ibdev.alloc_mw = iwch_alloc_mw; - dev->ibdev.dealloc_mw = iwch_dealloc_mw; - dev->ibdev.alloc_mr = iwch_alloc_mr; - dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.req_notify_cq = iwch_arm_cq; - dev->ibdev.post_send = iwch_post_send; - dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.alloc_hw_stats = iwch_alloc_stats; - dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = iwch_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); + ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); if (ret) kfree(dev->ibdev.iwcm); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 97ecc8c684f5..8221813219e5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2793,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: (void)stop_ep_timer(ep); - if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || + (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index cbb3c0ddd990..586b0c37481f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) c4iw_restrack_funcs[res->type](msg, res) : 0; } +static const struct ib_device_ops c4iw_dev_ops = { + .alloc_hw_stats = c4iw_alloc_stats, + .alloc_mr = c4iw_alloc_mr, + .alloc_mw = c4iw_alloc_mw, + .alloc_pd = c4iw_allocate_pd, + .alloc_ucontext = c4iw_alloc_ucontext, + .create_cq = c4iw_create_cq, + .create_qp = c4iw_create_qp, + .create_srq = c4iw_create_srq, + .dealloc_mw = c4iw_dealloc_mw, + .dealloc_pd = c4iw_deallocate_pd, + .dealloc_ucontext = c4iw_dealloc_ucontext, + .dereg_mr = c4iw_dereg_mr, + .destroy_cq = c4iw_destroy_cq, + .destroy_qp = c4iw_destroy_qp, + .destroy_srq = c4iw_destroy_srq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = c4iw_get_dma_mr, + .get_hw_stats = c4iw_get_mib, + .get_netdev = get_netdev, + .get_port_immutable = c4iw_port_immutable, + .map_mr_sg = c4iw_map_mr_sg, + .mmap = c4iw_mmap, + .modify_qp = c4iw_ib_modify_qp, + .modify_srq = c4iw_modify_srq, + .poll_cq = c4iw_poll_cq, + .post_recv = c4iw_post_receive, + .post_send = c4iw_post_send, + .post_srq_recv = c4iw_post_srq_recv, + .query_device = c4iw_query_device, + .query_gid = c4iw_query_gid, + .query_pkey = c4iw_query_pkey, + .query_port = c4iw_query_port, + .query_qp = c4iw_ib_query_qp, + .reg_user_mr = c4iw_reg_user_mr, + .req_notify_cq = c4iw_arm_cq, +}; + void c4iw_register_device(struct work_struct *work) { int ret; @@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; - dev->ibdev.query_device = c4iw_query_device; - dev->ibdev.query_port = c4iw_query_port; - dev->ibdev.query_pkey = c4iw_query_pkey; - dev->ibdev.query_gid = c4iw_query_gid; - dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; - dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; - dev->ibdev.mmap = c4iw_mmap; - dev->ibdev.alloc_pd = c4iw_allocate_pd; - dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_qp = c4iw_create_qp; - dev->ibdev.modify_qp = c4iw_ib_modify_qp; - dev->ibdev.query_qp = c4iw_ib_query_qp; - dev->ibdev.destroy_qp = c4iw_destroy_qp; - dev->ibdev.create_srq = c4iw_create_srq; - dev->ibdev.modify_srq = c4iw_modify_srq; - dev->ibdev.destroy_srq = c4iw_destroy_srq; - dev->ibdev.create_cq = c4iw_create_cq; - dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.poll_cq = c4iw_poll_cq; - dev->ibdev.get_dma_mr = c4iw_get_dma_mr; - dev->ibdev.reg_user_mr = c4iw_reg_user_mr; - dev->ibdev.dereg_mr = c4iw_dereg_mr; - dev->ibdev.alloc_mw = c4iw_alloc_mw; - dev->ibdev.dealloc_mw = c4iw_dealloc_mw; - dev->ibdev.alloc_mr = c4iw_alloc_mr; - dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.req_notify_cq = c4iw_arm_cq; - dev->ibdev.post_send = c4iw_post_send; - dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.post_srq_recv = c4iw_post_srq_recv; - dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; - dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = c4iw_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; - dev->ibdev.get_netdev = get_netdev; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { @@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work) rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; + ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); if (ret) goto err_kfree_iwcm; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 13478f3b7057..981ff5cfb5d1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (sqsize < 8) sqsize = 8; - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) @@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> T4_RQT_ENTRY_SHIFT; - wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev, wq->memsize, &wq->dma_addr, GFP_KERNEL); if (!wq->queue) goto err_free_rqtpool; - memset(wq->queue, 0, wq->memsize); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, @@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rqsize = attrs->attr.max_wr + 1; rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; srq = kzalloc(sizeof(*srq), GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ff790390c91a..3ce9dc8c3463 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -34,6 +34,7 @@ hfi1-y := \ ruc.o \ sdma.o \ sysfs.o \ + tid_rdma.o \ trace.o \ uc.o \ ud.o \ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7e6d70936c63..b443642eac02 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c6163a347e93..c0800ea5a3f8 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -935,6 +935,10 @@ #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018) +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32 +#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028) #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7108d4d92259..40d3cfb58bd1 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -136,18 +136,21 @@ HFI1_CAP_ALLOW_PERM_JKEY | \ HFI1_CAP_STATIC_RATE_CTRL | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_TID_UNMAP) + HFI1_CAP_TID_UNMAP | \ + HFI1_CAP_OPFN) /* * A set of capability bits that are "global" and are not allowed to be * set in the user bitmask. */ #define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \ - HFI1_CAP_USE_SDMA_HEAD | \ - HFI1_CAP_EXTENDED_PSN | \ - HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_NO_INTEGRITY | \ - HFI1_CAP_PKEY_CHECK) << \ - HFI1_CAP_USER_SHIFT) + HFI1_CAP_USE_SDMA_HEAD | \ + HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_PRINT_UNIMPL | \ + HFI1_CAP_NO_INTEGRITY | \ + HFI1_CAP_PKEY_CHECK | \ + HFI1_CAP_TID_RDMA | \ + HFI1_CAP_OPFN) << \ + HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in * order to be allowed for user contexts, as well. diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 9f992ae36c89..0a557795563c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds); DEBUGFS_SEQ_FILE_OPEN(rcds) DEBUGFS_FILE_OPS(rcds); +static void *_pios_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void _pios_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _pios_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct send_context_info *sci; + loff_t *spos = v; + loff_t i = *spos; + unsigned long flags; + + spin_lock_irqsave(&dd->sc_lock, flags); + sci = &dd->send_contexts[i]; + if (sci && sci->type != SC_USER && sci->allocated && sci->sc) + seqfile_dump_sci(s, i, sci); + spin_unlock_irqrestore(&dd->sc_lock, flags); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(pios); +DEBUGFS_SEQ_FILE_OPEN(pios) +DEBUGFS_FILE_OPS(pios); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f85558312..a8ad70730203 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_process_ecn_slowpath - Process FECN or BECN bits + * @qp: The packet's destination QP + * @pkt: The packet itself. + * @prescan: Is the caller the RXQ prescan + * + * Process the packet's FECN or BECN bits. By now, the packet + * has already been evaluated whether processing of those bit should + * be done. + * The significance of the @prescan argument is that if the caller + * is the RXQ prescan, a CNP will be send out instead of waiting for the + * normal packet processing to send an ACK with BECN set (or a CNP). + */ +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; - u32 rqpn = 0, bth1; + u32 rqpn = 0; u16 pkey; u32 rlid, slid, dlid = 0; - u8 hdr_type, sc, svc_type; - bool is_mcast = false; + u8 hdr_type, sc, svc_type, opcode; + bool is_mcast = false, ignore_fecn = false, do_cnp = false, + fecn, becn; /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { - is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); dlid = hfi1_16B_get_dlid(pkt->hdr); slid = hfi1_16B_get_slid(pkt->hdr); + is_mcast = hfi1_is_16B_mcast(dlid); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_16B; + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); } else { - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); - dlid = ib_get_dlid(pkt->hdr); + dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) : + ppd->lid; slid = ib_get_slid(pkt->hdr); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_9B; + fecn = ib_bth_get_fecn(ohdr); + becn = ib_bth_get_becn(ohdr); } switch (qp->ibqp.qp_type) { case IB_QPT_UD: - dlid = ppd->lid; rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; @@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, svc_type = IB_CC_SVCTYPE_RC; break; default: - return; + return false; } - bth1 = be32_to_cpu(ohdr->bth[1]); + ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) || + (opcode == IB_OPCODE_RC_ACKNOWLEDGE); + /* + * ACKNOWLEDGE packets do not get a CNP but this will be + * guarded by ignore_fecn above. + */ + do_cnp = prescan || + (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + /* Call appropriate CNP handler */ - if (do_cnp && (bth1 & IB_FECN_SMASK)) + if (!ignore_fecn && do_cnp && fecn) hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - u32 lqpn = bth1 & RVT_QPN_MASK; + if (becn) { + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } - + return !ignore_fecn && fecn; } struct ps_mdata { @@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; - int is_ecn = 0; u8 lnh; if (ps_done(&mdata, rhf, rcd)) @@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; /* just in case */ } - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); - - if (!is_ecn) + if (!hfi1_may_ecn(packet)) goto next; + bth1 = be32_to_cpu(packet->ohdr->bth[1]); qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); @@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, packet, true); + hfi1_process_ecn_slowpath(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) if ((!(hfi1_is_16B_mcast(packet->dlid))) && (packet->dlid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { - if (packet->dlid != ppd->lid) + if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid) return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2b882347d0c2..6db2276f5c13 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd) return &rcd->ppd->ibport_data; } -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp); -static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_may_ecn - Check whether FECN or BECN processing should be done + * @pkt: the packet to be evaluated + * + * Check whether the FECN or BECN bits in the packet's header are + * enabled, depending on packet type. + * + * This function only checks for FECN and BECN bits. Additional checks + * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to + * ensure correct handling. + */ +static inline bool hfi1_may_ecn(struct hfi1_packet *pkt) { - bool becn; - bool fecn; + bool fecn, becn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); @@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, fecn = ib_bth_get_fecn(pkt->ohdr); becn = ib_bth_get_becn(pkt->ohdr); } - if (unlikely(fecn || becn)) { - hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return fecn; - } + return fecn || becn; +} + +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt) +{ + bool do_work; + + do_work = hfi1_may_ecn(pkt); + if (unlikely(do_work)) + return hfi1_process_ecn_slowpath(qp, pkt, false); return false; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 88a0cf930136..4228393e6c4c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9ab50d2308dc..dd5a5c030066 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, spin_lock_init(&sc->alloc_lock); spin_lock_init(&sc->release_lock); spin_lock_init(&sc->credit_ctrl_lock); + seqlock_init(&sc->waitlock); INIT_LIST_HEAD(&sc->piowait); INIT_WORK(&sc->halt_work, sc_halted); init_waitqueue_head(&sc->halt_wait); @@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) static void sc_piobufavail(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; struct rvt_qp *qp; @@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc) * could end up with QPs on the wait list with the interrupt * disabled. */ - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&sc->waitlock, flags); while (!list_empty(list)) { struct iowait *wait; @@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc) if (!list_empty(list)) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&sc->waitlock, flags); /* Wake up the most starved one first */ if (n) @@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd) kfree(dd->cr_base); dd->cr_base = NULL; } + +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci) +{ + struct send_context *sc = sci->sc; + u64 reg; + + seq_printf(s, "SCI %u: type %u base %u credits %u\n", + i, sci->type, sci->base, sci->credits); + seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", + sc->flags, sc->sw_index, sc->hw_context, sc->group); + seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", + sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); + seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", + sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); + seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", + sc->credit_intr_count, sc->credit_ctrl); + reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); + seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", + (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> + CR_COUNTER_SHIFT, + (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & + SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), + reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); +} diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index aaf372c3e5d6..c9a58b642bdd 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -127,6 +127,8 @@ struct send_context { volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; + seqlock_t waitlock; + spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ @@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci); + #endif /* _PIO_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a016248039f..5344e8993b28 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -459,7 +457,6 @@ static int iowait_sleep( struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -472,9 +469,8 @@ static int iowait_sleep( * buffer and undoing the side effects of the copy. */ /* Make a common routine? */ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -485,11 +481,11 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; @@ -499,7 +495,7 @@ static int iowait_sleep( } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 188aa4f686a0..be603f35d7e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; unsigned long flags; int ret; - bool is_fecn = false; - bool copy_last = false; + bool copy_last = false, fecn; u32 rkey; u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); @@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet, false); + fecn = process_ecn(qp, packet); /* * Process responses (ACKs) before anything else. Note that the @@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { rc_rcv_resp(packet); - if (is_fecn) - goto send_ack; return; } @@ -2347,11 +2346,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2413,11 +2412,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2430,16 +2429,9 @@ send_last: qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & IB_BTH_REQ_ACK) { - if (packet->numpkt == 0) { - rc_cancel_ack(qp); - goto send_ack; - } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { - rc_cancel_ack(qp); - goto send_ack; - } - if (unlikely(is_fecn)) { + if (psn & IB_BTH_REQ_ACK || fecn) { + if (packet->numpkt == 0 || fecn || + qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2480,7 +2472,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(packet, is_fecn); + hfi1_send_rc_ack(packet, fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 891d2386d1ca..b84356e1a4c1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); spin_lock_init(&sde->flushlist_lock); + seqlock_init(&sde->waitlock); /* insure there is always a zero bit */ sde->ahg_bits = 0xfffffffe00000000ULL; @@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; uint i, n = 0, seq, max_idx = 0; - struct hfi1_ibdev *dev = &sde->dd->verbs_dev; u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY @@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) #endif do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&sde->waitlock); if (!list_empty(&sde->dmawait)) { /* at least one item */ - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); /* Harvest waiters wanting DMA descriptors */ list_for_each_entry_safe( wait, @@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) list_del_init(&wait->list); waits[n++] = wait; } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&sde->waitlock, seq)); /* Schedule the most starved one first */ if (n) diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 6dc63d7c5685..1e2e40f79cb2 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -382,6 +382,7 @@ struct sdma_engine { u64 progress_int_cnt; /* private: */ + seqlock_t waitlock; struct list_head dmawait; /* CONFIG SDMA for now, just blindly duplicate */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c new file mode 100644 index 000000000000..da1ecb68a928 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. + * + */ + +#include "hfi.h" +#include "verbs.h" +#include "tid_rdma.h" + +/** + * qp_to_rcd - determine the receive context used by a qp + * @qp - the qp + * + * This routine returns the receive context associated + * with a a qp's qpn. + * + * Returns the context. + */ +static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, + struct rvt_qp *qp) +{ + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + unsigned int ctxt; + + if (qp->ibqp.qp_num == 0) + ctxt = 0; + else + ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % + (dd->n_krcv_queues - 1)) + 1; + + return dd->rcd[ctxt]; +} + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct hfi1_qp_priv *qpriv = qp->priv; + + qpriv->rcd = qp_to_rcd(rdi, qp); + + return 0; +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h new file mode 100644 index 000000000000..6fcd3adcdcc3 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + */ +#ifndef HFI1_TID_RDMA_H +#define HFI1_TID_RDMA_H + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr); + +#endif /* HFI1_TID_RDMA_H */ + diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 6aca0c5a7f97..6ba47037c424 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - process_ecn(qp, packet, true); + process_ecn(qp, packet); psn = ib_bth_get_psn(ohdr); /* Compare the PSN verses the expected PSN. */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 4baa8f4d49de..88242fe95eaa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -51,6 +51,7 @@ #include "hfi.h" #include "mad.h" #include "verbs_txreq.h" +#include "trace_ibhdrs.h" #include "qp.h" /* We support only two types - 9B and 16B for now */ @@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 bth0, plen, vl, hwords = 7; u16 len; u8 l4; - struct hfi1_16b_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 nwords; + hdr.hdr_type = HFI1_PKT_TYPE_16B; /* Populate length */ nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.opah.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.opah.u.oth; l4 = OPA_16B_L4_IB_LOCAL; } @@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, /* Convert dwords to flits */ len = (hwords + nwords) >> 1; - hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5); plen = 2 /* PBC */ + hwords + nwords; pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; @@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct ib_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + hdr.hdr_type = HFI1_PKT_TYPE_9B; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.ibh.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.ibh.u.l.oth; lrh0 = HFI1_LRH_GRH; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.ibh.u.oth; lrh0 = HFI1_LRH_BTH; } @@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); + hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) src_qp = hfi1_16B_get_src_qpn(packet->mgmt); } - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); + process_ecn(qp, packet); /* * Get the number of bytes the message was padded by * and drop incomplete packets. diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 3f0aadccd9f6..e5e7fad09f32 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,7 +130,6 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct hfi1_ibdev *dev = &pq->dd->verbs_dev; struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); @@ -144,10 +143,10 @@ static int defer_packet_queue( * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; eagain: return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a365089a9305..ec582d86025f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; int ret = 0; @@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp, */ spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { - write_seqlock(&dev->iowait_lock); + write_seqlock(&sc->waitlock); list_add_tail(&ps->s_txreq->txreq.list, &ps->wait->tx_head); if (list_empty(&priv->s_iowait.list)) { @@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, &sc->piowait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sc->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sc->waitlock); hfi1_qp_unbusy(qp, ps->wait); ret = -EBUSY; } @@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -1616,6 +1617,16 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return count; } +static const struct ib_device_ops hfi1_dev_ops = { + .alloc_hw_stats = alloc_hw_stats, + .alloc_rdma_netdev = hfi1_vnic_alloc_rn, + .get_dev_fw_str = hfi1_get_dev_fw_str, + .get_hw_stats = get_hw_stats, + .modify_device = modify_device, + /* keep process mad in the driver */ + .process_mad = hfi1_process_mad, +}; + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = modify_device; - ibdev->alloc_hw_stats = alloc_hw_stats; - ibdev->get_hw_stats = get_hw_stats; - ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; - /* keep process mad in the driver */ - ibdev->process_mad = hfi1_process_mad; - ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; + ib_set_device_ops(ibdev, &hfi1_dev_ops); strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 64c9054db5f3..1ad0b14bdb3c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -71,6 +71,7 @@ struct hfi1_devdata; struct hfi1_packet; #include "iowait.h" +#include "tid_rdma.h" #define HFI1_MAX_RDMA_ATOMIC 16 @@ -156,6 +157,7 @@ struct hfi1_qp_priv { struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ + struct hfi1_ctxtdata *rcd; /* QP's receive context */ u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c9876d9e3cb9..a922db58be14 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - chip_sdma_engines(dd), dd->num_vnic_contexts); + dd->num_sdma, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = chip_sdma_engines(dd); + vinfo->num_tx_q = dd->num_sdma; vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 97bd940a056a..1f81c480e028 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * @retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait), &tx->txreq, vnic_sdma->pkts_sent); @@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait->iow, struct hfi1_vnic_sdma, wait); - struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait->iow, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; } diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index cf03404b9d58..004c88b32e13 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 9990dc9eb96a..b3c8c45ec1e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -41,6 +41,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); @@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah) +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_hr_ah(ah)); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 46f65f9f59d0..6300033a448f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -239,6 +239,8 @@ err_free: void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_srq_table(hr_dev); hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); hns_roce_cleanup_mr_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 9549ae51a0dd..927701df5eff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -120,6 +120,10 @@ enum { HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, + HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_MODIFY_SRQC = 0x72, + HNS_ROCE_CMD_QUERY_SRQC = 0x73, + HNS_ROCE_CMD_HW2SW_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 93d4b4ec002d..f4c92a7ac1ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -376,9 +376,6 @@ #define ROCEE_RX_CMQ_TAIL_REG 0x07024 #define ROCEE_RX_CMQ_HEAD_REG 0x07028 -#define ROCEE_VF_MB_CFG0_REG 0x40 -#define ROCEE_VF_MB_STATUS_REG 0x58 - #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d39bdfdb5de9..509e467843f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -111,6 +111,9 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 +#define SRQ_DB_REG 0x230 + enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, @@ -196,6 +199,7 @@ enum { HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), + HNS_ROCE_CAP_FLAG_SRQ = BIT(5), HNS_ROCE_CAP_FLAG_MW = BIT(7), HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), @@ -204,6 +208,8 @@ enum { enum hns_roce_mtt_type { MTT_TYPE_WQE, MTT_TYPE_CQE, + MTT_TYPE_SRQWQE, + MTT_TYPE_IDX }; enum { @@ -339,6 +345,10 @@ struct hns_roce_mr_table { struct hns_roce_hem_table mtpt_table; struct hns_roce_buddy mtt_cqe_buddy; struct hns_roce_hem_table mtt_cqe_table; + struct hns_roce_buddy mtt_srqwqe_buddy; + struct hns_roce_hem_table mtt_srqwqe_table; + struct hns_roce_buddy mtt_idx_buddy; + struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -429,9 +439,37 @@ struct hns_roce_cq { struct completion free; }; +struct hns_roce_idx_que { + struct hns_roce_buf idx_buf; + int entry_sz; + u32 buf_size; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u64 *bitmap; +}; + struct hns_roce_srq { struct ib_srq ibsrq; - int srqn; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); + unsigned long srqn; + int max; + int max_gs; + int wqe_shift; + void __iomem *db_reg_l; + + atomic_t refcount; + struct completion free; + + struct hns_roce_buf buf; + u64 *wrid; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + struct hns_roce_idx_que idx_que; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct mutex mutex; }; struct hns_roce_uar_table { @@ -453,6 +491,12 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; }; +struct hns_roce_srq_table { + struct hns_roce_bitmap bitmap; + struct xarray xa; + struct hns_roce_hem_table table; +}; + struct hns_roce_raq_table { struct hns_roce_buf_list *e_raq_buf; }; @@ -603,6 +647,12 @@ struct hns_roce_aeqe { } qp_event; struct { + __le32 srq; + u32 rsv0; + u32 rsv1; + } srq_event; + + struct { __le32 cq; u32 rsv0; u32 rsv1; @@ -679,7 +729,12 @@ struct hns_roce_caps { u32 max_extend_sg; int num_qps; /* 256k */ int reserved_qps; + u32 max_srq_sg; + int num_srqs; u32 max_wqes; /* 16k */ + u32 max_srqs; + u32 max_srq_wrs; + u32 max_srq_sges; u32 max_sq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */ u32 max_srq_desc_sz; @@ -690,12 +745,16 @@ struct hns_roce_caps { int min_cqes; u32 min_wqes; int reserved_cqs; + int reserved_srqs; + u32 max_srqwqes; int num_aeq_vectors; /* 1 */ int num_comp_vectors; int num_other_vectors; int num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; + u32 num_srqwqe_segs; + u32 num_idx_segs; int reserved_mrws; int reserved_uars; int num_pds; @@ -709,6 +768,8 @@ struct hns_roce_caps { int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; + int srqc_entry_sz; + int idx_entry_sz; u32 pbl_ba_pg_sz; u32 pbl_buf_pg_sz; u32 pbl_hop_num; @@ -737,6 +798,12 @@ struct hns_roce_caps { u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; + u32 srqwqe_ba_pg_sz; + u32 srqwqe_buf_pg_sz; + u32 srqwqe_hop_num; + u32 idx_ba_pg_sz; + u32 idx_buf_pg_sz; + u32 idx_hop_num; u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; @@ -805,6 +872,19 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); + void (*write_srqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, + void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, + dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx); + int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); + const struct ib_device_ops *hns_roce_dev_ops; + const struct ib_device_ops *hns_roce_dev_srq_ops; }; struct hns_roce_dev { @@ -839,6 +919,7 @@ struct hns_roce_dev { struct hns_roce_uar_table uar_table; struct hns_roce_mr_table mr_table; struct hns_roce_cq_table cq_table; + struct hns_roce_srq_table srq_table; struct hns_roce_qp_table qp_table; struct hns_roce_eq_table eq_table; @@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, @@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah); +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct ib_ucontext *context, @@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); +int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq); + struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f6faefed96e8..4cdbcafa5915 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || - (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT)) + (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || + (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || + (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX)) return true; return false; @@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = mhop->bt_chunk_size / 8; mhop->hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", table->type); @@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, bt_chunk_size = buf_chunk_size; hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support to init hem table here!\n", @@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { + if ((hr_dev->caps.num_idx_segs)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index e8850d59e780..a650278c6fbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -48,6 +48,8 @@ enum { /* UNMAP HEM */ HEM_TYPE_MTT, HEM_TYPE_CQE, + HEM_TYPE_SRQWQE, + HEM_TYPE_IDX, HEM_TYPE_IRRL, HEM_TYPE_TRRL, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ca05810c92dc..b74c742b000c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp_work *qp_work; struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - int is_user = !!ibqp->pd->uobject; + bool is_user = ibqp->uobject; int is_timeout = 0; int ret; @@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } +static const struct ib_device_ops hns_roce_v1_dev_ops = { + .destroy_qp = hns_roce_v1_destroy_qp, + .modify_cq = hns_roce_v1_modify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .post_recv = hns_roce_v1_post_recv, + .post_send = hns_roce_v1_post_send, + .query_qp = hns_roce_v1_query_qp, + .req_notify_cq = hns_roce_v1_req_notify_cq, +}; + static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, @@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .destroy_cq = hns_roce_v1_destroy_cq, .init_eq = hns_roce_v1_init_eq_table, .cleanup_eq = hns_roce_v1_cleanup_eq_table, + .hns_roce_dev_ops = &hns_roce_v1_dev_ops, }; static const struct of_device_id hns_roce_of_match[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3beb1523e17c..3a669451cf86 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, + int vf_id) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_vf_switch *swt; + int ret; + + swt = (struct hns_roce_vf_switch *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); + swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL); + roce_set_field(swt->fun_id, + VF_SWITCH_DATA_FUN_ID_VF_ID_M, + VF_SWITCH_DATA_FUN_ID_VF_ID_S, + vf_id); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + desc.flag = + cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); + desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; @@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "Set function switch param fail, ret = %d.\n", + ret); + return ret; + } + } hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); @@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; + caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; + caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; @@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; @@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; + caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->idx_entry_sz = 4; caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; @@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->reserved_srqs = 0; caps->reserved_qps = HNS_ROCE_V2_RSV_QPS; caps->qpc_ba_pg_sz = 0; @@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_ba_pg_sz = 0; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; + caps->srqwqe_ba_pg_sz = 0; + caps->srqwqe_buf_pg_sz = 0; + caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM; + caps->idx_ba_pg_sz = 0; + caps->idx_buf_pg_sz = 0; + caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; @@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; + caps->max_srqs = HNS_ROCE_V2_MAX_SRQ; + caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; + caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; + if (hr_dev->pci_dev->revision == 0x21) - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | + HNS_ROCE_CAP_FLAG_SRQ; ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) hns_roce_free_link_table(hr_dev, &priv->tsq); } +static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_mbox_status *mb_st = + (struct hns_roce_mbox_status *)desc.data; + enum hns_roce_cmd_return_status status; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); + + status = hns_roce_cmq_send(hr_dev, &desc, 1); + if (status) + return status; + + return cpu_to_le32(mb_st->mb_status_hw_run); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status >> HNS_ROCE_HW_RUN_BIT_SHIFT; } static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status & HNS_ROCE_HW_MB_STATUS_MASK; } +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); + + mb->in_param_l = cpu_to_le64(in_param); + mb->in_param_h = cpu_to_le64(in_param) >> 32; + mb->out_param_l = cpu_to_le64(out_param); + mb->out_param_h = cpu_to_le64(out_param) >> 32; + mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); + mb->token_event_en = cpu_to_le32(event << 16 | token); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { struct device *dev = hr_dev->dev; - u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + - ROCEE_VF_MB_CFG0_REG); unsigned long end; - u32 val0 = 0; - u32 val1 = 0; + int ret; end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v2_cmd_pending(hr_dev)) { @@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK, - HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier); - roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK, - HNS_ROCE_VF_MB4_CMD_SHIFT, op); - roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK, - HNS_ROCE_VF_MB5_EVENT_SHIFT, event); - roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, - HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); - - writeq(in_param, hcr + 0); - writeq(out_param, hcr + 2); - - /* Memory barrier */ - wmb(); - - writel(val0, hcr + 4); - writel(val1, hcr + 5); - - mmiowb(); + ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + if (ret) + dev_err(dev, "Post mailbox fail(%d)\n", ret); - return 0; + return ret; } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, @@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + u32 bitmap_num; + int bit_num; + + /* always called with interrupts disabled. */ + spin_lock(&srq->lock); + + bitmap_num = wqe_index / (sizeof(u64) * 8); + bit_num = wqe_index % (sizeof(u64) * 8); + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->tail++; + + spin_unlock(&srq->lock); +} + static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & 0xffffff; @@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, struct hns_roce_v2_cqe *cqe, *dest; u32 prod_index; int nfreed = 0; + int wqe_index; u8 owner_bit; for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); @@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, V2_CQE_BYTE_16_LCL_QPN_S) & HNS_ROCE_V2_CQE_QPN_MASK) == qpn) { - /* In v1 engine, not support SRQ */ + if (srq && + roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) { + wqe_index = roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + hns_roce_free_srq_wqe(srq, wqe_index); + } ++nfreed; } else if (nfreed) { dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & @@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_srq *srq = NULL; struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; @@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; + if (is_send) { + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_hr_srq((*cur_qp)->ibqp.srq); + wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S)); + wc->wr_id = srq->wrid[wqe_ctr]; + hns_roce_free_srq_wqe(srq, wqe_ctr); + } else { + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, V2_CQE_BYTE_4_STATUS_S); switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { @@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->status = IB_WC_GENERAL_ERR; break; } - - wq = &(*cur_qp)->sq; - if ((*cur_qp)->sq_signal_bits) { - /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ - wqe_ctr = (u16)roce_get_field(cqe->byte_4, - V2_CQE_BYTE_4_WQE_INDX_M, - V2_CQE_BYTE_4_WQE_INDX_S); - wq->tail += (wqe_ctr - (u16)wq->tail) & - (wq->wqe_cnt - 1); - } - - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; } else { /* RQ correspond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); @@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, return -EAGAIN; } - /* Update tail pointer, record wr_id */ - wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M, V2_CQE_BYTE_32_SL_S); wc->src_qp = (u8)roce_get_field(cqe->byte_32, @@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if ((cur_state != IB_QPS_RESET && + (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || + ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && + (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) + return true; + + return false; + +} + static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, @@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, */ memset(qpc_mask, 0xff, sizeof(*qpc_mask)); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, qpc_mask); if (ret) goto out; - } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { + } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { /* Nothing */ ; } else { @@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, + ibqp->srq ? 1 : 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_INV_CREDIT_S, 0); + /* Every status migrate must change state */ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S, new_state); @@ -4012,7 +4133,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - int is_user) + bool is_user) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_free_db(hr_dev, &hr_qp->rdb); } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) { kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } @@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; @@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, int aeqe_found = 0; int event_type; int sub_type; + u32 srqn; u32 qpn; u32 cqn; @@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + srqn = roce_get_field(aeqe->event.srq_event.srq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: hns_roce_qp_event(hr_dev, qpn, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, srqn, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: @@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, eqe_alloc = i * (buf_chk_sz / eq->eqe_size); size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[i] = dma_alloc_coherent(dev, size, + eq->buf[i] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[i]), GFP_KERNEL); if (!eq->buf[i]) goto err_dma_alloc_buf; - memset(eq->buf[i], 0, size); *(eq->bt_l0 + i) = eq->buf_dma[i]; eq_buf_cnt++; @@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[idx] = dma_alloc_coherent(dev, size, + eq->buf[idx] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[idx]), GFP_KERNEL); if (!eq->buf[idx]) goto err_dma_alloc_buf; - memset(eq->buf[idx], 0, size); *(eq->bt_l1[i] + j) = eq->buf_dma[idx]; eq_buf_cnt++; @@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto free_cmd_mbox; } - eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz, + eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz, &(eq->buf_list->map), GFP_KERNEL); if (!eq->buf_list->buf) { @@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto err_alloc_buf; } - memset(eq->buf_list->buf, 0, buf_chk_sz); } else { ret = hns_roce_mhop_alloc_eq(hr_dev, eq); if (ret) { @@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.srqwqe_hop_num)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->max)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + cpu_to_le32(dma_handle_wqe >> 35)); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3); + srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + cpu_to_le32(dma_handle_idx >> 35)); + + srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT); + srq_context->idx_cur_blk_addr = + cpu_to_le32(srq_context->idx_cur_blk_addr); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.idx_hop_num); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + hr_dev->caps.idx_ba_pg_sz); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + hr_dev->caps.idx_buf_pg_sz); + + srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); + srq_context->idx_nxt_blk_addr = + cpu_to_le32(srq_context->idx_nxt_blk_addr); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + + roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->max) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(hr_dev->dev, + "MODIFY SRQ Failed to cmd mailbox.\n"); + return ret; + } + } + + return 0; +} + +int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n"); + goto out; + } + + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que) +{ + int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ + for (i = 0; idx_que->bitmap[i] == 0; ++i) + ; + bit_num = ffs(idx_que->bitmap[i]); + idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + + return i * sizeof(u64) * 8 + (bit_num - 1); +} + +static void fill_idx_queue(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + cur_idx * idx_que->entry_sz); + *addr = wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge > srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que); + fill_idx_queue(&srq->idx_que, ind, wqe_idx); + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg->len = 0; + dseg->lkey = cpu_to_le32(0x100); + dseg->addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->max - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; + srq_db.parameter = srq->head; + + hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + +static const struct ib_device_ops hns_roce_v2_dev_ops = { + .destroy_qp = hns_roce_v2_destroy_qp, + .modify_cq = hns_roce_v2_modify_cq, + .poll_cq = hns_roce_v2_poll_cq, + .post_recv = hns_roce_v2_post_recv, + .post_send = hns_roce_v2_post_send, + .query_qp = hns_roce_v2_query_qp, + .req_notify_cq = hns_roce_v2_req_notify_cq, +}; + +static const struct ib_device_ops hns_roce_v2_dev_srq_ops = { + .modify_srq = hns_roce_v2_modify_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .query_srq = hns_roce_v2_query_srq, +}; + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .poll_cq = hns_roce_v2_poll_cq, .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, + .write_srqc = hns_roce_v2_write_srqc, + .modify_srq = hns_roce_v2_modify_srq, + .query_srq = hns_roce_v2_query_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .hns_roce_dev_ops = &hns_roce_v2_dev_ops, + .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8bc820635bbd..b72d0443c835 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -46,10 +46,16 @@ #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ 0x100000 +#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 +#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 @@ -61,6 +67,8 @@ #define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 #define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 @@ -71,6 +79,7 @@ #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 +#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 @@ -84,8 +93,10 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_SRQWQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_EQE_HOP_NUM 2 +#define HNS_ROCE_IDX_HOP_NUM 1 #define HNS_ROCE_V2_GID_INDEX_NUM 256 @@ -113,6 +124,8 @@ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0 +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT) #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 @@ -213,7 +226,10 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, + HNS_ROCE_OPC_POST_MB = 0x8504, + HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_SWITCH_PARAMETER_CFG = 0x1033, }; enum { @@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +struct hns_roce_srq_context { + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; +}; + +#define SRQC_BYTE_4_SRQ_ST_S 0 +#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) + +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2 +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2) + +#define SRQC_BYTE_4_SRQ_SHIFT_S 4 +#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4) + +#define SRQC_BYTE_4_SRQN_S 8 +#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8) + +#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0 +#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0) + +#define SRQC_BYTE_12_SRQ_XRCD_S 0 +#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0) + +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0 +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_28_PD_S 0 +#define SRQC_BYTE_28_PD_M GENMASK(23, 0) + +#define SRQC_BYTE_28_RQWS_S 24 +#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24) + +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0 +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0 +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22 +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22) + +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24 +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28 +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0 +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0 +#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0) + +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24 +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28 +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0 + +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) + enum{ V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, @@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +struct hns_roce_vf_switch { + __le32 rocee_sel; + __le32 fun_id; + __le32 cfg; + __le32 resv1; + __le32 resv2; + __le32 resv3; +}; + +#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 +#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) + +#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 +#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 +#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 + +struct hns_roce_post_mbox { + __le32 in_param_l; + __le32 in_param_h; + __le32 out_param_l; + __le32 out_param_h; + __le32 cmd_tag; + __le32 token_event_en; +}; + +struct hns_roce_mbox_status { + __le32 mb_status_hw_run; + __le32 rsv[5]; +}; + struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc { #define HNS_ROCE_HW_RUN_BIT_SHIFT 31 #define HNS_ROCE_HW_MB_STATUS_MASK 0xFF -#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00 -#define HNS_ROCE_VF_MB4_TAG_SHIFT 8 - -#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF -#define HNS_ROCE_VF_MB4_CMD_SHIFT 0 - -#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000 -#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16 - -#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF -#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0 - struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1b3ee514f2ef..c79054ba9495 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + props->max_srq = hr_dev->caps.max_srqs; + props->max_srq_wr = hr_dev->caps.max_srq_wrs; + props->max_srq_sge = hr_dev->caps.max_srq_sges; + } return 0; } @@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ib_unregister_device(&hr_dev->ib_dev); } +static const struct ib_device_ops hns_roce_dev_ops = { + .add_gid = hns_roce_add_gid, + .alloc_pd = hns_roce_alloc_pd, + .alloc_ucontext = hns_roce_alloc_ucontext, + .create_ah = hns_roce_create_ah, + .create_cq = hns_roce_ib_create_cq, + .create_qp = hns_roce_create_qp, + .dealloc_pd = hns_roce_dealloc_pd, + .dealloc_ucontext = hns_roce_dealloc_ucontext, + .del_gid = hns_roce_del_gid, + .dereg_mr = hns_roce_dereg_mr, + .destroy_ah = hns_roce_destroy_ah, + .destroy_cq = hns_roce_ib_destroy_cq, + .disassociate_ucontext = hns_roce_disassociate_ucontext, + .get_dma_mr = hns_roce_get_dma_mr, + .get_link_layer = hns_roce_get_link_layer, + .get_netdev = hns_roce_get_netdev, + .get_port_immutable = hns_roce_port_immutable, + .mmap = hns_roce_mmap, + .modify_device = hns_roce_modify_device, + .modify_port = hns_roce_modify_port, + .modify_qp = hns_roce_modify_qp, + .query_ah = hns_roce_query_ah, + .query_device = hns_roce_query_device, + .query_pkey = hns_roce_query_pkey, + .query_port = hns_roce_query_port, + .reg_user_mr = hns_roce_reg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mr_ops = { + .rereg_user_mr = hns_roce_rereg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mw_ops = { + .alloc_mw = hns_roce_alloc_mw, + .dealloc_mw = hns_roce_dealloc_mw, +}; + +static const struct ib_device_ops hns_roce_dev_frmr_ops = { + .alloc_mr = hns_roce_alloc_mr, + .map_mr_sg = hns_roce_map_mr_sg, +}; + +static const struct ib_device_ops hns_roce_dev_srq_ops = { + .create_srq = hns_roce_create_srq, + .destroy_srq = hns_roce_destroy_srq, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - /* HCA||device||port */ - ib_dev->modify_device = hns_roce_modify_device; - ib_dev->query_device = hns_roce_query_device; - ib_dev->query_port = hns_roce_query_port; - ib_dev->modify_port = hns_roce_modify_port; - ib_dev->get_link_layer = hns_roce_get_link_layer; - ib_dev->get_netdev = hns_roce_get_netdev; - ib_dev->add_gid = hns_roce_add_gid; - ib_dev->del_gid = hns_roce_del_gid; - ib_dev->query_pkey = hns_roce_query_pkey; - ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; - ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; - ib_dev->mmap = hns_roce_mmap; - - /* PD */ - ib_dev->alloc_pd = hns_roce_alloc_pd; - ib_dev->dealloc_pd = hns_roce_dealloc_pd; - - /* AH */ - ib_dev->create_ah = hns_roce_create_ah; - ib_dev->query_ah = hns_roce_query_ah; - ib_dev->destroy_ah = hns_roce_destroy_ah; - - /* QP */ - ib_dev->create_qp = hns_roce_create_qp; - ib_dev->modify_qp = hns_roce_modify_qp; - ib_dev->query_qp = hr_dev->hw->query_qp; - ib_dev->destroy_qp = hr_dev->hw->destroy_qp; - ib_dev->post_send = hr_dev->hw->post_send; - ib_dev->post_recv = hr_dev->hw->post_recv; - - /* CQ */ - ib_dev->create_cq = hns_roce_ib_create_cq; - ib_dev->modify_cq = hr_dev->hw->modify_cq; - ib_dev->destroy_cq = hns_roce_ib_destroy_cq; - ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; - ib_dev->poll_cq = hr_dev->hw->poll_cq; - - /* MR */ - ib_dev->get_dma_mr = hns_roce_get_dma_mr; - ib_dev->reg_user_mr = hns_roce_reg_user_mr; - ib_dev->dereg_mr = hns_roce_dereg_mr; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->rereg_user_mr = hns_roce_rereg_user_mr; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); } /* MW */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->alloc_mw = hns_roce_alloc_mw; - ib_dev->dealloc_mw = hns_roce_dealloc_mw; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); } /* FRMR */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { - ib_dev->alloc_mr = hns_roce_alloc_mr; - ib_dev->map_mr_sg = hns_roce_map_mr_sg; - } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) + ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops); - /* OTHERS */ - ib_dev->get_port_immutable = hns_roce_port_immutable; - ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; + /* SRQ */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ib_dev->uverbs_cmd_mask |= + (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); + } ib_dev->driver_id = RDMA_DRIVER_HNS; + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_ops); ret = ib_register_device(ib_dev, "hns_%d", NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } + if (hr_dev->caps.srqc_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, + HEM_TYPE_SRQC, + hr_dev->caps.srqc_entry_sz, + hr_dev->caps.num_srqs, 1); + if (ret) { + dev_err(dev, + "Failed to init SRQ context memory, aborting.\n"); + goto err_unmap_cq; + } + } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + HEM_TYPE_SRQWQE, + hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_srqwqe_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT srqwqe memory, aborting.\n"); + goto err_unmap_srq; + } + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table, + HEM_TYPE_IDX, + hr_dev->caps.idx_entry_sz, + hr_dev->caps.num_idx_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT idx memory, aborting.\n"); + goto err_unmap_srqwqe; + } + } + return 0; +err_unmap_srqwqe: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + +err_unmap_srq: + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table); + +err_unmap_cq: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + err_unmap_trrl: if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, @@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_cq_table_free; } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ret = hns_roce_init_srq_table(hr_dev); + if (ret) { + dev_err(dev, + "Failed to init share receive queue table.\n"); + goto err_qp_table_free; + } + } + return 0; +err_qp_table_free: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_qp_table(hr_dev); + err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 521ad2aa3a4e..ee5991bd4171 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, struct hns_roce_buddy *buddy; int ret; - if (mtt_type == MTT_TYPE_WQE) { + switch (mtt_type) { + case MTT_TYPE_WQE: buddy = &mr_table->mtt_buddy; table = &mr_table->mtt_table; - } else { + break; + case MTT_TYPE_CQE: buddy = &mr_table->mtt_cqe_buddy; table = &mr_table->mtt_cqe_table; + break; + case MTT_TYPE_SRQWQE: + buddy = &mr_table->mtt_srqwqe_buddy; + table = &mr_table->mtt_srqwqe_table; + break; + case MTT_TYPE_IDX: + buddy = &mr_table->mtt_idx_buddy; + table = &mr_table->mtt_idx_table; + break; + default: + dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", + mtt_type); + return -EINVAL; } ret = hns_roce_buddy_alloc(buddy, order, seg); @@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) if (mtt->order < 0) return; - if (mtt->mtt_type == MTT_TYPE_WQE) { + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); - } else { + break; + case MTT_TYPE_CQE: hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_SRQWQE: + hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + case MTT_TYPE_IDX: + hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, + mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, clean mtt failed\n", + mtt->mtt_type); + break; } } EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup); @@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 i; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + table = &hr_dev->mr_table.mtt_table; bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: + table = &hr_dev->mr_table.mtt_cqe_table; bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + table = &hr_dev->mr_table.mtt_srqwqe_table; + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + table = &hr_dev->mr_table.mtt_idx_table; + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + return -EINVAL; + } /* All MTTs must fit in the same page */ if (start_index / (bt_page_size / sizeof(u64)) != @@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) - table = &hr_dev->mr_table.mtt_table; - else - table = &hr_dev->mr_table.mtt_cqe_table; - mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, &dma_handle); @@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, if (mtt->order < 0) return -EINVAL; - if (mtt->mtt_type == MTT_TYPE_WQE) + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); - else + break; + case MTT_TYPE_CQE: bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_SRQWQE: + bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); + break; + case MTT_TYPE_IDX: + bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); + break; + default: + dev_err(hr_dev->dev, + "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } while (npages > 0) { chunk = min_t(int, bt_page_size / sizeof(u64), npages); @@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) if (ret) goto err_buddy_cqe; } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, + ilog2(hr_dev->caps.num_srqwqe_segs)); + if (ret) + goto err_buddy_srqwqe; + } + + if (hr_dev->caps.num_idx_segs) { + ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, + ilog2(hr_dev->caps.num_idx_segs)); + if (ret) + goto err_buddy_idx; + } + return 0; +err_buddy_idx: + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); + +err_buddy_srqwqe: + if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) + hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); + err_buddy_cqe: hns_roce_buddy_cleanup(&mr_table->mtt_buddy); @@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + if (hr_dev->caps.num_idx_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); hns_roce_buddy_cleanup(&mr_table->mtt_buddy); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); @@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, u32 bt_page_size; u32 n; - order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz : - hr_dev->caps.cqe_ba_pg_sz; + switch (mtt->mtt_type) { + case MTT_TYPE_WQE: + order = hr_dev->caps.mtt_ba_pg_sz; + break; + case MTT_TYPE_CQE: + order = hr_dev->caps.cqe_ba_pg_sz; + break; + case MTT_TYPE_SRQWQE: + order = hr_dev->caps.srqwqe_ba_pg_sz; + break; + case MTT_TYPE_IDX: + order = hr_dev->caps.idx_ba_pg_sz; + break; + default: + dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", + mtt->mtt_type); + return -EINVAL; + } + bt_page_size = 1 << (order + PAGE_SHIFT); pages = (u64 *) __get_free_pages(GFP_KERNEL, order); @@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_umem; } } else { - int pbl_size = 1; + u64 pbl_size = 1; bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= bt_size; if (n > pbl_size) { dev_err(dev, - " MR len %lld err. MR page num is limited to %d!\n", + " MR len %lld err. MR page num is limited to %lld!\n", length, pbl_size); ret = -EINVAL; goto err_umem; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 5ebf481a39d9..54031c5b53fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, EXPORT_SYMBOL_GPL(hns_roce_release_range_qp); static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, int is_user, int has_srq, + struct ib_qp_cap *cap, bool is_user, int has_rq, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; @@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, return -EINVAL; } - /* If srq exit, set zero for relative number of rq */ - if (has_srq) { - if (cap->max_recv_wr) { - dev_dbg(dev, "srq no need config max_recv_wr\n"); - return -EINVAL; - } - - hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0; + /* If srq exist, set zero for relative number of rq */ + if (!has_rq) { + hr_qp->rq.wqe_cnt = 0; + hr_qp->rq.max_gs = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); @@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR); - ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, - !!init_attr->srq, hr_qp); + ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata, + hns_roce_qp_has_rq(init_attr), hr_qp); if (ret) { dev_err(dev, "hns_roce_set_rq_size failed\n"); goto err_out; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr)) { /* allocate recv inline buf */ hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt, sizeof(struct hns_roce_rinl_wqe), @@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, init_attr->cap.max_recv_sge]; } - if (ib_pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp\n"); ret = -EFAULT; @@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, else hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); - if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && + if (udata && (udata->outlen >= sizeof(resp)) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { /* indicate kernel supports rq record db */ @@ -811,7 +810,7 @@ err_qpn: hns_roce_release_range_qp(hr_dev, qpn, 1); err_wrid: - if (ib_pd->uobject) { + if (udata) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && hns_roce_qp_has_rq(init_attr)) @@ -824,7 +823,7 @@ err_wrid: } err_sq_dbmap: - if (ib_pd->uobject) + if (udata) if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && (udata->inlen >= sizeof(ucmd)) && (udata->outlen >= sizeof(resp)) && @@ -837,13 +836,13 @@ err_mtt: hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); err_buf: - if (ib_pd->uobject) + if (hr_qp->umem) ib_umem_release(hr_qp->umem); else hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); err_db: - if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) && + if (!udata && hns_roce_qp_has_rq(init_attr) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_qp->rdb); @@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, } case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) { + if (udata) { dev_err(dev, "not support usr space GSI\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c new file mode 100644 index 000000000000..960b1946c365 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2018 Hisilicon Limited. + */ + +#include <rdma/ib_umem.h> +#include <rdma/hns-abi.h> +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" + +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct hns_roce_srq *srq; + + xa_lock(&srq_table->xa); + srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + xa_unlock(&srq_table->xa); + + if (!srq) { + dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn); + return; + } + + srq->event(srq, event_type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); +} +EXPORT_SYMBOL_GPL(hns_roce_srq_event); + +static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, + enum hns_roce_event event_type) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq *ibsrq = &srq->ibsrq; + struct ib_event event; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + dev_err(hr_dev->dev, + "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n", + event_type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, + HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); +} + +static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) +{ + return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, + mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + HNS_ROCE_CMD_TIMEOUT_MSECS); +} + +int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd, + struct hns_roce_mtt *hr_mtt, u64 db_rec_addr, + struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct hns_roce_cmd_mailbox *mailbox; + dma_addr_t dma_handle_wqe; + dma_addr_t dma_handle_idx; + u64 *mtts_wqe; + u64 *mtts_idx; + int ret; + + /* Get the physical address of srq buf */ + mtts_wqe = hns_roce_table_find(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + srq->mtt.first_seg, + &dma_handle_wqe); + if (!mtts_wqe) { + dev_err(hr_dev->dev, + "SRQ alloc.Failed to find srq buf addr.\n"); + return -EINVAL; + } + + /* Get physical address of idx que buf */ + mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table, + srq->idx_que.mtt.first_seg, + &dma_handle_idx); + if (!mtts_idx) { + dev_err(hr_dev->dev, + "SRQ alloc.Failed to find idx que buf addr.\n"); + return -EINVAL; + } + + ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); + if (ret == -1) { + dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + return -ENOMEM; + } + + ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); + if (ret) + goto err_out; + + ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + if (ret) + goto err_put; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_xa; + } + + hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, + mtts_wqe, mtts_idx, dma_handle_wqe, + dma_handle_idx); + + ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) + goto err_xa; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + return ret; + +err_xa: + xa_erase(&srq_table->xa, srq->srqn); + +err_put: + hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); + +err_out: + hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; +} + +void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + int ret; + + ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + if (ret) + dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + ret, srq->srqn); + + xa_erase(&srq_table->xa, srq->srqn); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); + hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); +} + +static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, + u32 page_shift) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 bitmap_num; + int i; + + bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64)); + + idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL); + if (!idx_que->bitmap) + return -ENOMEM; + + bitmap_num = bitmap_num / (8 * sizeof(u64)); + + idx_que->buf_size = srq->idx_que.buf_size; + + if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, + &idx_que->idx_buf, page_shift)) { + kfree(idx_que->bitmap); + return -ENOMEM; + } + + for (i = 0; i < bitmap_num; i++) + idx_que->bitmap[i] = ~(0UL); + + return 0; +} + +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct hns_roce_srq *srq; + int srq_desc_size; + int srq_buf_size; + u32 page_shift; + int ret = 0; + u32 npages; + u32 cqn; + + /* Check the actual SRQ wqe and SRQ sge num */ + if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + + srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); + srq->max_gs = srq_init_attr->attr.max_sge; + + srq_desc_size = max(16, 16 * srq->max_gs); + + srq->wqe_shift = ilog2(srq_desc_size); + + srq_buf_size = srq->max * srq_desc_size; + + srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->mtt.mtt_type = MTT_TYPE_SRQWQE; + srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; + + if (udata) { + struct hns_roce_ib_create_srq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + srq_buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + if (hr_dev->caps.srqwqe_buf_pg_sz) { + npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, + &srq->mtt); + } else + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->umem), + srq->umem->page_shift, + &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); + if (ret) + goto err_srq_mtt; + + /* config index queue BA */ + srq->idx_que.umem = ib_umem_get(pd->uobject->context, + ucmd.que_addr, + srq->idx_que.buf_size, 0, 0); + if (IS_ERR(srq->idx_que.umem)) { + dev_err(hr_dev->dev, + "ib_umem_get error for index queue\n"); + ret = PTR_ERR(srq->idx_que.umem); + goto err_srq_mtt; + } + + if (hr_dev->caps.idx_buf_pg_sz) { + npages = (ib_umem_page_count(srq->idx_que.umem) + + (1 << hr_dev->caps.idx_buf_pg_sz) - 1) / + (1 << hr_dev->caps.idx_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, &srq->idx_que.mtt); + } else { + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->idx_que.umem), + srq->idx_que.umem->page_shift, + &srq->idx_que.mtt); + } + + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_mtt_init error for idx que\n"); + goto err_idx_mtt; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, + srq->idx_que.umem); + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_ib_umem_write_mtt error for idx que\n"); + goto err_idx_buf; + } + } else { + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + if (hns_roce_buf_alloc(hr_dev, srq_buf_size, + (1 << page_shift) * 2, + &srq->buf, page_shift)) { + ret = -ENOMEM; + goto err_srq; + } + + srq->head = 0; + srq->tail = srq->max - 1; + + ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, + srq->buf.page_shift, &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); + if (ret) + goto err_srq_mtt; + + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_create_idx_que(pd, srq, page_shift); + if (ret) { + dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", + ret); + goto err_srq_mtt; + } + + /* Init mtt table for idx_que */ + ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, + srq->idx_que.idx_buf.page_shift, + &srq->idx_que.mtt); + if (ret) + goto err_create_idx; + + /* Write buffer address into the mtt table */ + ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, + &srq->idx_que.idx_buf); + if (ret) + goto err_idx_buf; + + srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + ret = -ENOMEM; + goto err_idx_buf; + } + } + + cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? + to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + + srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + + ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0, + &srq->mtt, 0, srq); + if (ret) + goto err_wrid; + + srq->event = hns_roce_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->srqn; + + if (udata) { + if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + ret = -EFAULT; + goto err_wrid; + } + } + + return &srq->ibsrq; + +err_wrid: + kvfree(srq->wrid); + +err_idx_buf: + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + +err_idx_mtt: + if (udata) + ib_umem_release(srq->idx_que.umem); + +err_create_idx: + hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, + &srq->idx_que.idx_buf); + kfree(srq->idx_que.bitmap); + +err_srq_mtt: + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + +err_buf: + if (udata) + ib_umem_release(srq->umem); + else + hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + +err_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int hns_roce_destroy_srq(struct ib_srq *ibsrq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + + hns_roce_srq_free(hr_dev, srq); + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + + if (ibsrq->uobject) { + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + ib_umem_release(srq->idx_que.umem); + ib_umem_release(srq->umem); + } else { + kvfree(srq->wrid); + hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + &srq->buf); + } + + kfree(srq); + + return 0; +} + +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + + xa_init(&srq_table->xa); + + return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs, + hr_dev->caps.num_srqs - 1, + hr_dev->caps.reserved_srqs, 0); +} + +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap); +} diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 4b3999d88c9e..206cfb0016f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 102875872bea..0b675b0742c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - if (ibpd->uobject && ibpd->uobject->context) { - iwqp->user_mode = 1; - ucontext = to_ucontext(ibpd->uobject->context); - - if (req.user_wqe_buffers) { - struct i40iw_pbl *iwpbl; - - spin_lock_irqsave( - &ucontext->qp_reg_mem_list_lock, flags); - iwpbl = i40iw_get_pbl( - (unsigned long)req.user_wqe_buffers, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore( - &ucontext->qp_reg_mem_list_lock, flags); - - if (!iwpbl) { - err_code = -ENODATA; - i40iw_pr_err("no pbl info\n"); - goto error; - } - memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); + iwqp->user_mode = 1; + ucontext = to_ucontext(ibpd->uobject->context); + + if (req.user_wqe_buffers) { + struct i40iw_pbl *iwpbl; + + spin_lock_irqsave( + &ucontext->qp_reg_mem_list_lock, flags); + iwpbl = i40iw_get_pbl( + (unsigned long)req.user_wqe_buffers, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore( + &ucontext->qp_reg_mem_list_lock, flags); + + if (!iwpbl) { + err_code = -ENODATA; + i40iw_pr_err("no pbl info\n"); + goto error; } + memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); } err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info); } else { @@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); i40iw_add_devusecount(iwdev); - if (ibpd->uobject && udata) { + if (udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; @@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) ib_umem_release(iwmr->region); if (iwmr->type != IW_MEMREG_TYPE_MEM) { - if (ibpd->uobject) { + /* region is released. only test for userness. */ + if (iwmr->region) { struct i40iw_ucontext *ucontext; ucontext = to_ucontext(ibpd->uobject->context); @@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev, return 0; } -/** - * i40iw_get_vector_affinity - report IRQ affinity mask - * @ibdev: IB device - * @comp_vector: completion vector index - */ -static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) -{ - struct i40iw_device *iwdev = to_iwdev(ibdev); - struct i40iw_msix_vector *msix_vec; - - if (iwdev->msix_shared) - msix_vec = &iwdev->iw_msixtbl[comp_vector]; - else - msix_vec = &iwdev->iw_msixtbl[comp_vector + 1]; - - return irq_get_affinity_mask(msix_vec->irq); -} +static const struct ib_device_ops i40iw_dev_ops = { + .alloc_hw_stats = i40iw_alloc_hw_stats, + .alloc_mr = i40iw_alloc_mr, + .alloc_pd = i40iw_alloc_pd, + .alloc_ucontext = i40iw_alloc_ucontext, + .create_cq = i40iw_create_cq, + .create_qp = i40iw_create_qp, + .dealloc_pd = i40iw_dealloc_pd, + .dealloc_ucontext = i40iw_dealloc_ucontext, + .dereg_mr = i40iw_dereg_mr, + .destroy_cq = i40iw_destroy_cq, + .destroy_qp = i40iw_destroy_qp, + .drain_rq = i40iw_drain_rq, + .drain_sq = i40iw_drain_sq, + .get_dev_fw_str = i40iw_get_dev_fw_str, + .get_dma_mr = i40iw_get_dma_mr, + .get_hw_stats = i40iw_get_hw_stats, + .get_port_immutable = i40iw_port_immutable, + .map_mr_sg = i40iw_map_mr_sg, + .mmap = i40iw_mmap, + .modify_qp = i40iw_modify_qp, + .poll_cq = i40iw_poll_cq, + .post_recv = i40iw_post_recv, + .post_send = i40iw_post_send, + .query_device = i40iw_query_device, + .query_gid = i40iw_query_gid, + .query_pkey = i40iw_query_pkey, + .query_port = i40iw_query_port, + .query_qp = i40iw_query_qp, + .reg_user_mr = i40iw_reg_user_mr, + .req_notify_cq = i40iw_req_notify_cq, +}; /** * i40iw_init_rdma_device - initialization of iwarp device @@ -2786,30 +2799,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; - iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.query_pkey = i40iw_query_pkey; - iwibdev->ibdev.query_gid = i40iw_query_gid; - iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; - iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext; - iwibdev->ibdev.mmap = i40iw_mmap; - iwibdev->ibdev.alloc_pd = i40iw_alloc_pd; - iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd; - iwibdev->ibdev.create_qp = i40iw_create_qp; - iwibdev->ibdev.modify_qp = i40iw_modify_qp; - iwibdev->ibdev.query_qp = i40iw_query_qp; - iwibdev->ibdev.destroy_qp = i40iw_destroy_qp; - iwibdev->ibdev.create_cq = i40iw_create_cq; - iwibdev->ibdev.destroy_cq = i40iw_destroy_cq; - iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; - iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; - iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; - iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; - iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.drain_sq = i40iw_drain_sq; - iwibdev->ibdev.drain_rq = i40iw_drain_rq; - iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; - iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); @@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); - iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; - iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; - iwibdev->ibdev.poll_cq = i40iw_poll_cq; - iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; - iwibdev->ibdev.post_send = i40iw_post_send; - iwibdev->ibdev.post_recv = i40iw_post_recv; - iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity; + ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index e9e3a6f390db..1672808262ba 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, } struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx4_ib_ah *ah; @@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); if (IS_ERR(ah)) return ah; @@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah) +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 155b4dfc0ae8..782499abcd98 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); for (i = 1; i <= dev->num_ports; ++i) { - if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) { ret = -EFAULT; goto err_unregister; } diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8942f5f7f04d..25439da8976c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, return -EINVAL; rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0); } - ah = rdma_create_ah(tun_ctx->pd, &attr); + ah = rdma_create_ah(tun_ctx->pd, &attr, 0); if (IS_ERR(ah)) return -ENOMEM; @@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) - rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, @@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&tun_qp->tx_lock); tun_qp->tx_ring[tun_tx_ix].ah = NULL; end: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); return ret; } @@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) - rdma_destroy_ah(mad_send_wc->send_buf->context[0]); + rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0); ib_free_send_mad(mad_send_wc->send_buf); } @@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], 0); } } @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah); + mlx4_ib_destroy_ah(ah, 0); return ret; } @@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) - rdma_destroy_ah(tun_qp->tx_ring[i].ah); + rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); @@ -1749,7 +1749,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "wrid=0x%llx, status=0x%x\n", wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0def2323459c..1f15ec3e2b83 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, } } +static const struct ib_device_ops mlx4_ib_hw_stats_ops = { + .alloc_hw_stats = mlx4_ib_alloc_hw_stats, + .get_hw_stats = mlx4_ib_get_hw_stats, +}; + static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) { struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; @@ -2246,8 +2251,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) diag[i].offset, i); } - ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; - ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops); return 0; @@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, event == NETDEV_UP || event == NETDEV_CHANGE)) update_qps_port = port; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_UP || event == NETDEV_DOWN)) { + enum ib_port_state port_state; + struct ib_event ibev = { }; + + if (ib_get_cached_port_state(&ibdev->ib_dev, port, + &port_state)) + continue; + + if (event == NETDEV_UP && + (port_state != IB_PORT_ACTIVE || + iboe->last_port_state[port - 1] != IB_PORT_DOWN)) + continue; + if (event == NETDEV_DOWN && + (port_state != IB_PORT_DOWN || + iboe->last_port_state[port - 1] != IB_PORT_ACTIVE)) + continue; + iboe->last_port_state[port - 1] = port_state; + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE : + IB_EVENT_PORT_ERR; + ib_dispatch_event(&ibev); + } + } spin_unlock_bh(&iboe->lock); @@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str) (int) dev->dev->caps.fw_ver & 0xffff); } +static const struct ib_device_ops mlx4_ib_dev_ops = { + .add_gid = mlx4_ib_add_gid, + .alloc_mr = mlx4_ib_alloc_mr, + .alloc_pd = mlx4_ib_alloc_pd, + .alloc_ucontext = mlx4_ib_alloc_ucontext, + .attach_mcast = mlx4_ib_mcg_attach, + .create_ah = mlx4_ib_create_ah, + .create_cq = mlx4_ib_create_cq, + .create_qp = mlx4_ib_create_qp, + .create_srq = mlx4_ib_create_srq, + .dealloc_pd = mlx4_ib_dealloc_pd, + .dealloc_ucontext = mlx4_ib_dealloc_ucontext, + .del_gid = mlx4_ib_del_gid, + .dereg_mr = mlx4_ib_dereg_mr, + .destroy_ah = mlx4_ib_destroy_ah, + .destroy_cq = mlx4_ib_destroy_cq, + .destroy_qp = mlx4_ib_destroy_qp, + .destroy_srq = mlx4_ib_destroy_srq, + .detach_mcast = mlx4_ib_mcg_detach, + .disassociate_ucontext = mlx4_ib_disassociate_ucontext, + .drain_rq = mlx4_ib_drain_rq, + .drain_sq = mlx4_ib_drain_sq, + .get_dev_fw_str = get_fw_ver_str, + .get_dma_mr = mlx4_ib_get_dma_mr, + .get_link_layer = mlx4_ib_port_link_layer, + .get_netdev = mlx4_ib_get_netdev, + .get_port_immutable = mlx4_port_immutable, + .map_mr_sg = mlx4_ib_map_mr_sg, + .mmap = mlx4_ib_mmap, + .modify_cq = mlx4_ib_modify_cq, + .modify_device = mlx4_ib_modify_device, + .modify_port = mlx4_ib_modify_port, + .modify_qp = mlx4_ib_modify_qp, + .modify_srq = mlx4_ib_modify_srq, + .poll_cq = mlx4_ib_poll_cq, + .post_recv = mlx4_ib_post_recv, + .post_send = mlx4_ib_post_send, + .post_srq_recv = mlx4_ib_post_srq_recv, + .process_mad = mlx4_ib_process_mad, + .query_ah = mlx4_ib_query_ah, + .query_device = mlx4_ib_query_device, + .query_gid = mlx4_ib_query_gid, + .query_pkey = mlx4_ib_query_pkey, + .query_port = mlx4_ib_query_port, + .query_qp = mlx4_ib_query_qp, + .query_srq = mlx4_ib_query_srq, + .reg_user_mr = mlx4_ib_reg_user_mr, + .req_notify_cq = mlx4_ib_arm_cq, + .rereg_user_mr = mlx4_ib_rereg_user_mr, + .resize_cq = mlx4_ib_resize_cq, +}; + +static const struct ib_device_ops mlx4_ib_dev_wq_ops = { + .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table, + .create_wq = mlx4_ib_create_wq, + .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, + .destroy_wq = mlx4_ib_destroy_wq, + .modify_wq = mlx4_ib_modify_wq, +}; + +static const struct ib_device_ops mlx4_ib_dev_fmr_ops = { + .alloc_fmr = mlx4_ib_fmr_alloc, + .dealloc_fmr = mlx4_ib_fmr_dealloc, + .map_phys_fmr = mlx4_ib_map_phys_fmr, + .unmap_fmr = mlx4_ib_unmap_fmr, +}; + +static const struct ib_device_ops mlx4_ib_dev_mw_ops = { + .alloc_mw = mlx4_ib_alloc_mw, + .dealloc_mw = mlx4_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { + .alloc_xrcd = mlx4_ib_alloc_xrcd, + .dealloc_xrcd = mlx4_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx4_ib_dev_fs_ops = { + .create_flow = mlx4_ib_create_flow, + .destroy_flow = mlx4_ib_destroy_flow, +}; + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; - ibdev->ib_dev.add_gid = mlx4_ib_add_gid; - ibdev->ib_dev.del_gid = mlx4_ib_del_gid; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ibdev->ib_dev.query_device = mlx4_ib_query_device; - ibdev->ib_dev.query_port = mlx4_ib_query_port; - ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; - ibdev->ib_dev.query_gid = mlx4_ib_query_gid; - ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; - ibdev->ib_dev.modify_device = mlx4_ib_modify_device; - ibdev->ib_dev.modify_port = mlx4_ib_modify_port; - ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; - ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; - ibdev->ib_dev.mmap = mlx4_ib_mmap; - ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; - ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; - ibdev->ib_dev.create_ah = mlx4_ib_create_ah; - ibdev->ib_dev.query_ah = mlx4_ib_query_ah; - ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; - ibdev->ib_dev.create_srq = mlx4_ib_create_srq; - ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; - ibdev->ib_dev.query_srq = mlx4_ib_query_srq; - ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; - ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; - ibdev->ib_dev.create_qp = mlx4_ib_create_qp; - ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; - ibdev->ib_dev.query_qp = mlx4_ib_query_qp; - ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; - ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; - ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; - ibdev->ib_dev.post_send = mlx4_ib_post_send; - ibdev->ib_dev.post_recv = mlx4_ib_post_recv; - ibdev->ib_dev.create_cq = mlx4_ib_create_cq; - ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; - ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; - ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; - ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; - ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; - ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; - ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; - ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; - ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; - ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; - ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; - ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; - ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; - ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; - ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; - ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; - + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.create_wq = mlx4_ib_create_wq; - ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; - ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; - ibdev->ib_dev.create_rwq_ind_table = - mlx4_ib_create_rwq_ind_table; - ibdev->ib_dev.destroy_rwq_ind_table = - mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); } - if (!mlx4_is_slave(ibdev->dev)) { - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; - } + if (!mlx4_is_slave(ibdev->dev)) + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; - ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; - ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; - ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.create_flow = mlx4_ib_create_flow; - ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; - ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); - mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + iboe->last_port_state[i] = IB_PORT_DOWN; } num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 8850dfc3826d..e491f3eda6e7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -519,6 +519,7 @@ struct mlx4_ib_iboe { atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; + enum ib_port_state last_port_state[MLX4_MAX_PORTS]; }; struct pkey_mgt { @@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah); +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0711ca1dfb8f..971e9a9ebdaf 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp, + bool is_user, int has_rq, struct mlx4_ib_qp *qp, u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ @@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. */ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); @@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - if (pd->uobject) { + if (udata) { union { struct mlx4_ib_create_qp qp; struct mlx4_ib_create_wq wq; @@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX4_IB_QP_SCATTER_FCS; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; @@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, 0); if (err) goto err; @@ -1189,7 +1189,7 @@ err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: - if (pd->uobject) { + if (udata) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { @@ -1201,20 +1201,20 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: - if (pd->uobject) + if (qp->umem) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && qp_has_rq(init_attr)) + if (!udata && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: - if (sqp) - kfree(sqp); - else if (!*caller_qp) + if (!sqp && !*caller_qp) kfree(qp); + kfree(sqp); + return err; } @@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, int is_user) + enum mlx4_ib_source_type src, bool is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - struct mlx4_ib_pd *pd; - - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); } if (is_sqp(dev, mqp)) @@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct mlx4_ib_create_wq ucmd; int err, required_cmd_sz; - if (!(udata && pd->uobject)) + if (!udata) return ERR_PTR(-EINVAL); required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 3731b31c3653..4456f1b8921d 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, buf_size = srq->msrq.max * desc_size; - if (pd->uobject) { + if (udata) { struct mlx4_ib_create_srq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx4_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; @@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_wrid: - if (pd->uobject) + if (udata) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else kvfree(srq->wrid); @@ -211,13 +211,13 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (pd->uobject) + if (srq->umem) ib_umem_release(srq->umem); else mlx4_buf_free(dev->dev, buf_size, &srq->buf); err_db: - if (!pd->uobject) + if (!udata) mlx4_db_free(dev->dev, &srq->db); err_srq: diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 752bdd536130..ea1f3a081b05 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index ffd03bf1a71e..420ae0897333 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, } struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx5_ib_ah *ah; @@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah) +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ca060a2e2b36..356bccc715ee 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -326,3 +328,20 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) MLX5_SET(dealloc_xrcd_in, in, uid, uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index c03c56455534..1e76dc67a369 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -61,4 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid); int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 26ab9041f94a..90f1b0bae5b5 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } -static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) -{ - /* TBD: waiting decision - */ - return 0; -} - -static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) -{ - struct mlx5_wqe_data_seg *dpseg; - void *addr; - - dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg); - addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); - return addr; -} - -static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - uint16_t idx) -{ - void *addr; - int byte_count; - int i; - - if (!is_atomic_response(qp, idx)) - return; - - byte_count = be32_to_cpu(cqe64->byte_cnt); - addr = mlx5_get_atomic_laddr(qp, idx); - - if (byte_count == 4) { - *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); - } else { - for (i = 0; i < byte_count; i += 8) { - *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); - addr += 8; - } - } - - return; -} - -static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - u16 tail, u16 head) -{ - u16 idx; - - do { - idx = tail & (qp->sq.wqe_cnt - 1); - handle_atomic(qp, cqe64, idx); - if (idx == head) - break; - - tail = qp->sq.w_list[idx].next; - } while (1); - tail = qp->sq.w_list[idx].next; - qp->sq.last_poll = tail; -} - static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } -static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) -{ - struct mlx5_ib_wq *wq; - unsigned int cur; - unsigned int idx; - int np; - int i; - - wq = &qp->sq; - cur = wq->head - wq->tail; - np = *npolled; - - if (cur == 0) - return; - - for (i = 0; i < cur && np < num_entries; i++) { - idx = wq->last_poll & (wq->wqe_cnt - 1); - wc->wr_id = wq->wrid[idx]; - wc->status = IB_WC_WR_FLUSH_ERR; - wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; - wq->tail++; - np++; - wc->qp = &qp->ibqp; - wc++; - wq->last_poll = wq->w_list[idx].next; - } - *npolled = np; -} - -static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) +static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, + int *npolled, int is_send) { struct mlx5_ib_wq *wq; unsigned int cur; int np; int i; - wq = &qp->rq; + wq = (is_send) ? &qp->sq : &qp->rq; cur = wq->head - wq->tail; np = *npolled; @@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, *npolled = 0; /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { - sw_send_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, true); if (*npolled >= num_entries) return; } list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { - sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, false); if (*npolled >= num_entries) return; } @@ -567,7 +476,6 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); - handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 45c421c87100..5a588f3cfb1b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -9,6 +9,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" @@ -40,29 +41,32 @@ struct devx_umem_reg_cmd { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; }; -static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +static struct mlx5_ib_ucontext * +devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) { - return to_mucontext(ib_uverbs_get_ucontext(file)); + return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u64 general_obj_types; - void *hdr; + void *uctx; int err; u16 uid; + u32 cap = 0; - hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); - - general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); - if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || - !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + /* 0 means not supported */ + if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx)) return -EINVAL; - MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + if (is_user && capable(CAP_NET_RAW) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + cap |= MLX5_UCTX_CAP_RAW_TX; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -74,12 +78,11 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + return true; + } + + return false; +} + /* * As the obj_id in the firmware is not globally unique the object type * must be considered upon checking for a valid object id. @@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } -static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u64 obj_id; @@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(query_xrq_in, in, xrqn)); break; @@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; default: + obj_id = 0; + } + + return obj_id; +} + +static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +{ + u64 obj_id = devx_get_obj_id(in); + + if (!obj_id) return false; + + switch (uobj_get_object_id(uobj)) { + case UVERBS_OBJECT_CQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + to_mcq(uobj->object)->mcq.cqn) == + obj_id; + + case UVERBS_OBJECT_SRQ: + { + struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + u16 opcode; + + switch (srq->common.res) { + case MLX5_RES_XSRQ: + opcode = MLX5_CMD_OP_CREATE_XRC_SRQ; + break; + case MLX5_RES_XRQ: + opcode = MLX5_CMD_OP_CREATE_XRQ; + break; + default: + if (!dev->mdev->issi) + opcode = MLX5_CMD_OP_CREATE_SRQ; + else + opcode = MLX5_CMD_OP_CREATE_RMP; + } + + return get_enc_obj_id(opcode, + to_msrq(uobj->object)->msrq.srqn) == + obj_id; } - if (obj_id == obj->obj_id) - return true; + case UVERBS_OBJECT_QP: + { + struct mlx5_ib_qp *qp = to_mqp(uobj->object); + enum ib_qp_type qp_type = qp->ibqp.qp_type; + + if (qp_type == IB_QPT_RAW_PACKET || + (qp->flags & MLX5_IB_QP_UNDERLAY)) { + struct mlx5_ib_raw_packet_qp *raw_packet_qp = + &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + + return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + rq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + sq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + rq->tirn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + sq->tisn) == obj_id); + } + + if (qp_type == MLX5_IB_QPT_DCT) + return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + qp->dct.mdct.mqp.qpn) == obj_id; + + return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + qp->ibqp.qp_num) == obj_id; + } - return false; + case UVERBS_OBJECT_WQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + to_mrwq(uobj->object)->core_qp.qpn) == + obj_id; + + case UVERBS_OBJECT_RWQ_IND_TBL: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + to_mrwq_ind_table(uobj->object)->rqtn) == + obj_id; + + case MLX5_IB_OBJECT_DEVX_OBJ: + return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + + default: + return false; + } } static void devx_set_umem_valid(const void *in) @@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in) case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: return true; default: return false; @@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - return c->devx_uid; } static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + if (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END) + return true; + switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in) } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) return -EFAULT; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( * queue or arm its CQ for event generation), no further harm is expected. */ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; u32 user_idx; s32 dev_idx; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int err; int uid; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); break; + case MLX5_CMD_OP_CREATE_UMEM: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_UMEM); + break; case MLX5_CMD_OP_CREATE_MKEY: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); break; @@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -970,7 +1089,7 @@ obj_free: } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -978,7 +1097,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); void *cmd_out; int err; int uid; @@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; void *cmd_out; int err; int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); uid = devx_get_uid(c, cmd_in); if (uid < 0) @@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct devx_umem_reg_cmd cmd; struct devx_umem *obj; @@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1158,7 +1273,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); if (err) @@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); -DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); - -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +static bool devx_is_supported(struct ib_device *device) { - return &devx_objects; + struct mlx5_ib_dev *dev = to_mdev(device); + + return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); } + +const struct uapi_definition mlx5_ib_devx_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_OBJ, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_UMEM, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index f86cdcafdafc..e8a1e4498e3f 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; @@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); int len, ret, i; + u32 counter_id = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && - (dest_devx || dest_qp)) - return -EINVAL; - if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } - if (dev->rep) - return -ENOTSUPP; + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; cmd_in = uverbs_attr_get_alloced_ptr( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); @@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( } flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { @@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); @@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( @@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; - size_t num_actions; + int num_actions; void *in; - int len; int ret; if (!mlx5_ib_modify_header_supported(mdev)) @@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) - return -EINVAL; + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)); + if (num_actions < 0) + return num_actions; ret = uverbs_get_const(&ft_type, attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); if (ret) return ret; - - num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); if (IS_ERR(action)) return PTR_ERR(action); @@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx( } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, @@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -610,16 +619,20 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -DECLARE_UVERBS_OBJECT_TREE(flow_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); - -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +static bool flow_is_supported(struct ib_device *device) { - int i = 0; - - root[i++] = &flow_objects; - root[i++] = &mlx5_ib_fs; - root[i++] = &mlx5_ib_flow_actions; - - return i; + return !to_mdev(device)->rep; } + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_FLOW_MATCHER, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 32a9e9228b13..558638468edb 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; - } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) { + props->port_cap_flags2 = + be16_to_cpup((__be16 *)(out_mad->data + 60)); + + if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP) + props->active_width = out_mad->data[31] & 0x1f; + } + /* Check if extended speeds (EDR/FDR/...) are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; @@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, case 2: props->active_speed = 32; /* EDR */ break; + case 4: + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP && + props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP) + props->active_speed = IB_SPEED_HDR; + break; } } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e85974ab06c0..94fe253d4956 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -150,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev, int ret; memset(&attr, 0, sizeof(attr)); - ret = ibdev->query_port(ibdev, port_num, &attr); + ret = ibdev->ops.query_port(ibdev, port_num, &attr); if (!ret) *state = attr.state; return ret; @@ -172,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: - case NETDEV_UNREGISTER: write_lock(&roce->netdev_lock); if (ibdev->rep) { struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; @@ -181,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this, rep_ndev = mlx5_ib_get_rep_netdev(esw, ibdev->rep->vport); if (rep_ndev == ndev) - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } else if (ndev->dev.parent == &mdev->pdev->dev) { - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } write_unlock(&roce->netdev_lock); break; + case NETDEV_UNREGISTER: + write_lock(&roce->netdev_lock); + if (roce->netdev == ndev) + roce->netdev = NULL; + write_unlock(&roce->netdev_lock); + break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { @@ -1018,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cqe_128_always)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; + if (MLX5_CAP_GEN(mdev, qp_packet_based)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; } if (field_avail(typeof(resp), sw_parsing_caps, @@ -1105,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width, if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; + else if (active_width & MLX5_IB_WIDTH_2X) + *ib_width = IB_WIDTH_2X; else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; else if (active_width & MLX5_IB_WIDTH_8X) @@ -1220,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) + props->port_cap_flags2 = rep->cap_mask2; + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) goto out; @@ -1756,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, #endif if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev); + err = mlx5_ib_devx_create(dev, true); if (err < 0) goto out_uars; context->devx_uid = err; @@ -3710,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen) + void *cmd_in, int inlen, + int dst_num) { struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; @@ -3732,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = fs_matcher->match_criteria_enable; handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, 1); + flow_act, dst, dst_num); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3795,12 +3808,14 @@ struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; struct mlx5_ib_flow_handler *handler; + int dst_num = 0; bool mcast; int err; @@ -3810,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -3824,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, } if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst->type = dest_type; - dst->tir_num = dest_id; + dst[dst_num].type = dest_type; + dst[dst_num].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst->ft_num = dest_id; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } + dst_num++; + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, - cmd_in, inlen); + cmd_in, inlen, dst_num); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -5095,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { int err = 0; int i; + bool is_shared; + + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; for (i = 0; i < dev->num_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); @@ -5104,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, dev->port[i].cnts.offsets); - err = mlx5_core_alloc_q_counter(dev->mdev, - &dev->port[i].cnts.set_id); + err = mlx5_cmd_alloc_q_counter(dev->mdev, + &dev->port[i].cnts.set_id, + is_shared ? + MLX5_SHARED_RESOURCE_UID : 0); if (err) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", @@ -5382,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -static const struct cpumask * -mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_comp_irq_get_affinity_mask(dev->mdev, comp_vector); -} - /* The mlx5_ib_multiport_mutex should be held when calling this function */ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) @@ -5617,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); -static int populate_specs_root(struct mlx5_ib_dev *dev) -{ - const struct uverbs_object_tree_def **trees = dev->driver_trees; - size_t num_trees = 0; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - trees[num_trees++] = &mlx5_ib_flow_action; - - if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) - trees[num_trees++] = &mlx5_ib_dm; - - if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) - trees[num_trees++] = mlx5_ib_get_devx_tree(); - - num_trees += mlx5_ib_get_flow_trees(trees + num_trees); - - WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); - trees[num_trees] = NULL; - dev->ib_dev.driver_specs = trees; +static const struct uapi_definition mlx5_ib_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) + UAPI_DEF_CHAIN(mlx5_ib_devx_defs), + UAPI_DEF_CHAIN(mlx5_ib_flow_defs), +#endif - return 0; -} + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_action), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + {} +}; static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, @@ -5717,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); #endif kfree(dev->port); } @@ -5771,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->memic.dev = mdev; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } + err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_free_port; + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto err_mp; + } #endif return 0; @@ -5817,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) kfree(dev->flow_db); } +static const struct ib_device_ops mlx5_ib_dev_ops = { + .add_gid = mlx5_ib_add_gid, + .alloc_mr = mlx5_ib_alloc_mr, + .alloc_pd = mlx5_ib_alloc_pd, + .alloc_ucontext = mlx5_ib_alloc_ucontext, + .attach_mcast = mlx5_ib_mcg_attach, + .check_mr_status = mlx5_ib_check_mr_status, + .create_ah = mlx5_ib_create_ah, + .create_counters = mlx5_ib_create_counters, + .create_cq = mlx5_ib_create_cq, + .create_flow = mlx5_ib_create_flow, + .create_qp = mlx5_ib_create_qp, + .create_srq = mlx5_ib_create_srq, + .dealloc_pd = mlx5_ib_dealloc_pd, + .dealloc_ucontext = mlx5_ib_dealloc_ucontext, + .del_gid = mlx5_ib_del_gid, + .dereg_mr = mlx5_ib_dereg_mr, + .destroy_ah = mlx5_ib_destroy_ah, + .destroy_counters = mlx5_ib_destroy_counters, + .destroy_cq = mlx5_ib_destroy_cq, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, + .destroy_qp = mlx5_ib_destroy_qp, + .destroy_srq = mlx5_ib_destroy_srq, + .detach_mcast = mlx5_ib_mcg_detach, + .disassociate_ucontext = mlx5_ib_disassociate_ucontext, + .drain_rq = mlx5_ib_drain_rq, + .drain_sq = mlx5_ib_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mlx5_ib_get_dma_mr, + .get_link_layer = mlx5_ib_port_link_layer, + .map_mr_sg = mlx5_ib_map_mr_sg, + .mmap = mlx5_ib_mmap, + .modify_cq = mlx5_ib_modify_cq, + .modify_device = mlx5_ib_modify_device, + .modify_port = mlx5_ib_modify_port, + .modify_qp = mlx5_ib_modify_qp, + .modify_srq = mlx5_ib_modify_srq, + .poll_cq = mlx5_ib_poll_cq, + .post_recv = mlx5_ib_post_recv, + .post_send = mlx5_ib_post_send, + .post_srq_recv = mlx5_ib_post_srq_recv, + .process_mad = mlx5_ib_process_mad, + .query_ah = mlx5_ib_query_ah, + .query_device = mlx5_ib_query_device, + .query_gid = mlx5_ib_query_gid, + .query_pkey = mlx5_ib_query_pkey, + .query_qp = mlx5_ib_query_qp, + .query_srq = mlx5_ib_query_srq, + .read_counters = mlx5_ib_read_counters, + .reg_user_mr = mlx5_ib_reg_user_mr, + .req_notify_cq = mlx5_ib_arm_cq, + .rereg_user_mr = mlx5_ib_rereg_user_mr, + .resize_cq = mlx5_ib_resize_cq, +}; + +static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { + .rdma_netdev_get_params = mlx5_ib_rn_get_params, +}; + +static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { + .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_stats = mlx5_ib_get_vf_stats, + .set_vf_guid = mlx5_ib_set_vf_guid, + .set_vf_link_state = mlx5_ib_set_vf_link_state, +}; + +static const struct ib_device_ops mlx5_ib_dev_mw_ops = { + .alloc_mw = mlx5_ib_alloc_mw, + .dealloc_mw = mlx5_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { + .alloc_xrcd = mlx5_ib_alloc_xrcd, + .dealloc_xrcd = mlx5_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx5_ib_dev_dm_ops = { + .alloc_dm = mlx5_ib_alloc_dm, + .dealloc_dm = mlx5_ib_dealloc_dm, + .reg_dm_mr = mlx5_ib_reg_dm_mr, +}; + int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -5855,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - dev->ib_dev.query_device = mlx5_ib_query_device; - dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; - dev->ib_dev.query_gid = mlx5_ib_query_gid; - dev->ib_dev.add_gid = mlx5_ib_add_gid; - dev->ib_dev.del_gid = mlx5_ib_del_gid; - dev->ib_dev.query_pkey = mlx5_ib_query_pkey; - dev->ib_dev.modify_device = mlx5_ib_modify_device; - dev->ib_dev.modify_port = mlx5_ib_modify_port; - dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; - dev->ib_dev.mmap = mlx5_ib_mmap; - dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; - dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; - dev->ib_dev.create_ah = mlx5_ib_create_ah; - dev->ib_dev.query_ah = mlx5_ib_query_ah; - dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; - dev->ib_dev.create_srq = mlx5_ib_create_srq; - dev->ib_dev.modify_srq = mlx5_ib_modify_srq; - dev->ib_dev.query_srq = mlx5_ib_query_srq; - dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; - dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; - dev->ib_dev.create_qp = mlx5_ib_create_qp; - dev->ib_dev.modify_qp = mlx5_ib_modify_qp; - dev->ib_dev.query_qp = mlx5_ib_query_qp; - dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; - dev->ib_dev.drain_sq = mlx5_ib_drain_sq; - dev->ib_dev.drain_rq = mlx5_ib_drain_rq; - dev->ib_dev.post_send = mlx5_ib_post_send; - dev->ib_dev.post_recv = mlx5_ib_post_recv; - dev->ib_dev.create_cq = mlx5_ib_create_cq; - dev->ib_dev.modify_cq = mlx5_ib_modify_cq; - dev->ib_dev.resize_cq = mlx5_ib_resize_cq; - dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; - dev->ib_dev.poll_cq = mlx5_ib_poll_cq; - dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; - dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; - dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; - dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; - dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; - dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; - dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; - dev->ib_dev.process_mad = mlx5_ib_process_mad; - dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; - dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; - dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) - dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params; + ib_set_device_ops(&dev->ib_dev, + &mlx5_ib_dev_ipoib_enhanced_ops); - if (mlx5_core_is_pf(mdev)) { - dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; - dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; - dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; - dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; - } - - dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + if (mlx5_core_is_pf(mdev)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops); dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; - dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); } if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; - dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) { - dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; - dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; - dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; - } + if (MLX5_CAP_DEV_MEM(mdev, memic)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; - dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; - dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; - dev->ib_dev.create_counters = mlx5_ib_create_counters; - dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; - dev->ib_dev.read_counters = mlx5_ib_read_counters; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + dev->ib_dev.driver_def = mlx5_ib_defs; err = init_node_data(dev); if (err) @@ -5966,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_ops = { + .get_port_immutable = mlx5_port_immutable, + .query_port = mlx5_ib_query_port, +}; + static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_immutable; - dev->ib_dev.query_port = mlx5_ib_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { + .get_port_immutable = mlx5_port_rep_immutable, + .query_port = mlx5_ib_rep_query_port, +}; + int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable; - dev->ib_dev.query_port = mlx5_ib_rep_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { + .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table, + .create_wq = mlx5_ib_create_wq, + .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table, + .destroy_wq = mlx5_ib_destroy_wq, + .get_netdev = mlx5_ib_get_netdev, + .modify_wq = mlx5_ib_modify_wq, +}; + static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; @@ -5993,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) dev->roce[i].last_port_state = IB_PORT_DOWN; } - dev->ib_dev.get_netdev = mlx5_ib_get_netdev; - dev->ib_dev.create_wq = mlx5_ib_create_wq; - dev->ib_dev.modify_wq = mlx5_ib_modify_wq; - dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; - dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; - dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; - dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -6104,11 +6159,15 @@ void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_odp_cleanup_one(dev); } +static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, +}; + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; - dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); return mlx5_ib_alloc_counters(dev); } @@ -6166,11 +6225,6 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) -{ - return populate_specs_root(dev); -} - int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; @@ -6226,7 +6280,7 @@ static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) { int uid; - uid = mlx5_ib_devx_create(dev); + uid = mlx5_ib_devx_create(dev, false); if (uid > 0) dev->devx_whitelist_uid = uid; @@ -6318,9 +6372,6 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, mlx5_ib_stage_devx_init, mlx5_ib_stage_devx_cleanup), @@ -6372,9 +6423,6 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e507b6eb7c09..b06d3b1efea8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,7 +41,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -258,6 +257,7 @@ enum mlx5_ib_rq_flags { }; struct mlx5_ib_wq { + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; u32 *wr_data; struct wr_list *w_list; @@ -275,8 +275,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; - u16 last_poll; - void *qend; + void *cur_edge; }; enum mlx5_ib_wq_flags { @@ -461,6 +460,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_UNDERLAY = 1 << 10, MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, + MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, }; struct mlx5_umr_wr { @@ -524,6 +524,7 @@ struct mlx5_ib_srq { struct mlx5_core_srq msrq; struct mlx5_frag_buf buf; struct mlx5_db db; + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; /* protect SRQ hanlding */ @@ -541,7 +542,6 @@ struct mlx5_ib_srq { struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; - u16 uid; }; enum mlx5_ib_mtt_access_flags { @@ -784,7 +784,6 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, - MLX5_IB_STAGE_SPECS, MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, @@ -895,7 +894,6 @@ struct mlx5_ib_pf_eq { struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; @@ -924,6 +922,7 @@ struct mlx5_ib_dev { */ struct srcu_struct mr_srcu; u32 null_mkey; + struct workqueue_struct *advise_mr_wq; #endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -1043,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah); +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); @@ -1071,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); @@ -1088,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); @@ -1185,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1200,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} +static inline int +mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + struct ib_sge *sg_list, u32 num_sge) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1268,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, - int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, u32 counter_id, + void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + bool is_user) { return -EOPNOTSUPP; } static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline const struct uverbs_object_tree_def * -mlx5_ib_get_devx_tree(void) { return NULL; } static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { return false; } -static inline int -mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) -{ - return 0; -} static inline void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b195d65a13e..1bd8c1b1dba1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ - synchronize_srcu(&dev->mr_srcu); + if (mr->umem && mr->umem->is_odp) + synchronize_srcu(&dev->mr_srcu); #endif return err; @@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + bool odp_mkey_exist = false; +#endif struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; +#endif list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + bool odp_mkey_exist = false; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -1211,7 +1224,7 @@ err_1: return ERR_PTR(err); } -static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, int npages, u64 length, int access_flags) { mr->npages = npages; @@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, kfree(in); mr->umem = NULL; - set_mr_fileds(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, 0, length, acc); return &mr->ibmr; @@ -1280,6 +1293,21 @@ err_free: return ERR_PTR(err); } +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs) +{ + if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + return -EOPNOTSUPP; + + return mlx5_ib_advise_mr_prefetch(pd, advice, flags, + sg_list, num_sge); +} + struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs) @@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fileds(dev, mr, npages, length, access_flags); + set_mr_fields(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fileds(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, npages, len, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 7309fb6bf0d2..01e0f6200631 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -549,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } +#define MLX5_PF_FLAGS_PREFETCH BIT(0) +#define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped) + u64 io_virt, size_t bcnt, u32 *bytes_mapped, + u32 flags) { int npages = 0, current_seq, page_shift, ret, np; bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask = ODP_READ_ALLOWED_BIT; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -578,7 +583,15 @@ next_mr: page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; - if (mr->umem->writable) + if (prefetch && !downgrade && !mr->umem->writable) { + /* prefetch with write-access must + * be supported by the MR + */ + ret = -EINVAL; + goto out; + } + + if (mr->umem->writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -683,12 +696,13 @@ struct pf_frame { * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, - u32 key, u64 io_virt, size_t bcnt, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, + u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped) + u32 *bytes_mapped, u32 flags) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mw *mw; @@ -710,6 +724,12 @@ next_mr: goto srcu_unlock; } + if (prefetch && mmkey->type != MLX5_MKEY_MR) { + mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); + ret = -EINVAL; + goto srcu_unlock; + } + switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -719,6 +739,11 @@ next_mr: goto srcu_unlock; } + if (prefetch && !mr->umem->is_odp) { + ret = -EINVAL; + goto srcu_unlock; + } + if (!mr->umem->is_odp) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -728,7 +753,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped); + ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); if (ret < 0) goto srcu_unlock; @@ -905,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped); + bytes_mapped, 0); if (ret < 0) break; npages += ret; @@ -1216,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, rkey, address, length, - &pfault->bytes_committed, NULL); + &pfault->bytes_committed, NULL, + 0); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1243,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, rkey, address, prefetch_len, - &bytes_committed, NULL); + &bytes_committed, NULL, + 0); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1492,10 +1519,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) } } +static const struct ib_device_ops mlx5_ib_dev_odp_ops = { + .advise_mr = mlx5_ib_advise_mr, +}; + int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret = 0; + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); if (ret) { @@ -1527,3 +1561,76 @@ int mlx5_ib_odp_init(void) return 0; } + +struct prefetch_mr_work { + struct work_struct work; + struct mlx5_ib_dev *dev; + u32 pf_flags; + u32 num_sge; + struct ib_sge sg_list[0]; +}; + +static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, + struct ib_sge *sg_list, u32 num_sge) +{ + int i; + + for (i = 0; i < num_sge; ++i) { + struct ib_sge *sg = &sg_list[i]; + int bytes_committed = 0; + int ret; + + ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, + sg->length, + &bytes_committed, NULL, + pf_flags); + if (ret < 0) + return ret; + } + return 0; +} + +static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +{ + struct prefetch_mr_work *w = + container_of(work, struct prefetch_mr_work, work); + + if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, + w->num_sge); + + kfree(w); +} + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + struct prefetch_mr_work *work; + + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) + pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) + return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, + num_sge); + + if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) + return -ENODEV; + + work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + if (!work) + return -ENOMEM; + + memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + + work->dev = dev; + work->pf_flags = pf_flags; + work->num_sge = num_sge; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + schedule_work(&work->work); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a0e9ff763d42..9c94c1b9ec35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type) return is_qp0(qp_type) || is_qp1(qp_type); } -static void *get_wqe(struct mlx5_ib_qp *qp, int offset) -{ - return mlx5_buf_offset(&qp->buf, offset); -} - -static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); -} - -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); -} - /** * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. * @@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, __be64 *pas; void *qpc; int err; + u16 uid; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { @@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); + uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0; + MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); } +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. + */ +static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, @@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf); + err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size, + &qp->buf, dev->mdev->priv.numa_node); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } - qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + if (qp->rq.wqe_cnt) + mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift, + ilog2(qp->rq.wqe_cnt), &qp->rq.fbc); + + if (qp->sq.wqe_cnt) { + int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) / + MLX5_SEND_WQE_BB; + mlx5_init_fbc_offset(qp->buf.frags + + (qp->sq.offset / PAGE_SIZE), + ilog2(MLX5_SEND_WQE_BB), + ilog2(qp->sq.wqe_cnt), + sq_strides_offset, &qp->sq.fbc); + + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + } + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = kvzalloc(*inlen, GFP_KERNEL); @@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, - (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); + mlx5_fill_page_frag_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -1024,7 +1052,7 @@ err_free: kvfree(*in); err_buf: - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); return err; } @@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) kvfree(qp->sq.wr_data); kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; } - if (pd && pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; @@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_BFREG_INDEX | MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), @@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } + if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { + if (init_attr->qp_type != IB_QPT_RC || + !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { + mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; + } + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || (MLX5_CAP_GEN(dev->mdev, port_type) != @@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, (pd && pd->uobject) ? &ucmd : NULL); + qp, udata ? &ucmd : NULL); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } if (pd) { - if (pd->uobject) { + if (udata) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); @@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - + if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { configure_responder_scat_cqe(init_attr, qpc); configure_requester_scat_cqe(dev, init_attr, - (pd && pd->uobject) ? &ucmd : NULL, + udata ? &ucmd : NULL, qpc); } @@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!pd->uobject) { + if (!udata) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); } else if (!to_mucontext(pd->uobject->context)->cqe_version) { @@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) if (rate == IB_RATE_PORT_CURRENT) return 0; - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; while (rate != IB_RATE_PORT_CURRENT && @@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.head = 0; qp->sq.tail = 0; qp->sq.cur_post = 0; - qp->sq.last_poll = 0; + if (qp->sq.wqe_cnt) + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { req |= IB_QP_PATH_MTU; - opt = IB_QP_PKEY_INDEX; + opt = IB_QP_PKEY_INDEX | IB_QP_AV; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -3749,6 +3789,62 @@ out: return err; } +static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + if (likely(*seg != *cur_edge)) + return; + + _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; @@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - const struct ib_send_wr *wr, void *qend, - struct mlx5_ib_qp *qp, int *size) +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) { - void *seg = eseg; + struct mlx5_wqe_eth_seg *eseg = *seg; memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); @@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; - seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; - if (wr->opcode == IB_WR_LSO) { struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start); - u64 left, leftlen, copysz; + size_t left, copysz; void *pdata = ud_wr->header; + size_t stride; left = ud_wr->hlen; eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* - * check if there is space till the end of queue, if yes, - * copy all in one shot, otherwise copy till the end of queue, - * rollback and than the copy the left + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * fall-through to memcpy_send_wqe. */ - leftlen = qend - (void *)eseg->inline_hdr.start; - copysz = min_t(u64, leftlen, left); - - memcpy(seg - size_of_inl_hdr_start, pdata, copysz); - - if (likely(copysz > size_of_inl_hdr_start)) { - seg += ALIGN(copysz - size_of_inl_hdr_start, 16); - *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; - } - - if (unlikely(copysz < left)) { /* the last wqe in the queue */ - seg = mlx5_get_send_wqe(qp, 0); + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy(seg, pdata, left); - seg += ALIGN(left, 16); - *size += ALIGN(left, 16) / 16; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); } + + return; } - return seg; + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, @@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } -static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, - struct mlx5_ib_mr *mr, int mr_list_size) -{ - void *qend = qp->sq.qend; - void *addr = mr->descs; - int copy; - - if (unlikely(seg + mr_list_size > qend)) { - copy = qend - seg; - memcpy(seg, addr, copy); - addr += copy; - mr_list_size -= copy; - seg = mlx5_get_send_wqe(qp, 0); - } - memcpy(seg, addr, mr_list_size); - seg += mr_list_size; -} - static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe) } static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void *wqe, int *sz) + void **wqe, int *wqe_sz, void **cur_edge) { struct mlx5_wqe_inline_seg *seg; - void *qend = qp->sq.qend; - void *addr; + size_t offset; int inl = 0; - int copy; - int len; int i; - seg = wqe; - wqe += sizeof(*seg); + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { - addr = (void *)(unsigned long)(wr->sg_list[i].addr); - len = wr->sg_list[i].length; + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + inl += len; if (unlikely(inl > qp->max_inline_data)) return -ENOMEM; - if (unlikely(wqe + len > qend)) { - copy = qend - wqe; - memcpy(wqe, addr, copy); - addr += copy; - len -= copy; - wqe = mlx5_get_send_wqe(qp, 0); + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; } - memcpy(wqe, addr, len); - wqe += len; } seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; return 0; } @@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, } static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, + int *size, void **cur_edge) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; struct ib_mr *sig_mr = wr->sig_mr; @@ -4364,8 +4446,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += wqe_size; *size += wqe_size / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); bsf = *seg; ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); @@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += sizeof(*bsf); *size += sizeof(*bsf) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); return 0; } @@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); @@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size); + ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); if (ret) return ret; @@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size) + void **seg, int *size, void **cur_edge) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - int mr_list_size = mr->ndescs * mr->desc_size; + size_t mr_list_size = mr->ndescs * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { @@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); - *size += get_xlt_octo(mr_list_size); + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); *seg += sizeof(struct mlx5_wqe_data_seg); @@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return 0; } -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); } -static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - int tidx = idx; + u32 tidx = idx; int i, j; - pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - void *buf = mlx5_get_send_wqe(qp, tidx); tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = buf; + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + pr_debug("WQBB at %p:\n", (void *)p); j = 0; } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), @@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq, bool send_signaled, bool solicited) + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_get_send_wqe(qp, *idx); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); @@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; return 0; } @@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, wr->send_flags & IB_SEND_SIGNALED, wr->send_flags & IB_SEND_SOLICITED); } static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, - u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u32 mlx5_opcode) + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) { u8 opmod = 0; @@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.wqe_head[idx] = qp->sq.head + nreq; qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? + get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; } static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; - struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; + void *cur_edge; int uninitialized_var(size); - void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp = to_mqp(ibqp); bf = &qp->bf; - qend = qp->sq.qend; spin_lock_irqsave(&qp->sq.lock, flags); @@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_WR_LOCAL_INV: qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size); + set_linv_wr(qp, &seg, &size, &cur_edge); num_sge = 0; break; case IB_WR_REG_MR: qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size, + &cur_edge); if (err) { *bad_wr = wr; goto out; @@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size); + err = set_sig_umr_wr(wr, qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error */ err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); + break; case IB_QPT_UD: set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); /* handle qp that supports ud offload */ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { @@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); seg += sizeof(struct mlx5_wqe_eth_pad); size += sizeof(struct mlx5_wqe_eth_pad) / 16; - - seg = set_eth_seg(seg, wr, qend, qp, &size); - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + set_eth_seg(wr, qp, &seg, &size, &cur_edge); + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); } break; case MLX5_IB_QPT_REG_UMR: @@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); set_reg_mkey_segment(seg, wr); seg += sizeof(struct mlx5_mkey_seg); size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); break; default: @@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } if (wr->send_flags & IB_SEND_INLINE && num_sge) { - int uninitialized_var(sz); - - err = set_data_inl_seg(qp, wr, seg, &sz); + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - size += sz; } else { - dpseg = seg; for (i = 0; i < num_sge; i++) { - if (unlikely(dpseg == qend)) { - seg = mlx5_get_send_wqe(qp, 0); - dpseg = seg; - } + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg(dpseg, wr->sg_list + i); + set_data_ptr_seg + ((struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); size += sizeof(struct mlx5_wqe_data_seg) / 16; - dpseg++; + seg += sizeof(struct mlx5_wqe_data_seg); } } } qp->next_fence = next_fence; - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, - mlx5_ib_opcode[wr->opcode]); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); @@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, goto out; } - scat = get_recv_wqe(qp, ind); + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); if (qp->wq_sig) scat++; @@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; - u16 uid; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); @@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, if (!xrcd) return ERR_PTR(-ENOMEM); - uid = context ? to_mucontext(context)->devx_uid : 0; - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } - xrcd->uid = uid; return &xrcd->ibxrcd; } @@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - u16 uid = to_mxrcd(xrcd)->uid; int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 91dcd3918d96..4e8d18009f58 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -13,7 +13,7 @@ static void *get_wqe(struct mlx5_ib_srq *srq, int n) { - return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); + return mlx5_frag_buf_get_wqe(&srq->fbc, n); } static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) @@ -113,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; - in->uid = to_mpd(pd)->uid; + in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) in->user_index = uidx; @@ -142,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { + if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf, + dev->mdev->priv.numa_node)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; } + mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max), + &srq->fbc); + srq->head = 0; srq->tail = srq->msrq.max - 1; srq->wqe_ctr = 0; @@ -164,7 +168,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, in->pas); + mlx5_fill_page_frag_array(&srq->buf, in->pas); srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { @@ -184,7 +188,7 @@ err_in: kvfree(in->pas); err_buf: - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); err_db: mlx5_db_free(dev->mdev, &srq->db); @@ -201,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { kvfree(srq->wrid); - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } @@ -256,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, } in.type = init_attr->srq_type; - if (pd->uobject) + if (udata) err = create_srq_user(pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", - pd->uobject ? "user" : "kernel", err); + udata ? "user" : "kernel", err); goto err_srq; } @@ -308,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; @@ -323,7 +327,7 @@ err_core: mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: - if (pd->uobject) + if (udata) destroy_srq_user(pd, srq); else destroy_srq_kernel(dev, srq); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 220a3e4717a3..bfd4eebc1182 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq); + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); @@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp); + struct mthca_qp *qp, + struct ib_udata *udata); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp); + struct mthca_sqp *sqp, + struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, struct mthca_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 2e5dc0a67cfc..7ad517da4917 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev, rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], + RDMA_DESTROY_AH_SLEEPABLE); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 691c6f048938..82cb6b71ac7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd) static struct ib_ah *mthca_ah_create(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd, return &ah->ibah; } -static int mthca_ah_destroy(struct ib_ah *ah) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); @@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, if (!srq) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -475,9 +476,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, } err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), - &init_attr->attr, srq); + &init_attr->attr, srq, udata); - if (err && pd->uobject) + if (err && udata) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); @@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - &init_attr->cap, qp); + &init_attr->cap, qp, udata); - if (err && pd->uobject) { + if (err && udata) { context = to_mucontext(pd->uobject->context); mthca_unmap_user_db(to_mdev(pd->device), @@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Don't allow userspace to create special QPs */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); @@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp)); + to_msqp(qp), udata); break; } default: @@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str) (int) dev->fw_ver & 0xffff); } +static const struct ib_device_ops mthca_dev_ops = { + .alloc_pd = mthca_alloc_pd, + .alloc_ucontext = mthca_alloc_ucontext, + .attach_mcast = mthca_multicast_attach, + .create_ah = mthca_ah_create, + .create_cq = mthca_create_cq, + .create_qp = mthca_create_qp, + .dealloc_pd = mthca_dealloc_pd, + .dealloc_ucontext = mthca_dealloc_ucontext, + .dereg_mr = mthca_dereg_mr, + .destroy_ah = mthca_ah_destroy, + .destroy_cq = mthca_destroy_cq, + .destroy_qp = mthca_destroy_qp, + .detach_mcast = mthca_multicast_detach, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mthca_get_dma_mr, + .get_port_immutable = mthca_port_immutable, + .mmap = mthca_mmap_uar, + .modify_device = mthca_modify_device, + .modify_port = mthca_modify_port, + .modify_qp = mthca_modify_qp, + .poll_cq = mthca_poll_cq, + .process_mad = mthca_process_mad, + .query_ah = mthca_ah_query, + .query_device = mthca_query_device, + .query_gid = mthca_query_gid, + .query_pkey = mthca_query_pkey, + .query_port = mthca_query_port, + .query_qp = mthca_query_qp, + .reg_user_mr = mthca_reg_user_mr, + .resize_cq = mthca_resize_cq, +}; + +static const struct ib_device_ops mthca_dev_arbel_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_arbel_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_tavor_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_tavor_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_arbel_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_tavor_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_tavor_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_arbel_ops = { + .post_recv = mthca_arbel_post_receive, + .post_send = mthca_arbel_post_send, + .req_notify_cq = mthca_arbel_arm_cq, +}; + +static const struct ib_device_ops mthca_dev_tavor_ops = { + .post_recv = mthca_tavor_post_receive, + .post_send = mthca_tavor_post_send, + .req_notify_cq = mthca_tavor_arm_cq, +}; + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.query_device = mthca_query_device; - dev->ib_dev.query_port = mthca_query_port; - dev->ib_dev.modify_device = mthca_modify_device; - dev->ib_dev.modify_port = mthca_modify_port; - dev->ib_dev.query_pkey = mthca_query_pkey; - dev->ib_dev.query_gid = mthca_query_gid; - dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; - dev->ib_dev.mmap = mthca_mmap_uar; - dev->ib_dev.alloc_pd = mthca_alloc_pd; - dev->ib_dev.dealloc_pd = mthca_dealloc_pd; - dev->ib_dev.create_ah = mthca_ah_create; - dev->ib_dev.query_ah = mthca_ah_query; - dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; - dev->ib_dev.query_srq = mthca_query_srq; - dev->ib_dev.destroy_srq = mthca_destroy_srq; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | @@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) - dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_srq_ops); else - dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_srq_ops); } - dev->ib_dev.create_qp = mthca_create_qp; - dev->ib_dev.modify_qp = mthca_modify_qp; - dev->ib_dev.query_qp = mthca_query_qp; - dev->ib_dev.destroy_qp = mthca_destroy_qp; - dev->ib_dev.create_cq = mthca_create_cq; - dev->ib_dev.resize_cq = mthca_resize_cq; - dev->ib_dev.destroy_cq = mthca_destroy_cq; - dev->ib_dev.poll_cq = mthca_poll_cq; - dev->ib_dev.get_dma_mr = mthca_get_dma_mr; - dev->ib_dev.reg_user_mr = mthca_reg_user_mr; - dev->ib_dev.dereg_mr = mthca_dereg_mr; - dev->ib_dev.get_port_immutable = mthca_port_immutable; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - if (dev->mthca_flags & MTHCA_FLAG_FMR) { - dev->ib_dev.alloc_fmr = mthca_alloc_fmr; - dev->ib_dev.unmap_fmr = mthca_unmap_fmr; - dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; if (mthca_is_memfree(dev)) - dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_fmr_ops); else - dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_fmr_ops); } - dev->ib_dev.attach_mcast = mthca_multicast_attach; - dev->ib_dev.detach_mcast = mthca_multicast_detach; - dev->ib_dev.process_mad = mthca_process_mad; + ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops); - if (mthca_is_memfree(dev)) { - dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; - dev->ib_dev.post_send = mthca_arbel_post_send; - dev->ib_dev.post_recv = mthca_arbel_post_receive; - } else { - dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; - dev->ib_dev.post_send = mthca_tavor_post_send; - dev->ib_dev.post_recv = mthca_tavor_post_receive; - } + if (mthca_is_memfree(dev)) + ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops); + else + ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops); mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9d178ee3c96a..4e5b5cc17f1d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int size; int err = -ENOMEM; @@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. */ - if (pd->ibpd.uobject) + if (udata) return 0; size = PAGE_ALIGN(qp->send_wqe_offset + @@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int ret; int i; @@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, if (ret) return ret; - ret = mthca_alloc_wqe_buf(dev, pd, qp); + ret = mthca_alloc_wqe_buf(dev, pd, qp, udata); if (ret) { mthca_unmap_memfree(dev, qp); return ret; @@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, * will be allocated and buffers will be initialized in * userspace. */ - if (pd->ibpd.uobject) + if (udata) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int err; @@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, qp->port = 0; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, qp); + send_policy, qp, udata); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; @@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp) + struct mthca_sqp *sqp, + struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; @@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp); + send_policy, &sqp->qp, udata); if (err) goto err_out_free; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 9a3fc6fb0d7e..b8333c79e3fa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe) static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_tavor_srq_context *context) + struct mthca_tavor_srq_context *context, + bool is_user) { memset(context, 0, sizeof *context); @@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, context->state_pd = cpu_to_be32(pd->pd_num); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) + if (is_user) context->uar = cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_arbel_srq_context *context) + struct mthca_arbel_srq_context *context, + bool is_user) { int logsize, max; @@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) + if (is_user) context->logstride_usrpage |= cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) } static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_srq *srq) + struct mthca_srq *srq, struct ib_udata *udata) { struct mthca_data_seg *scatter; void *wqe; int err; int i; - if (pd->ibpd.uobject) + if (udata) return 0; srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); @@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, } int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq) + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata) { struct mthca_mailbox *mailbox; int ds; @@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!udata) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, srq->srqn, &srq->db); if (srq->db_index < 0) { @@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_db; } - err = mthca_alloc_srq_buf(dev, pd, srq); + err = mthca_alloc_srq_buf(dev, pd, srq, udata); if (err) goto err_out_mailbox; @@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) - mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata); else - mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata); err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); @@ -297,14 +300,14 @@ err_out_free_srq: mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); err_out_free_buf: - if (!pd->ibpd.uobject) + if (!udata) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!udata && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 2b67ace5b614..032883180f65 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 92d1cadd4cfd..4e7f08ee1907 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if ((ibpd->uobject) && (ibpd->uobject->context)) { + if (udata && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); if (virt_wqs) { @@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_put_cqp_request(nesdev, cqp_request); - if (ibpd->uobject) { + if (udata) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.mmap_rq_db_index = 0; uresp.actual_sq_size = sq_size; @@ -3627,6 +3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } +static const struct ib_device_ops nes_dev_ops = { + .alloc_mr = nes_alloc_mr, + .alloc_mw = nes_alloc_mw, + .alloc_pd = nes_alloc_pd, + .alloc_ucontext = nes_alloc_ucontext, + .create_cq = nes_create_cq, + .create_qp = nes_create_qp, + .dealloc_mw = nes_dealloc_mw, + .dealloc_pd = nes_dealloc_pd, + .dealloc_ucontext = nes_dealloc_ucontext, + .dereg_mr = nes_dereg_mr, + .destroy_cq = nes_destroy_cq, + .destroy_qp = nes_destroy_qp, + .drain_rq = nes_drain_rq, + .drain_sq = nes_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = nes_get_dma_mr, + .get_port_immutable = nes_port_immutable, + .map_mr_sg = nes_map_mr_sg, + .mmap = nes_mmap, + .modify_qp = nes_modify_qp, + .poll_cq = nes_poll_cq, + .post_recv = nes_post_recv, + .post_send = nes_post_send, + .query_device = nes_query_device, + .query_gid = nes_query_gid, + .query_pkey = nes_query_pkey, + .query_port = nes_query_port, + .query_qp = nes_query_qp, + .reg_user_mr = nes_reg_user_mr, + .req_notify_cq = nes_req_notify_cq, +}; + /** * nes_init_ofa_device */ @@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.phys_port_cnt = 1; nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.query_device = nes_query_device; - nesibdev->ibdev.query_port = nes_query_port; - nesibdev->ibdev.query_pkey = nes_query_pkey; - nesibdev->ibdev.query_gid = nes_query_gid; - nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; - nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; - nesibdev->ibdev.mmap = nes_mmap; - nesibdev->ibdev.alloc_pd = nes_alloc_pd; - nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_qp = nes_create_qp; - nesibdev->ibdev.modify_qp = nes_modify_qp; - nesibdev->ibdev.query_qp = nes_query_qp; - nesibdev->ibdev.destroy_qp = nes_destroy_qp; - nesibdev->ibdev.create_cq = nes_create_cq; - nesibdev->ibdev.destroy_cq = nes_destroy_cq; - nesibdev->ibdev.poll_cq = nes_poll_cq; - nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; - nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; - nesibdev->ibdev.dereg_mr = nes_dereg_mr; - nesibdev->ibdev.alloc_mw = nes_alloc_mw; - nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; - - nesibdev->ibdev.alloc_mr = nes_alloc_mr; - nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; - nesibdev->ibdev.post_send = nes_post_send; - nesibdev->ibdev.post_recv = nes_post_recv; - nesibdev->ibdev.drain_sq = nes_drain_sq; - nesibdev->ibdev.drain_rq = nes_drain_rq; nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { @@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - nesibdev->ibdev.get_port_immutable = nes_port_immutable; - nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; + + ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 58188fe5aed2..a7295322efbc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, } struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { u32 *ahid_addr; int status; @@ -219,7 +219,7 @@ av_err: return ERR_PTR(status); } -int ocrdma_destroy_ah(struct ib_ah *ibah) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index c0c32c9b80ae..eb996e14b520 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -52,8 +52,8 @@ enum { }; struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 873cc7f6fe61..1f393842453a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = { .attrs = ocrdma_attributes, }; +static const struct ib_device_ops ocrdma_dev_ops = { + .alloc_mr = ocrdma_alloc_mr, + .alloc_pd = ocrdma_alloc_pd, + .alloc_ucontext = ocrdma_alloc_ucontext, + .create_ah = ocrdma_create_ah, + .create_cq = ocrdma_create_cq, + .create_qp = ocrdma_create_qp, + .dealloc_pd = ocrdma_dealloc_pd, + .dealloc_ucontext = ocrdma_dealloc_ucontext, + .dereg_mr = ocrdma_dereg_mr, + .destroy_ah = ocrdma_destroy_ah, + .destroy_cq = ocrdma_destroy_cq, + .destroy_qp = ocrdma_destroy_qp, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = ocrdma_get_dma_mr, + .get_link_layer = ocrdma_link_layer, + .get_netdev = ocrdma_get_netdev, + .get_port_immutable = ocrdma_port_immutable, + .map_mr_sg = ocrdma_map_mr_sg, + .mmap = ocrdma_mmap, + .modify_port = ocrdma_modify_port, + .modify_qp = ocrdma_modify_qp, + .poll_cq = ocrdma_poll_cq, + .post_recv = ocrdma_post_recv, + .post_send = ocrdma_post_send, + .process_mad = ocrdma_process_mad, + .query_ah = ocrdma_query_ah, + .query_device = ocrdma_query_device, + .query_pkey = ocrdma_query_pkey, + .query_port = ocrdma_query_port, + .query_qp = ocrdma_query_qp, + .reg_user_mr = ocrdma_reg_user_mr, + .req_notify_cq = ocrdma_arm_cq, + .resize_cq = ocrdma_resize_cq, +}; + +static const struct ib_device_ops ocrdma_dev_srq_ops = { + .create_srq = ocrdma_create_srq, + .destroy_srq = ocrdma_destroy_srq, + .modify_srq = ocrdma_modify_srq, + .post_srq_recv = ocrdma_post_srq_recv, + .query_srq = ocrdma_query_srq, +}; + static int ocrdma_register_device(struct ocrdma_dev *dev) { ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); @@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; - /* mandatory verbs. */ - dev->ibdev.query_device = ocrdma_query_device; - dev->ibdev.query_port = ocrdma_query_port; - dev->ibdev.modify_port = ocrdma_modify_port; - dev->ibdev.get_netdev = ocrdma_get_netdev; - dev->ibdev.get_link_layer = ocrdma_link_layer; - dev->ibdev.alloc_pd = ocrdma_alloc_pd; - dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; - - dev->ibdev.create_cq = ocrdma_create_cq; - dev->ibdev.destroy_cq = ocrdma_destroy_cq; - dev->ibdev.resize_cq = ocrdma_resize_cq; - - dev->ibdev.create_qp = ocrdma_create_qp; - dev->ibdev.modify_qp = ocrdma_modify_qp; - dev->ibdev.query_qp = ocrdma_query_qp; - dev->ibdev.destroy_qp = ocrdma_destroy_qp; - - dev->ibdev.query_pkey = ocrdma_query_pkey; - dev->ibdev.create_ah = ocrdma_create_ah; - dev->ibdev.destroy_ah = ocrdma_destroy_ah; - dev->ibdev.query_ah = ocrdma_query_ah; - - dev->ibdev.poll_cq = ocrdma_poll_cq; - dev->ibdev.post_send = ocrdma_post_send; - dev->ibdev.post_recv = ocrdma_post_recv; - dev->ibdev.req_notify_cq = ocrdma_arm_cq; - - dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; - dev->ibdev.dereg_mr = ocrdma_dereg_mr; - dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; - - dev->ibdev.alloc_mr = ocrdma_alloc_mr; - dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; - /* mandatory to support user space verbs consumer. */ - dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; - dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; - dev->ibdev.mmap = ocrdma_mmap; dev->ibdev.dev.parent = &dev->nic_info.pdev->dev; - dev->ibdev.process_mad = ocrdma_process_mad; - dev->ibdev.get_port_immutable = ocrdma_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) OCRDMA_UVERBS(DESTROY_SRQ) | OCRDMA_UVERBS(POST_SRQ_RECV); - dev->ibdev.create_srq = ocrdma_create_srq; - dev->ibdev.modify_srq = ocrdma_modify_srq; - dev->ibdev.query_srq = ocrdma_query_srq; - dev->ibdev.destroy_srq = ocrdma_destroy_srq; - dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 290d776edf48..dd15474b19b7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = { void ocrdma_add_port_stats(struct ocrdma_dev *dev) { + const struct pci_dev *pdev = dev->nic_info.pdev; + if (!ocrdma_dbgfs_dir) return; /* Create post stats base dir */ - dev->dir = - debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir); + dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir); if (!dev->dir) goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06d2a7f3304c..c46bed0c5513 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device *ibdev, /* props being zeroed by the caller, avoid zeroing it here */ dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); - return -EINVAL; - } netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; @@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev, int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct ocrdma_dev *dev; - - dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); - return -EINVAL; - } return 0; } @@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) } static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { if ((attrs->qp_type != IB_QPT_GSI) && (attrs->qp_type != IB_QPT_RC) && @@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, return -EINVAL; } /* unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); @@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; - status = ocrdma_check_qp_params(ibpd, dev, attrs); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 8d6ff9df49fe..75940e2a8791 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = { .attrs = qedr_attributes, }; +static const struct ib_device_ops qedr_iw_dev_ops = { + .get_port_immutable = qedr_iw_port_immutable, + .query_gid = qedr_iw_query_gid, +}; + static int qedr_iw_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_RNIC; - dev->ibdev.query_gid = qedr_iw_query_gid; - dev->ibdev.get_port_immutable = qedr_iw_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev) return 0; } +static const struct ib_device_ops qedr_roce_dev_ops = { + .get_port_immutable = qedr_roce_port_immutable, +}; + static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; - dev->ibdev.get_port_immutable = qedr_roce_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); } +static const struct ib_device_ops qedr_dev_ops = { + .alloc_mr = qedr_alloc_mr, + .alloc_pd = qedr_alloc_pd, + .alloc_ucontext = qedr_alloc_ucontext, + .create_ah = qedr_create_ah, + .create_cq = qedr_create_cq, + .create_qp = qedr_create_qp, + .create_srq = qedr_create_srq, + .dealloc_pd = qedr_dealloc_pd, + .dealloc_ucontext = qedr_dealloc_ucontext, + .dereg_mr = qedr_dereg_mr, + .destroy_ah = qedr_destroy_ah, + .destroy_cq = qedr_destroy_cq, + .destroy_qp = qedr_destroy_qp, + .destroy_srq = qedr_destroy_srq, + .get_dev_fw_str = qedr_get_dev_fw_str, + .get_dma_mr = qedr_get_dma_mr, + .get_link_layer = qedr_link_layer, + .get_netdev = qedr_get_netdev, + .map_mr_sg = qedr_map_mr_sg, + .mmap = qedr_mmap, + .modify_port = qedr_modify_port, + .modify_qp = qedr_modify_qp, + .modify_srq = qedr_modify_srq, + .poll_cq = qedr_poll_cq, + .post_recv = qedr_post_recv, + .post_send = qedr_post_send, + .post_srq_recv = qedr_post_srq_recv, + .process_mad = qedr_process_mad, + .query_device = qedr_query_device, + .query_pkey = qedr_query_pkey, + .query_port = qedr_query_port, + .query_qp = qedr_query_qp, + .query_srq = qedr_query_srq, + .reg_user_mr = qedr_reg_user_mr, + .req_notify_cq = qedr_arm_cq, + .resize_cq = qedr_resize_cq, +}; + static int qedr_register_device(struct qedr_dev *dev) { int rc; @@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; - - dev->ibdev.query_device = qedr_query_device; - dev->ibdev.query_port = qedr_query_port; - dev->ibdev.modify_port = qedr_modify_port; - - dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; - dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; - dev->ibdev.mmap = qedr_mmap; - - dev->ibdev.alloc_pd = qedr_alloc_pd; - dev->ibdev.dealloc_pd = qedr_dealloc_pd; - - dev->ibdev.create_cq = qedr_create_cq; - dev->ibdev.destroy_cq = qedr_destroy_cq; - dev->ibdev.resize_cq = qedr_resize_cq; - dev->ibdev.req_notify_cq = qedr_arm_cq; - - dev->ibdev.create_qp = qedr_create_qp; - dev->ibdev.modify_qp = qedr_modify_qp; - dev->ibdev.query_qp = qedr_query_qp; - dev->ibdev.destroy_qp = qedr_destroy_qp; - - dev->ibdev.create_srq = qedr_create_srq; - dev->ibdev.destroy_srq = qedr_destroy_srq; - dev->ibdev.modify_srq = qedr_modify_srq; - dev->ibdev.query_srq = qedr_query_srq; - dev->ibdev.post_srq_recv = qedr_post_srq_recv; - dev->ibdev.query_pkey = qedr_query_pkey; - - dev->ibdev.create_ah = qedr_create_ah; - dev->ibdev.destroy_ah = qedr_destroy_ah; - - dev->ibdev.get_dma_mr = qedr_get_dma_mr; - dev->ibdev.dereg_mr = qedr_dereg_mr; - dev->ibdev.reg_user_mr = qedr_reg_user_mr; - dev->ibdev.alloc_mr = qedr_alloc_mr; - dev->ibdev.map_mr_sg = qedr_map_mr_sg; - - dev->ibdev.poll_cq = qedr_poll_cq; - dev->ibdev.post_send = qedr_post_send; - dev->ibdev.post_recv = qedr_post_recv; - - dev->ibdev.process_mad = qedr_process_mad; - - dev->ibdev.get_netdev = qedr_get_netdev; - dev->ibdev.dev.parent = &dev->pdev->dev; - dev->ibdev.get_link_layer = qedr_link_layer; - dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group); + ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); + dev->ibdev.driver_id = RDMA_DRIVER_QEDR; return ib_register_device(&dev->ibdev, "qedr%d", NULL); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 82ee4b4a7084..b342a70e2814 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) struct qed_rdma_port *rdma_port; dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } if (!dev->rdma_ctx) { DP_ERR(dev, "rdma_ctx is NULL\n"); @@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct qedr_dev *dev; - - dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } - return 0; } @@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, } static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { struct qedr_device_attr *qattr = &dev->attr; @@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, } /* Unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { DP_ERR(dev, "create qp: userspace can't create special QPs of type=0x%x\n", attrs->qp_type); @@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); - if (ibsrq->pd->uobject) + if (ibsrq->uobject) qedr_free_srq_user_params(srq); else qedr_free_srq_kernel_params(srq); @@ -2005,7 +1994,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); - rc = qedr_check_qp_attrs(ibpd, dev, attrs); + rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata); if (rc) return ERR_PTR(rc); @@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp) } struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct qedr_ah *ah; @@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, return &ah->ibah; } -int qedr_destroy_ah(struct ib_ah *ibah) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 0b7d0124b16c..1852b7012bf4 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah); + u32 flags, struct ib_udata *udata); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index fb1ff59f40bd..cdbf707fa267 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 163a57a88742..9fde45538f6e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); @@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd) unsigned word = i / 64; unsigned bit = i & 63; - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bf5e222eed8e..17d6b24b3473 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs, *msg++ = ','; len--; } - BUG_ON(!msp->sz); /* msp->sz counts the nul */ took = min_t(size_t, msp->sz - (size_t)1, len); memcpy(msg, msp->msg, took); @@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) unsigned word = erstbuf / BITS_PER_LONG; unsigned bit = erstbuf & (BITS_PER_LONG - 1); - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index d7cdc77d6306..9fd69903ca57 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; - BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); rcd->rcvegrbufs_perchunk_shift = ilog2(rcd->rcvegrbufs_perchunk); } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 4845d000c22f..f92faf5ec369 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) - rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah); + rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 30595b358d8f..864f2af171f7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); -MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); /* * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 757d4c9d713d..3d64081c4819 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -572,12 +572,13 @@ retry: len = sge->length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 4d4c31ea4e2d..868da0ece7ba 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); sge->vaddr += len; sge->length -= len; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 926f3c8eba69..31c523b2a9f5 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); - BUG_ON(ret == 0); } pq->sdma_rb_node = sdma_rb_node; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4b0f5761a646..276304f611ab 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) len = length; if (len > sge.sge_length) len = sge.sge_length; - BUG_ON(len == 0); if (((long) sge.vaddr & (sizeof(u32) - 1)) || (len != length && (len & (sizeof(u32) - 1)))) { ndesc = 0; @@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); memcpy(data, sge->vaddr, len); sge->vaddr += len; sge->length -= len; @@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, len = length; if (len > ss->sge.sge_length) len = ss->sge.sge_length; - BUG_ON(len == 0); /* If the source address is not aligned, try to align it. */ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); if (off) { @@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } @@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd) dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode; } +static const struct ib_device_ops qib_dev_ops = { + .modify_device = qib_modify_device, + .process_mad = qib_process_mad, +}; + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->node_guid = ppd->guid; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = qib_modify_device; - ibdev->process_mad = qib_process_mad; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); @@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd) } rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); + ib_set_device_ops(ibdev, &qib_dev_ops); ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 73bd00f8d2c8..b2323a52a0dd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops usnic_dev_ops = { + .alloc_pd = usnic_ib_alloc_pd, + .alloc_ucontext = usnic_ib_alloc_ucontext, + .create_ah = usnic_ib_create_ah, + .create_cq = usnic_ib_create_cq, + .create_qp = usnic_ib_create_qp, + .dealloc_pd = usnic_ib_dealloc_pd, + .dealloc_ucontext = usnic_ib_dealloc_ucontext, + .dereg_mr = usnic_ib_dereg_mr, + .destroy_ah = usnic_ib_destroy_ah, + .destroy_cq = usnic_ib_destroy_cq, + .destroy_qp = usnic_ib_destroy_qp, + .get_dev_fw_str = usnic_get_dev_fw_str, + .get_dma_mr = usnic_ib_get_dma_mr, + .get_link_layer = usnic_ib_port_link_layer, + .get_netdev = usnic_get_netdev, + .get_port_immutable = usnic_port_immutable, + .mmap = usnic_ib_mmap, + .modify_qp = usnic_ib_modify_qp, + .poll_cq = usnic_ib_poll_cq, + .post_recv = usnic_ib_post_recv, + .post_send = usnic_ib_post_send, + .query_device = usnic_ib_query_device, + .query_gid = usnic_ib_query_gid, + .query_pkey = usnic_ib_query_pkey, + .query_port = usnic_ib_query_port, + .query_qp = usnic_ib_query_qp, + .reg_user_mr = usnic_ib_reg_mr, + .req_notify_cq = usnic_ib_req_notify_cq, +}; + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - us_ibdev->ib_dev.query_device = usnic_ib_query_device; - us_ibdev->ib_dev.query_port = usnic_ib_query_port; - us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; - us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; - us_ibdev->ib_dev.get_netdev = usnic_get_netdev; - us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; - us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; - us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; - us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; - us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; - us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; - us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; - us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; - us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; - us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; - us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; - us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; - us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; - us_ibdev->ib_dev.mmap = usnic_ib_mmap; - us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; - us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; - us_ibdev->ib_dev.post_send = usnic_ib_post_send; - us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; - us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; - us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; - us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; - us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; - us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; - + ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); @@ -649,7 +652,7 @@ static int __init usnic_ib_init(void) err = usnic_uiom_init(DRV_NAME); if (err) { - usnic_err("Unable to initalize umem with err %d\n", err); + usnic_err("Unable to initialize umem with err %d\n", err); return err; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index bf5136533d49..0cdb156e165e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], res_spec); if (err) { - usnic_err("Spec does not meet miniumum req for transport %d\n", + usnic_err("Spec does not meet minimum req for transport %d\n", transport); log_spec(res_spec); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 0b91ff36768a..1d4abef17e38 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); - mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - } + /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. + */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, return ERR_PTR(-EPERM); } -int usnic_ib_destroy_ah(struct ib_ah *ah) +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2a2c9beb715f..e33144261b9a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); -int usnic_ib_destroy_ah(struct ib_ah *ah); +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 398443f43dc3..eaa109dbc96a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, return netdev; } +static const struct ib_device_ops pvrdma_dev_ops = { + .add_gid = pvrdma_add_gid, + .alloc_mr = pvrdma_alloc_mr, + .alloc_pd = pvrdma_alloc_pd, + .alloc_ucontext = pvrdma_alloc_ucontext, + .create_ah = pvrdma_create_ah, + .create_cq = pvrdma_create_cq, + .create_qp = pvrdma_create_qp, + .dealloc_pd = pvrdma_dealloc_pd, + .dealloc_ucontext = pvrdma_dealloc_ucontext, + .del_gid = pvrdma_del_gid, + .dereg_mr = pvrdma_dereg_mr, + .destroy_ah = pvrdma_destroy_ah, + .destroy_cq = pvrdma_destroy_cq, + .destroy_qp = pvrdma_destroy_qp, + .get_dev_fw_str = pvrdma_get_fw_ver_str, + .get_dma_mr = pvrdma_get_dma_mr, + .get_link_layer = pvrdma_port_link_layer, + .get_netdev = pvrdma_get_netdev, + .get_port_immutable = pvrdma_port_immutable, + .map_mr_sg = pvrdma_map_mr_sg, + .mmap = pvrdma_mmap, + .modify_port = pvrdma_modify_port, + .modify_qp = pvrdma_modify_qp, + .poll_cq = pvrdma_poll_cq, + .post_recv = pvrdma_post_recv, + .post_send = pvrdma_post_send, + .query_device = pvrdma_query_device, + .query_gid = pvrdma_query_gid, + .query_pkey = pvrdma_query_pkey, + .query_port = pvrdma_query_port, + .query_qp = pvrdma_query_qp, + .reg_user_mr = pvrdma_reg_user_mr, + .req_notify_cq = pvrdma_req_notify_cq, +}; + +static const struct ib_device_ops pvrdma_dev_srq_ops = { + .create_srq = pvrdma_create_srq, + .destroy_srq = pvrdma_destroy_srq, + .modify_srq = pvrdma_modify_srq, + .query_srq = pvrdma_query_srq, +}; + static int pvrdma_register_device(struct pvrdma_dev *dev) { int ret = -1; @@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; - dev->ib_dev.query_device = pvrdma_query_device; - dev->ib_dev.query_port = pvrdma_query_port; - dev->ib_dev.query_gid = pvrdma_query_gid; - dev->ib_dev.query_pkey = pvrdma_query_pkey; - dev->ib_dev.modify_port = pvrdma_modify_port; - dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; - dev->ib_dev.mmap = pvrdma_mmap; - dev->ib_dev.alloc_pd = pvrdma_alloc_pd; - dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; - dev->ib_dev.create_ah = pvrdma_create_ah; - dev->ib_dev.destroy_ah = pvrdma_destroy_ah; - dev->ib_dev.create_qp = pvrdma_create_qp; - dev->ib_dev.modify_qp = pvrdma_modify_qp; - dev->ib_dev.query_qp = pvrdma_query_qp; - dev->ib_dev.destroy_qp = pvrdma_destroy_qp; - dev->ib_dev.post_send = pvrdma_post_send; - dev->ib_dev.post_recv = pvrdma_post_recv; - dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.destroy_cq = pvrdma_destroy_cq; - dev->ib_dev.poll_cq = pvrdma_poll_cq; - dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; - dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; - dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; - dev->ib_dev.dereg_mr = pvrdma_dereg_mr; - dev->ib_dev.alloc_mr = pvrdma_alloc_mr; - dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; - dev->ib_dev.add_gid = pvrdma_add_gid; - dev->ib_dev.del_gid = pvrdma_del_gid; - dev->ib_dev.get_netdev = pvrdma_get_netdev; - dev->ib_dev.get_port_immutable = pvrdma_port_immutable; - dev->ib_dev.get_link_layer = pvrdma_port_link_layer; - dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); mutex_init(&dev->port_mutex); spin_lock_init(&dev->desc_lock); @@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->ib_dev.create_srq = pvrdma_create_srq; - dev->ib_dev.modify_srq = pvrdma_modify_srq; - dev->ib_dev.query_srq = pvrdma_query_srq; - dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index cf22f57a9f0d..3acf74cbe266 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, init_completion(&qp->free); qp->state = IB_QPS_RESET; - qp->is_kernel = !(pd->uobject && udata); + qp->is_kernel = !udata; if (!qp->is_kernel) { dev_dbg(&dev->pdev->dev, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index dc0ce877c7a3..06ba7c7a2235 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, unsigned long flags; int ret; - if (!(pd->uobject && udata)) { + if (!udata) { /* No support for kernel clients. */ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index b65d10b0a875..4d238d0e484b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) * @pd: the protection domain * @ah_attr: the attributes of the AH * @udata: user data blob + * @flags: create address handle flags (see enum rdma_create_ah_flags) * * @return: the ib_ah pointer on success, otherwise errno. */ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_ah *ah; @@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); @@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, /** * pvrdma_destroy_ah - destroy an address handle * @ah: the address handle to destroyed + * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * @return: 0 on success. */ -int pvrdma_destroy_ah(struct ib_ah *ah) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b2e3ab50cb08..f7f758d60110 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, |