diff options
author | Yuval Shaia <yuval.shaia@oracle.com> | 2018-12-21 16:40:25 +0200 |
---|---|---|
committer | Marcel Apfelbaum <marcel.apfelbaum@gmail.com> | 2018-12-22 11:09:56 +0200 |
commit | 2b05705dc8ad80c09a3aa9cc70c14fb8323b0fd3 (patch) | |
tree | f3d3a9f7632b9fd1dcfa8a2dbdd83de617222289 /hw/rdma | |
parent | 4a5c9903f3164cef134e7f81b361b3fa7d5c0b52 (diff) |
hw/pvrdma: Add support to allow guest to configure GID table
The control over the RDMA device's GID table is done by updating the
device's Ethernet function addresses.
Usually the first GID entry is determined by the MAC address, the second
by the first IPv6 address and the third by the IPv4 address. Other
entries can be added by adding more IP addresses. The reverse also
holds, i.e. whenever an address is removed, the corresponding GID entry
is removed.
The process is done by the network and RDMA stacks. Whenever an address
is added, the ib_core driver is notified and calls the device driver's
add_gid function, which in turn updates the device.
To support this in the pvrdma device we need to hook into the create_bind
and destroy_bind HW commands triggered by the pvrdma driver in the guest.
Whenever a change is made to the pvrdma port's GID table a special QMP
message is sent to be processed by libvirt to update the address of the
backend Ethernet device.
Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Diffstat (limited to 'hw/rdma')
-rw-r--r-- | hw/rdma/rdma_backend.c | 344 | ||||
-rw-r--r-- | hw/rdma/rdma_backend.h | 22 | ||||
-rw-r--r-- | hw/rdma/rdma_backend_defs.h | 11 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.c | 104 | ||||
-rw-r--r-- | hw/rdma/rdma_rm.h | 17 | ||||
-rw-r--r-- | hw/rdma/rdma_rm_defs.h | 9 | ||||
-rw-r--r-- | hw/rdma/rdma_utils.h | 16 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma.h | 2 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_cmd.c | 55 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_main.c | 25 | ||||
-rw-r--r-- | hw/rdma/vmw/pvrdma_qp_ops.c | 20 |
11 files changed, 462 insertions, 163 deletions
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index c6dedda555..1d496bbd95 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -15,15 +15,18 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "sysemu/sysemu.h" #include "qapi/error.h" #include "qapi/qmp/qlist.h" #include "qapi/qmp/qnum.h" +#include "qapi/qapi-events-rdma.h" #include <infiniband/verbs.h> #include <infiniband/umad_types.h> #include <infiniband/umad.h> #include <rdma/rdma_user_cm.h> +#include "contrib/rdmacm-mux/rdmacm-mux.h" #include "trace.h" #include "rdma_utils.h" #include "rdma_rm.h" @@ -160,6 +163,77 @@ static void *comp_handler_thread(void *arg) return NULL; } +static inline void disable_rdmacm_mux_async(RdmaBackendDev *backend_dev) +{ + atomic_set(&backend_dev->rdmacm_mux.can_receive, 0); +} + +static inline void enable_rdmacm_mux_async(RdmaBackendDev *backend_dev) +{ + atomic_set(&backend_dev->rdmacm_mux.can_receive, sizeof(RdmaCmMuxMsg)); +} + +static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev) +{ + return atomic_read(&backend_dev->rdmacm_mux.can_receive); +} + +static int check_mux_op_status(CharBackend *mad_chr_be) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("Reading response\n"); + ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg)); + if (ret != sizeof(msg)) { + pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg)); + return -EIO; + } + + pr_dbg("msg_type=%d\n", msg.hdr.msg_type); + pr_dbg("op_code=%d\n", msg.hdr.op_code); + pr_dbg("err_code=%d\n", msg.hdr.err_code); + + if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) { + pr_dbg("Invalid message type %d\n", msg.hdr.msg_type); + return -EIO; + } + + if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) { + pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code); + return -EIO; + } + + return 0; +} + +static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg) +{ + int rc = 0; + + pr_dbg("Executing 
request %d\n", msg->hdr.op_code); + + msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; + disable_rdmacm_mux_async(backend_dev); + rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be, + (const uint8_t *)msg, sizeof(*msg)); + if (rc != sizeof(*msg)) { + enable_rdmacm_mux_async(backend_dev); + pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", rc); + return -EIO; + } + + rc = check_mux_op_status(backend_dev->rdmacm_mux.chr_be); + if (rc) { + pr_dbg("Fail to execute rdmacm_mux request %d (rc=%d)\n", + msg->hdr.op_code, rc); + } + + enable_rdmacm_mux_async(backend_dev); + + return 0; +} + static void stop_backend_thread(RdmaBackendThread *thread) { thread->run = false; @@ -300,11 +374,11 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, return 0; } -static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge, - uint32_t num_sge) +static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, + union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge) { - struct backend_umad umad = {0}; - char *hdr, *msg; + RdmaCmMuxMsg msg = {0}; + char *hdr, *data; int ret; pr_dbg("num_sge=%d\n", num_sge); @@ -313,26 +387,31 @@ static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge, return -EINVAL; } - umad.hdr.length = sge[0].length + sge[1].length; - pr_dbg("msg_len=%d\n", umad.hdr.length); + msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD; + memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid)); - if (umad.hdr.length > sizeof(umad.mad)) { + msg.umad_len = sge[0].length + sge[1].length; + pr_dbg("umad_len=%d\n", msg.umad_len); + + if (msg.umad_len > sizeof(msg.umad.mad)) { return -ENOMEM; } - umad.hdr.addr.qpn = htobe32(1); - umad.hdr.addr.grh_present = 1; - umad.hdr.addr.gid_index = backend_dev->backend_gid_idx; - memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid)); - umad.hdr.addr.hop_limit = 0xFF; + msg.umad.hdr.addr.qpn = htobe32(1); + msg.umad.hdr.addr.grh_present = 1; + pr_dbg("sgid_idx=%d\n", sgid_idx); + 
pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); + msg.umad.hdr.addr.gid_index = sgid_idx; + memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid)); + msg.umad.hdr.addr.hop_limit = 0xFF; hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length); if (!hdr) { pr_dbg("Fail to map to sge[0]\n"); return -ENOMEM; } - msg = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length); - if (!msg) { + data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length); + if (!data) { pr_dbg("Fail to map to sge[1]\n"); rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); return -ENOMEM; @@ -341,25 +420,27 @@ static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge, pr_dbg_buf("mad_hdr", hdr, sge[0].length); pr_dbg_buf("mad_data", data, sge[1].length); - memcpy(&umad.mad[0], hdr, sge[0].length); - memcpy(&umad.mad[sge[0].length], msg, sge[1].length); + memcpy(&msg.umad.mad[0], hdr, sge[0].length); + memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length); - rdma_pci_dma_unmap(backend_dev->dev, msg, sge[1].length); + rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length); rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); - ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad, - sizeof(umad)); - - pr_dbg("qemu_chr_fe_write=%d\n", ret); + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret); + return -EIO; + } - return (ret != sizeof(umad)); + return 0; } void rdma_backend_post_send(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, - union ibv_gid *dgid, uint32_t dqpn, - uint32_t dqkey, void *ctx) + uint8_t sgid_idx, union ibv_gid *sgid, + union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey, + void *ctx) { BackendCtx *bctx; struct ibv_sge new_sge[MAX_SGE]; @@ -373,7 +454,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, 
ctx); } else if (qp_type == IBV_QPT_GSI) { pr_dbg("QP1\n"); - rc = mad_send(backend_dev, sge, num_sge); + rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge); if (rc) { comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); } else { @@ -409,8 +490,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, } if (qp_type == IBV_QPT_UD) { - wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, - backend_dev->backend_gid_idx, dgid); + wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid); if (!wr.wr.ud.ah) { comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); goto out_dealloc_cqe_ctx; @@ -715,9 +795,9 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, } int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, - uint8_t qp_type, union ibv_gid *dgid, - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, - bool use_qkey) + uint8_t qp_type, uint8_t sgid_idx, + union ibv_gid *dgid, uint32_t dqpn, + uint32_t rq_psn, uint32_t qkey, bool use_qkey) { struct ibv_qp_attr attr = {0}; union ibv_gid ibv_gid = { @@ -729,13 +809,15 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, attr.qp_state = IBV_QPS_RTR; attr_mask = IBV_QP_STATE; + qp->sgid_idx = sgid_idx; + switch (qp_type) { case IBV_QPT_RC: pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n", be64_to_cpu(ibv_gid.global.subnet_prefix), be64_to_cpu(ibv_gid.global.interface_id)); pr_dbg("dqpn=0x%x\n", dqpn); - pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx); + pr_dbg("sgid_idx=%d\n", qp->sgid_idx); pr_dbg("sport_num=%d\n", backend_dev->port_num); pr_dbg("rq_psn=0x%x\n", rq_psn); @@ -747,7 +829,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, attr.ah_attr.is_global = 1; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.grh.dgid = ibv_gid; - attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx; + attr.ah_attr.grh.sgid_index = qp->sgid_idx; attr.rq_psn = rq_psn; attr_mask |= IBV_QP_AV | 
IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | @@ -756,8 +838,8 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, break; case IBV_QPT_UD: + pr_dbg("qkey=0x%x\n", qkey); if (use_qkey) { - pr_dbg("qkey=0x%x\n", qkey); attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } @@ -873,29 +955,19 @@ static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid, grh->dgid = *my_gid; pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen); - pr_dbg("my_gid=0x%llx\n", my_gid->global.interface_id); - pr_dbg("gid=0x%llx\n", sgid->global.interface_id); -} - -static inline int mad_can_receieve(void *opaque) -{ - return sizeof(struct backend_umad); + pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id); + pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); } -static void mad_read(void *opaque, const uint8_t *buf, int size) +static void process_incoming_mad_req(RdmaBackendDev *backend_dev, + RdmaCmMuxMsg *msg) { - RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; QObject *o_ctx_id; unsigned long cqe_ctx_id; BackendCtx *bctx; char *mad; - struct backend_umad *umad; - - assert(size != sizeof(umad)); - umad = (struct backend_umad *)buf; - pr_dbg("Got %d bytes\n", size); - pr_dbg("umad->hdr.length=%d\n", umad->hdr.length); + pr_dbg("umad_len=%d\n", msg->umad_len); #ifdef PVRDMA_DEBUG struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad; @@ -925,15 +997,16 @@ static void mad_read(void *opaque, const uint8_t *buf, int size) mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr, bctx->sge.length); - if (!mad || bctx->sge.length < umad->hdr.length + MAD_HDR_SIZE) { + if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) { comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF, bctx->up_ctx); } else { + pr_dbg_buf("mad", msg->umad.mad, msg->umad_len); memset(mad, 0, bctx->sge.length); build_mad_hdr((struct ibv_grh *)mad, - (union ibv_gid *)&umad->hdr.addr.gid, - &backend_dev->gid, umad->hdr.length); - memcpy(&mad[MAD_HDR_SIZE], umad->mad, 
umad->hdr.length); + (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid, + msg->umad_len); + memcpy(&mad[MAD_HDR_SIZE], msg->umad.mad, msg->umad_len); rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length); comp_handler(IBV_WC_SUCCESS, 0, bctx->up_ctx); @@ -943,56 +1016,151 @@ static void mad_read(void *opaque, const uint8_t *buf, int size) rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); } -static int mad_init(RdmaBackendDev *backend_dev) +static inline int rdmacm_mux_can_receive(void *opaque) { - struct backend_umad umad = {0}; - int ret; + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; - if (!qemu_chr_fe_backend_connected(backend_dev->mad_chr_be)) { - pr_dbg("Missing chardev for MAD multiplexer\n"); - return -EIO; + return rdmacm_mux_can_process_async(backend_dev); +} + +static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size) +{ + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; + RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf; + + pr_dbg("Got %d bytes\n", size); + pr_dbg("msg_type=%d\n", msg->hdr.msg_type); + pr_dbg("op_code=%d\n", msg->hdr.op_code); + + if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ && + msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) { + pr_dbg("Error: Not a MAD request, skipping\n"); + return; } + process_incoming_mad_req(backend_dev, msg); +} + +static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) +{ + int ret; - qemu_chr_fe_set_handlers(backend_dev->mad_chr_be, mad_can_receieve, - mad_read, NULL, NULL, backend_dev, NULL, true); + backend_dev->rdmacm_mux.chr_be = mad_chr_be; - /* Register ourself */ - memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid)); - ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad, - sizeof(umad.hdr)); - if (ret != sizeof(umad.hdr)) { - pr_dbg("Fail to register to rdma_umadmux (%d)\n", ret); + ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be); + if (!ret) { + pr_dbg("Missing chardev for 
MAD multiplexer\n"); + return -EIO; } qemu_mutex_init(&backend_dev->recv_mads_list.lock); backend_dev->recv_mads_list.list = qlist_new(); + enable_rdmacm_mux_async(backend_dev); + + qemu_chr_fe_set_handlers(backend_dev->rdmacm_mux.chr_be, + rdmacm_mux_can_receive, rdmacm_mux_read, NULL, + NULL, backend_dev, NULL, true); + return 0; } static void mad_fini(RdmaBackendDev *backend_dev) { + pr_dbg("Stopping MAD\n"); + disable_rdmacm_mux_async(backend_dev); + qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be); qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list)); qemu_mutex_destroy(&backend_dev->recv_mads_list.lock); } +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, + union ibv_gid *gid) +{ + union ibv_gid sgid; + int ret; + int i = 0; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + do { + ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i, + &sgid); + i++; + } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)))); + + pr_dbg("gid_index=%d\n", i - 1); + + return ret ? 
ret : i - 1; +} + +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG; + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); + + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret); + return -EIO; + } + + qapi_event_send_rdma_gid_status_changed(ifname, true, + gid->global.subnet_prefix, + gid->global.interface_id); + + return ret; +} + +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid) +{ + RdmaCmMuxMsg msg = {0}; + int ret; + + pr_dbg("0x%llx, 0x%llx\n", + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + + msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG; + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); + + ret = exec_rdmacm_mux_req(backend_dev, &msg); + if (ret) { + pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret); + return -EIO; + } + + qapi_event_send_rdma_gid_status_changed(ifname, false, + gid->global.subnet_prefix, + gid->global.interface_id); + + return 0; +} + int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, - CharBackend *mad_chr_be, Error **errp) + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, + Error **errp) { int i; int ret = 0; int num_ibv_devices; struct ibv_device **dev_list; - struct ibv_port_attr port_attr; memset(backend_dev, 0, sizeof(*backend_dev)); backend_dev->dev = pdev; - backend_dev->mad_chr_be = mad_chr_be; - backend_dev->backend_gid_idx = backend_gid_idx; 
backend_dev->port_num = port_num; backend_dev->rdma_dev_res = rdma_dev_res; @@ -1029,9 +1197,9 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, backend_dev->ib_dev = *dev_list; } - pr_dbg("Using backend device %s, port %d, gid_idx %d\n", - ibv_get_device_name(backend_dev->ib_dev), - backend_dev->port_num, backend_dev->backend_gid_idx); + pr_dbg("Using backend device %s, port %d\n", + ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num); + pr_dbg("uverb device %s\n", backend_dev->ib_dev->dev_name); backend_dev->context = ibv_open_device(backend_dev->ib_dev); if (!backend_dev->context) { @@ -1048,20 +1216,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, } pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel); - ret = ibv_query_port(backend_dev->context, backend_dev->port_num, - &port_attr); - if (ret) { - error_setg(errp, "Error %d from ibv_query_port", ret); - ret = -EIO; - goto out_destroy_comm_channel; - } - - if (backend_dev->backend_gid_idx >= port_attr.gid_tbl_len) { - error_setg(errp, "Invalid backend_gid_idx, should be less than %d", - port_attr.gid_tbl_len); - goto out_destroy_comm_channel; - } - ret = init_device_caps(backend_dev, dev_attr); if (ret) { error_setg(errp, "Failed to initialize device capabilities"); @@ -1069,20 +1223,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, goto out_destroy_comm_channel; } - ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, - backend_dev->backend_gid_idx, &backend_dev->gid); - if (ret) { - error_setg(errp, "Failed to query gid %d", - backend_dev->backend_gid_idx); - ret = -EIO; - goto out_destroy_comm_channel; - } - pr_dbg("subnet_prefix=0x%" PRIx64 "\n", - be64_to_cpu(backend_dev->gid.global.subnet_prefix)); - pr_dbg("interface_id=0x%" PRIx64 "\n", - be64_to_cpu(backend_dev->gid.global.interface_id)); - ret = mad_init(backend_dev); + ret = mad_init(backend_dev, mad_chr_be); if (ret) { error_setg(errp, "Fail to 
initialize mad"); ret = -EIO; diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index fc83330251..59ad2b874b 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -28,11 +28,6 @@ enum ibv_special_qp_type { IBV_QPT_GSI = 1, }; -static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev) -{ - return &dev->gid; -} - static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp) { return qp->ibqp ? qp->ibqp->qp_num : 1; @@ -51,9 +46,15 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr) int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr, - CharBackend *mad_chr_be, Error **errp); + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, + Error **errp); void rdma_backend_fini(RdmaBackendDev *backend_dev); +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid); +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, + union ibv_gid *gid); +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, + union ibv_gid *gid); void rdma_backend_start(RdmaBackendDev *backend_dev); void rdma_backend_stop(RdmaBackendDev *backend_dev); void rdma_backend_register_comp_handler(void (*handler)(int status, @@ -82,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint32_t qkey); int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, - uint8_t qp_type, union ibv_gid *dgid, - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, - bool use_qkey); + uint8_t qp_type, uint8_t sgid_idx, + union ibv_gid *dgid, uint32_t dqpn, + uint32_t rq_psn, uint32_t qkey, bool use_qkey); int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, uint32_t sq_psn, uint32_t qkey, bool 
use_qkey); int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, @@ -94,6 +95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp); void rdma_backend_post_send(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, + uint8_t sgid_idx, union ibv_gid *sgid, union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey, void *ctx); void rdma_backend_post_recv(RdmaBackendDev *backend_dev, diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 2a7e667075..1e5c3dd3bf 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -19,6 +19,7 @@ #include "qemu/thread.h" #include "chardev/char-fe.h" #include <infiniband/verbs.h> +#include "contrib/rdmacm-mux/rdmacm-mux.h" typedef struct RdmaDeviceResources RdmaDeviceResources; @@ -34,19 +35,22 @@ typedef struct RecvMadList { QList *list; } RecvMadList; +typedef struct RdmaCmMux { + CharBackend *chr_be; + int can_receive; +} RdmaCmMux; + typedef struct RdmaBackendDev { struct ibv_device_attr dev_attr; RdmaBackendThread comp_thread; - union ibv_gid gid; PCIDevice *dev; RdmaDeviceResources *rdma_dev_res; struct ibv_device *ib_dev; struct ibv_context *context; struct ibv_comp_channel *channel; uint8_t port_num; - uint8_t backend_gid_idx; RecvMadList recv_mads_list; - CharBackend *mad_chr_be; + RdmaCmMux rdmacm_mux; } RdmaBackendDev; typedef struct RdmaBackendPD { @@ -66,6 +70,7 @@ typedef struct RdmaBackendCQ { typedef struct RdmaBackendQP { struct ibv_pd *ibpd; struct ibv_qp *ibqp; + uint8_t sgid_idx; } RdmaBackendQP; #endif diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 4f10fcabcc..250254561c 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -391,7 +391,7 @@ out_dealloc_qp: } int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, - uint32_t qp_handle, uint32_t attr_mask, + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, union ibv_gid *dgid, uint32_t dqpn, enum ibv_qp_state qp_state, 
uint32_t qkey, uint32_t rq_psn, uint32_t sq_psn) @@ -400,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int ret; pr_dbg("qpn=0x%x\n", qp_handle); + pr_dbg("qkey=0x%x\n", qkey); qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { @@ -430,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, } if (qp->qp_state == IBV_QPS_RTR) { + /* Get backend gid index */ + pr_dbg("Guest sgid_idx=%d\n", sgid_idx); + sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev, + sgid_idx); + if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */ + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx); + return -EIO; + } + ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp, - qp->qp_type, dgid, dqpn, rq_psn, - qkey, attr_mask & IBV_QP_QKEY); + qp->qp_type, sgid_idx, dgid, dqpn, + rq_psn, qkey, + attr_mask & IBV_QP_QKEY); if (ret) { return -EIO; } @@ -523,11 +534,91 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); } +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, union ibv_gid *gid, int gid_idx) +{ + int rc; + + rc = rdma_backend_add_gid(backend_dev, ifname, gid); + if (rc) { + pr_dbg("Fail to add gid\n"); + return -EINVAL; + } + + memcpy(&dev_res->ports[0].gid_tbl[gid_idx].gid, gid, sizeof(*gid)); + + return 0; +} + +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, int gid_idx) +{ + int rc; + + rc = rdma_backend_del_gid(backend_dev, ifname, + &dev_res->ports[0].gid_tbl[gid_idx].gid); + if (rc) { + pr_dbg("Fail to delete gid\n"); + return -EINVAL; + } + + memset(dev_res->ports[0].gid_tbl[gid_idx].gid.raw, 0, + sizeof(dev_res->ports[0].gid_tbl[gid_idx].gid)); + dev_res->ports[0].gid_tbl[gid_idx].backend_gid_index = -1; + + return 0; +} + +int 
rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, int sgid_idx) +{ + if (unlikely(sgid_idx < 0 || sgid_idx > MAX_PORT_GIDS)) { + pr_dbg("Got invalid sgid_idx %d\n", sgid_idx); + return -EINVAL; + } + + if (unlikely(dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index == -1)) { + dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index = + rdma_backend_get_gid_index(backend_dev, + &dev_res->ports[0].gid_tbl[sgid_idx].gid); + } + + pr_dbg("backend_gid_index=%d\n", + dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index); + + return dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index; +} + static void destroy_qp_hash_key(gpointer data) { g_bytes_unref(data); } +static void init_ports(RdmaDeviceResources *dev_res) +{ + int i, j; + + memset(dev_res->ports, 0, sizeof(dev_res->ports)); + + for (i = 0; i < MAX_PORTS; i++) { + dev_res->ports[i].state = IBV_PORT_DOWN; + for (j = 0; j < MAX_PORT_GIDS; j++) { + dev_res->ports[i].gid_tbl[j].backend_gid_index = -1; + } + } +} + +static void fini_ports(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, const char *ifname) +{ + int i; + + dev_res->ports[0].state = IBV_PORT_DOWN; + for (i = 0; i < MAX_PORT_GIDS; i++) { + rdma_rm_del_gid(dev_res, backend_dev, ifname, i); + } +} + int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, Error **errp) { @@ -545,11 +636,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, dev_attr->max_qp_wr, sizeof(void *)); res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC)); + init_ports(dev_res); + return 0; } -void rdma_rm_fini(RdmaDeviceResources *dev_res) +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname) { + fini_ports(dev_res, backend_dev, ifname); + res_tbl_free(&dev_res->uc_tbl); res_tbl_free(&dev_res->cqe_ctx_tbl); res_tbl_free(&dev_res->qp_tbl); diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index 
b4e04cc7b4..a7169b4e89 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -22,7 +22,8 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, Error **errp); -void rdma_rm_fini(RdmaDeviceResources *dev_res); +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname); int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, uint32_t *pd_handle, uint32_t ctx_handle); @@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, uint32_t recv_cq_handle, void *opaque, uint32_t *qpn); RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn); int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, - uint32_t qp_handle, uint32_t attr_mask, + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx, union ibv_gid *dgid, uint32_t dqpn, enum ibv_qp_state qp_state, uint32_t qkey, uint32_t rq_psn, uint32_t sq_psn); @@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id); +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, union ibv_gid *gid, int gid_idx); +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, + const char *ifname, int gid_idx); +int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, + RdmaBackendDev *backend_dev, int sgid_idx); +static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res, + int sgid_idx) +{ + return &dev_res->ports[0].gid_tbl[sgid_idx].gid; +} + #endif diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 9b399063d3..7b3435f991 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -19,7 +19,7 @@ #include "rdma_backend_defs.h" #define MAX_PORTS 1 -#define 
MAX_PORT_GIDS 1 +#define MAX_PORT_GIDS 255 #define MAX_GIDS MAX_PORT_GIDS #define MAX_PORT_PKEYS 1 #define MAX_PKEYS MAX_PORT_PKEYS @@ -86,8 +86,13 @@ typedef struct RdmaRmQP { enum ibv_qp_state qp_state; } RdmaRmQP; +typedef struct RdmaRmGid { + union ibv_gid gid; + int backend_gid_index; +} RdmaRmGid; + typedef struct RdmaRmPort { - union ibv_gid gid_tbl[MAX_PORT_GIDS]; + RdmaRmGid gid_tbl[MAX_PORT_GIDS]; enum ibv_port_state state; } RdmaRmPort; diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index c4f96c4f2a..062e2cd688 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -19,6 +19,7 @@ #include "hw/pci/pci.h" #include "sysemu/dma.h" +#include "stdio.h" #define pr_info(fmt, ...) \ fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\ @@ -39,9 +40,24 @@ extern unsigned long pr_dbg_cnt; #define pr_dbg(fmt, ...) \ fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \ __func__, __LINE__, ## __VA_ARGS__) + +#define pr_dbg_buf(title, buf, len) \ +{ \ + int i; \ + char *b = g_malloc0(len * 3 + 1); \ + char b1[4]; \ + for (i = 0; i < len; i++) { \ + sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \ + strcat(b, b1); \ + } \ + pr_dbg("%s (%d): %s\n", title, len, b); \ + g_free(b); \ +} + #else #define init_pr_dbg(void) #define pr_dbg(fmt, ...) 
+#define pr_dbg_buf(title, buf, len) #endif void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen); diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h index 15c3f28b86..b019cb843a 100644 --- a/hw/rdma/vmw/pvrdma.h +++ b/hw/rdma/vmw/pvrdma.h @@ -79,8 +79,8 @@ typedef struct PVRDMADev { int interrupt_mask; struct ibv_device_attr dev_attr; uint64_t node_guid; + char *backend_eth_device_name; char *backend_device_name; - uint8_t backend_gid_idx; uint8_t backend_port_num; RdmaBackendDev backend_dev; RdmaDeviceResources rdma_dev_res; diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 57d6f41ae6..a334f6205e 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -504,13 +504,16 @@ static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, rsp->hdr.response = cmd->hdr.response; rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP; - rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, - cmd->qp_handle, cmd->attr_mask, - (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid, - cmd->attrs.dest_qp_num, - (enum ibv_qp_state)cmd->attrs.qp_state, - cmd->attrs.qkey, cmd->attrs.rq_psn, - cmd->attrs.sq_psn); + /* No need to verify sgid_index since it is u8 */ + + rsp->hdr.err = + rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle, + cmd->attr_mask, cmd->attrs.ah_attr.grh.sgid_index, + (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid, + cmd->attrs.dest_qp_num, + (enum ibv_qp_state)cmd->attrs.qp_state, + cmd->attrs.qkey, cmd->attrs.rq_psn, + cmd->attrs.sq_psn); pr_dbg("ret=%d\n", rsp->hdr.err); return rsp->hdr.err; @@ -570,10 +573,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { struct pvrdma_cmd_create_bind *cmd = &req->create_bind; -#ifdef PVRDMA_DEBUG - __be64 *subnet = (__be64 *)&cmd->new_gid[0]; - __be64 *if_id = (__be64 *)&cmd->new_gid[8]; -#endif + int rc; + union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid; pr_dbg("index=%d\n", cmd->index); 
@@ -582,19 +583,24 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, } pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index, - (long long unsigned int)be64_to_cpu(*subnet), - (long long unsigned int)be64_to_cpu(*if_id)); + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), + (long long unsigned int)be64_to_cpu(gid->global.interface_id)); - /* Driver forces to one port only */ - memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid, - sizeof(cmd->new_gid)); + rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name, gid, cmd->index); + if (rc < 0) { + return -EINVAL; + } /* TODO: Since drivers stores node_guid at load_dsr phase then this * assignment is not relevant, i need to figure out a way how to * retrieve MAC of our netdev */ - dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id; - pr_dbg("dev->node_guid=0x%llx\n", - (long long unsigned int)be64_to_cpu(dev->node_guid)); + if (!cmd->index) { + dev->node_guid = + dev->rdma_dev_res.ports[0].gid_tbl[0].gid.global.interface_id; + pr_dbg("dev->node_guid=0x%llx\n", + (long long unsigned int)be64_to_cpu(dev->node_guid)); + } return 0; } @@ -602,6 +608,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, union pvrdma_cmd_resp *rsp) { + int rc; + struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind; pr_dbg("index=%d\n", cmd->index); @@ -610,8 +618,13 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, return -EINVAL; } - memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0, - sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw)); + rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name, cmd->index); + + if (rc < 0) { + rsp->hdr.err = rc; + goto out; + } return 0; } diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index fc2abd34af..ac8c092db0 100644 
--- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -36,9 +36,9 @@ #include "pvrdma_qp_ops.h" static Property pvrdma_dev_properties[] = { - DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name), - DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1), - DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0), + DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), + DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), + DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, MAX_MR_SIZE), DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), @@ -276,17 +276,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev) pr_dbg("Initialized\n"); } -static void init_ports(PVRDMADev *dev, Error **errp) -{ - int i; - - memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports)); - - for (i = 0; i < MAX_PORTS; i++) { - dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN; - } -} - static void uninit_msix(PCIDevice *pdev, int used_vectors) { PVRDMADev *dev = PVRDMA_DEV(pdev); @@ -335,7 +324,8 @@ static void pvrdma_fini(PCIDevice *pdev) pvrdma_qp_ops_fini(); - rdma_rm_fini(&dev->rdma_dev_res); + rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, + dev->backend_eth_device_name); rdma_backend_fini(&dev->backend_dev); @@ -612,8 +602,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, dev->backend_device_name, dev->backend_port_num, - dev->backend_gid_idx, &dev->dev_attr, &dev->mad_chr, - errp); + &dev->dev_attr, &dev->mad_chr, errp); if (rc) { goto out; } @@ -623,8 +612,6 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } - init_ports(dev, errp); - rc = pvrdma_qp_ops_init(); if (rc) { goto out; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 3388be1926..2130824098 100644 --- 
a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -131,6 +131,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) RdmaRmQP *qp; PvrdmaSqWqe *wqe; PvrdmaRing *ring; + int sgid_idx; + union ibv_gid *sgid; pr_dbg("qp_handle=0x%x\n", qp_handle); @@ -156,8 +158,26 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) comp_ctx->cqe.qp = qp_handle; comp_ctx->cqe.opcode = IBV_WC_SEND; + sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); + if (!sgid) { + pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index); + return -EIO; + } + pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index, + sgid->global.interface_id); + + sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res, + &dev->backend_dev, + wqe->hdr.wr.ud.av.gid_index); + if (sgid_idx <= 0) { + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", + wqe->hdr.wr.ud.av.gid_index); + return -EIO; + } + rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type, (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, + sgid_idx, sgid, (union ibv_gid *)wqe->hdr.wr.ud.av.dgid, wqe->hdr.wr.ud.remote_qpn, wqe->hdr.wr.ud.remote_qkey, comp_ctx); |