From 4ad6429d2713924afe10cd960535600b241560a3 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 19 Dec 2019 16:19:34 -0500 Subject: IB/rdmavt: Correct comments in rdmavt_qp.h header Comments need to be with the definition of rvt_restart_sge(). Other comments were duplicated in sw/rdmavt/rc.c and were removed. Fixes: 385156c5f2a6 ("IB/hfi: Move RC functions into a header file") Link: https://lore.kernel.org/r/20191219211934.58387.88014.stgit@awfm-01.aw.intel.com Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- include/rdma/rdmavt_qp.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index b550ae89bf85..0d5c70e2d8ab 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -640,34 +640,14 @@ static inline int rvt_cmp_msn(u32 a, u32 b) return (((int)a) - ((int)b)) << 8; } -/** - * rvt_compute_aeth - compute the AETH (syndrome + MSN) - * @qp: the queue pair to compute the AETH for - * - * Returns the AETH. - */ __be32 rvt_compute_aeth(struct rvt_qp *qp); -/** - * rvt_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush - * @aeth: the Acknowledge Extended Transport Header - * - * The QP s_lock should be held. - */ void rvt_get_credit(struct rvt_qp *qp, u32 aeth); -/** - * rvt_restart_sge - rewind the sge state for a wqe - * @ss: the sge state pointer - * @wqe: the wqe to rewind - * @len: the data length from the start of the wqe in bytes - * - * Returns the remaining data length. - */ u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len); /** + * rvt_div_round_up_mtu - round up divide * @qp - the qp pair * @len - the length * -- cgit v1.2.3 From ad9efa05a0ea2fd9960d4e163fad56279cde45b2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 12 Dec 2019 11:38:27 +0200 Subject: RDMA/cm: Delete unused CM ARP functions Clean the code by deleting ARP functions, which are not called anyway. 
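For context on the rdmavt patch at the top of this section: the kernel-doc that stays in rdmavt_qp.h belongs to the static inline helpers defined in the header itself, such as rvt_div_round_up_mtu(), whose comment block the hunk above ends on. A minimal sketch of that helper in full, reconstructed here purely for illustration and not taken from the hunk shown (the body and the pmtu/log_pmtu field names are assumptions):

/**
 * rvt_div_round_up_mtu - round up divide
 * @qp - the qp pair
 * @len - the length
 *
 * Perform a shift-based MTU round-up divide.
 */
static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
{
	/* Round len up to a whole number of path-MTU-sized segments. */
	return (len + qp->pmtu - 1) >> qp->log_pmtu;
}
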
Link: https://lore.kernel.org/r/20191212093830.316934-46-leon@kernel.org Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 66 -------------------------------------------- include/rdma/ib_cm.h | 34 ----------------------- 2 files changed, 100 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fae788e38c87..d396b987b0f2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3163,72 +3163,6 @@ deref: cm_deref_id(cm_id_priv); return -EINVAL; } -static void cm_format_apr(struct cm_apr_msg *apr_msg, - struct cm_id_private *cm_id_priv, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); - apr_msg->local_comm_id = cm_id_priv->id.local_id; - apr_msg->remote_comm_id = cm_id_priv->id.remote_id; - apr_msg->ap_status = (u8) status; - - if (info && info_length) { - apr_msg->info_length = info_length; - memcpy(apr_msg->info, info, info_length); - } - - if (private_data && private_data_len) - memcpy(apr_msg->private_data, private_data, private_data_len); -} - -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len) -{ - struct cm_id_private *cm_id_priv; - struct ib_mad_send_buf *msg; - unsigned long flags; - int ret; - - if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || - (info && info_length > IB_CM_APR_INFO_LENGTH)) - return -EINVAL; - - cm_id_priv = container_of(cm_id, struct cm_id_private, id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state != IB_CM_ESTABLISHED || - (cm_id->lap_state != IB_CM_LAP_RCVD && - cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { - ret = -EINVAL; - goto out; - } - - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, - info, info_length, private_data, private_data_len); - ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } - - cm_id->lap_state = IB_CM_LAP_IDLE; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - return ret; -} -EXPORT_SYMBOL(ib_send_cm_apr); - static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index b01a8a8d4de9..8ec482e391aa 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -499,21 +499,6 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); -/** - * ib_send_cm_lap - Sends a load alternate path request. - * @cm_id: Connection identifier associated with the load alternate path - * message. - * @alternate_path: A path record that identifies the alternate path to - * load. - * @private_data: Optional user-defined private data sent with the - * load alternate path message. - * @private_data_len: Size of the private data buffer, in bytes. - */ -int ib_send_cm_lap(struct ib_cm_id *cm_id, - struct sa_path_rec *alternate_path, - const void *private_data, - u8 private_data_len); - /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. 
@@ -534,25 +519,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); -/** - * ib_send_cm_apr - Sends an alternate path response message in response to - * a load alternate path request. - * @cm_id: Connection identifier associated with the alternate path response. - * @status: Reply status sent with the alternate path response. - * @info: Optional additional information sent with the alternate path - * response. - * @info_length: Size of the additional information, in bytes. - * @private_data: Optional user-defined private data sent with the - * alternate path response message. - * @private_data_len: Size of the private data buffer, in bytes. - */ -int ib_send_cm_apr(struct ib_cm_id *cm_id, - enum ib_cm_apr_status status, - void *info, - u8 info_length, - const void *private_data, - u8 private_data_len); - struct ib_cm_sidr_req_param { struct sa_path_rec *path; const struct ib_gid_attr *sgid_attr; -- cgit v1.2.3 From 3e5901cbfcc15da54f6ad148add754e7a2b2a558 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Dec 2019 15:18:15 -0500 Subject: RDMA/core: Trace points for diagnosing completion queue issues Sample trace events: kworker/u29:0-300 [007] 120.042217: cq_alloc: cq.id=4 nr_cqe=161 comp_vector=2 poll_ctx=WORKQUEUE -0 [002] 120.056292: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.056402: cq_process: cq.id=4 wake-up took 109 [us] from interrupt kworker/2:1H-482 [002] 120.056407: cq_poll: cq.id=4 requested 16, returned 1 -0 [002] 120.067503: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.067537: cq_process: cq.id=4 wake-up took 34 [us] from interrupt kworker/2:1H-482 [002] 120.067541: cq_poll: cq.id=4 requested 16, returned 1 -0 [002] 120.067657: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 120.067672: cq_process: cq.id=4 wake-up took 15 [us] from interrupt kworker/2:1H-482 [002] 120.067674: cq_poll: cq.id=4 requested 16, returned 1 ... 
systemd-1 [002] 122.392653: cq_schedule: cq.id=4 kworker/2:1H-482 [002] 122.392688: cq_process: cq.id=4 wake-up took 35 [us] from interrupt kworker/2:1H-482 [002] 122.392693: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.392836: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.392970: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.393083: cq_poll: cq.id=4 requested 16, returned 16 kworker/2:1H-482 [002] 122.393195: cq_poll: cq.id=4 requested 16, returned 3 Several features to note in this output: - The WCE count and context type are reported at allocation time - The CPU and kworker for each CQ is evident - The CQ's restracker ID is tagged on each trace event - CQ poll scheduling latency is measured - Details about how often single completions occur versus multiple completions are evident - The cost of the ULP's completion handler is recorded Link: https://lore.kernel.org/r/20191218201815.30584.3481.stgit@manet.1015granger.net Signed-off-by: Chuck Lever Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/cq.c | 27 +++- drivers/infiniband/core/trace.c | 14 ++ drivers/infiniband/core/verbs.c | 4 + include/rdma/ib_verbs.h | 5 + include/trace/events/rdma_core.h | 271 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 drivers/infiniband/core/trace.c create mode 100644 include/trace/events/rdma_core.h (limited to 'include/rdma') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index f22555f982e2..2b86a514c9bf 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - nldev.o restrack.o counters.o ib_core_uverbs.o + nldev.o restrack.o counters.o ib_core_uverbs.o \ + trace.o ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index bbfded6d5d3d..4f25b2400694 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -7,6 +7,8 @@ #include #include +#include + /* # of WCs to poll for with a single call to ib_poll_cq */ #define IB_POLL_BATCH 16 #define IB_POLL_BATCH_DIRECT 8 @@ -41,6 +43,7 @@ static void ib_cq_rdma_dim_work(struct work_struct *w) dim->state = DIM_START_MEASURE; + trace_cq_modify(cq, comps, usec); cq->device->ops.modify_cq(cq, comps, usec); } @@ -65,18 +68,29 @@ static void rdma_dim_init(struct ib_cq *cq) INIT_WORK(&dim->work, ib_cq_rdma_dim_work); } +static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + int rc; + + rc = ib_poll_cq(cq, num_entries, wc); + trace_cq_poll(cq, num_entries, rc); + return rc; +} + static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs, int batch) { int i, n, completed = 0; + trace_cq_process(cq); + /* * budget might be (-1) if the caller does not * want to bound this call, thus we need unsigned * minimum here. 
*/ - while ((n = ib_poll_cq(cq, min_t(u32, batch, - budget - completed), wcs)) > 0) { + while ((n = __poll_cq(cq, min_t(u32, batch, + budget - completed), wcs)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &wcs[i]; @@ -131,8 +145,10 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH); if (completed < budget) { irq_poll_complete(&cq->iop); - if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) + if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) { + trace_cq_reschedule(cq); irq_poll_sched(&cq->iop); + } } if (dim) @@ -143,6 +159,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget) static void ib_cq_completion_softirq(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); irq_poll_sched(&cq->iop); } @@ -162,6 +179,7 @@ static void ib_cq_poll_work(struct work_struct *work) static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) { + trace_cq_schedule(cq); queue_work(cq->comp_wq, &cq->work); } @@ -239,6 +257,7 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, goto out_destroy_cq; } + trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; out_destroy_cq: @@ -248,6 +267,7 @@ out_free_wc: kfree(cq->wc); out_free_cq: kfree(cq); + trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret); return ERR_PTR(ret); } EXPORT_SYMBOL(__ib_alloc_cq_user); @@ -304,6 +324,7 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) WARN_ON_ONCE(1); } + trace_cq_free(cq); rdma_restrack_del(&cq->res); cq->device->ops.destroy_cq(cq, udata); if (cq->dim) diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c new file mode 100644 index 000000000000..6c3514beac4d --- /dev/null +++ b/drivers/infiniband/core/trace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for core RDMA functions. + * + * Author: Chuck Lever + * + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + */ + +#define CREATE_TRACE_POINTS + +#include + +#include diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index dd765e176cdd..289b2f7a9d5e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -53,6 +53,8 @@ #include "core_priv.h" +#include + static int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); @@ -2744,6 +2746,7 @@ void ib_drain_sq(struct ib_qp *qp) qp->device->ops.drain_sq(qp); else __ib_drain_sq(qp); + trace_cq_drain_complete(qp->send_cq); } EXPORT_SYMBOL(ib_drain_sq); @@ -2772,6 +2775,7 @@ void ib_drain_rq(struct ib_qp *qp) qp->device->ops.drain_rq(qp); else __ib_drain_rq(qp); + trace_cq_drain_complete(qp->recv_cq); } EXPORT_SYMBOL(ib_drain_rq); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5608e14e3aad..42f28d39f28c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1558,6 +1558,11 @@ struct ib_cq { }; struct workqueue_struct *comp_wq; struct dim *dim; + + /* updated only by trace points */ + ktime_t timestamp; + bool interrupt; + /* * Implementation details of the RDMA core, don't use in drivers: */ diff --git a/include/trace/events/rdma_core.h b/include/trace/events/rdma_core.h new file mode 100644 index 000000000000..08f481554e7f --- /dev/null +++ b/include/trace/events/rdma_core.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trace point definitions for core RDMA functions. + * + * Author: Chuck Lever + * + * Copyright (c) 2019, Oracle and/or its affiliates. 
All rights reserved. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rdma_core + +#if !defined(_TRACE_RDMA_CORE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RDMA_CORE_H + +#include +#include + +/* + * enum ib_poll_context, from include/rdma/ib_verbs.h + */ +#define IB_POLL_CTX_LIST \ + ib_poll_ctx(DIRECT) \ + ib_poll_ctx(SOFTIRQ) \ + ib_poll_ctx(WORKQUEUE) \ + ib_poll_ctx_end(UNBOUND_WORKQUEUE) + +#undef ib_poll_ctx +#undef ib_poll_ctx_end + +#define ib_poll_ctx(x) TRACE_DEFINE_ENUM(IB_POLL_##x); +#define ib_poll_ctx_end(x) TRACE_DEFINE_ENUM(IB_POLL_##x); + +IB_POLL_CTX_LIST + +#undef ib_poll_ctx +#undef ib_poll_ctx_end + +#define ib_poll_ctx(x) { IB_POLL_##x, #x }, +#define ib_poll_ctx_end(x) { IB_POLL_##x, #x } + +#define rdma_show_ib_poll_ctx(x) \ + __print_symbolic(x, IB_POLL_CTX_LIST) + +/** + ** Completion Queue events + **/ + +TRACE_EVENT(cq_schedule, + TP_PROTO( + struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + cq->timestamp = ktime_get(); + cq->interrupt = true; + + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +TRACE_EVENT(cq_reschedule, + TP_PROTO( + struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + cq->timestamp = ktime_get(); + cq->interrupt = false; + + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +TRACE_EVENT(cq_process, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(bool, interrupt) + __field(s64, latency) + ), + + TP_fast_assign( + ktime_t latency = ktime_sub(ktime_get(), cq->timestamp); + + __entry->cq_id = cq->res.id; + __entry->latency = ktime_to_us(latency); + __entry->interrupt = cq->interrupt; + ), + + TP_printk("cq.id=%u wake-up took %lld [us] from %s", + __entry->cq_id, __entry->latency, + __entry->interrupt ? 
"interrupt" : "reschedule" + ) +); + +TRACE_EVENT(cq_poll, + TP_PROTO( + const struct ib_cq *cq, + int requested, + int rc + ), + + TP_ARGS(cq, requested, rc), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, requested) + __field(int, rc) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->requested = requested; + __entry->rc = rc; + ), + + TP_printk("cq.id=%u requested %d, returned %d", + __entry->cq_id, __entry->requested, __entry->rc + ) +); + +TRACE_EVENT(cq_drain_complete, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", + __entry->cq_id + ) +); + + +TRACE_EVENT(cq_modify, + TP_PROTO( + const struct ib_cq *cq, + u16 comps, + u16 usec + ), + + TP_ARGS(cq, comps, usec), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(unsigned int, comps) + __field(unsigned int, usec) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->comps = comps; + __entry->usec = usec; + ), + + TP_printk("cq.id=%u comps=%u usec=%u", + __entry->cq_id, __entry->comps, __entry->usec + ) +); + +TRACE_EVENT(cq_alloc, + TP_PROTO( + const struct ib_cq *cq, + int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx + ), + + TP_ARGS(cq, nr_cqe, comp_vector, poll_ctx), + + TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, nr_cqe) + __field(int, comp_vector) + __field(unsigned long, poll_ctx) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + __entry->nr_cqe = nr_cqe; + __entry->comp_vector = comp_vector; + __entry->poll_ctx = poll_ctx; + ), + + TP_printk("cq.id=%u nr_cqe=%d comp_vector=%d poll_ctx=%s", + __entry->cq_id, __entry->nr_cqe, __entry->comp_vector, + rdma_show_ib_poll_ctx(__entry->poll_ctx) + ) +); + +TRACE_EVENT(cq_alloc_error, + TP_PROTO( + int nr_cqe, + int comp_vector, + enum ib_poll_context poll_ctx, + int rc + ), + + TP_ARGS(nr_cqe, comp_vector, poll_ctx, rc), + + TP_STRUCT__entry( + __field(int, rc) + __field(int, nr_cqe) + __field(int, comp_vector) + __field(unsigned long, poll_ctx) + ), + + TP_fast_assign( + __entry->rc = rc; + __entry->nr_cqe = nr_cqe; + __entry->comp_vector = comp_vector; + __entry->poll_ctx = poll_ctx; + ), + + TP_printk("nr_cqe=%d comp_vector=%d poll_ctx=%s rc=%d", + __entry->nr_cqe, __entry->comp_vector, + rdma_show_ib_poll_ctx(__entry->poll_ctx), __entry->rc + ) +); + +TRACE_EVENT(cq_free, + TP_PROTO( + const struct ib_cq *cq + ), + + TP_ARGS(cq), + + TP_STRUCT__entry( + __field(u32, cq_id) + ), + + TP_fast_assign( + __entry->cq_id = cq->res.id; + ), + + TP_printk("cq.id=%u", __entry->cq_id) +); + +#endif /* _TRACE_RDMA_CORE_H */ + +#include -- cgit v1.2.3 From 6b57cea9221b0247ad5111b348522625e489a8e4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 13:30:22 +0200 Subject: IB/core: Let IB core distribute cache update events Currently when the low level driver notifies Pkey, GID, and port change events they are notified to the registered handlers in the order they are registered. IB core and other ULPs such as IPoIB are interested in GID, LID, Pkey change events. Since all GID queries done by ULPs are serviced by IB core, and the IB core deferes cache updates to a work queue, it is possible for other clients to see stale cache data when they handle their own events. For example, the below call tree shows how ipoib will call rdma_query_gid() concurrently with the update to the cache sitting in the WQ. 
mlx5_ib_handle_event() ib_dispatch_event() ib_cache_event() queue_work() -> slow cache update [..] ipoib_event() queue_work() [..] work handler ipoib_ib_dev_flush_light() __ipoib_ib_dev_flush() ipoib_dev_addr_changed_valid() rdma_query_gid() <- Returns old GID, cache not updated. Move all the event dispatch to a work queue so that the cache update is always done before any clients are notified. Fixes: f35faa4ba956 ("IB/core: Simplify ib_query_gid to always refer to cache") Link: https://lore.kernel.org/r/20191212113024.336702-3-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 121 ++++++++++++++++++++++-------------- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/device.c | 33 +++------- include/rdma/ib_verbs.h | 9 ++- 4 files changed, 92 insertions(+), 72 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d535995711c3..e55f345799e4 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -51,9 +51,8 @@ struct ib_pkey_cache { struct ib_update_work { struct work_struct work; - struct ib_device *device; - u8 port_num; - bool enforce_security; + struct ib_event event; + bool enforce_security; }; union ib_gid zgid; @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { @@ -1381,9 +1380,8 @@ err: return ret; } -static void ib_cache_update(struct ib_device *device, - u8 port, - bool enforce_security) +static int +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; @@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device, int ret; if (!rdma_is_port_valid(device, port)) - return; + return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) - return; + return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { @@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device, pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); - if (!pkey_cache) + if (!pkey_cache) { + ret = -ENOMEM; goto err; + } pkey_cache->table_len = tprops->pkey_tbl_len; @@ -1446,50 +1446,84 @@ static void ib_cache_update(struct ib_device *device, kfree(old_pkey_cache); kfree(tprops); - return; + return 0; err: kfree(pkey_cache); kfree(tprops); + return ret; +} + +static void ib_cache_event_task(struct work_struct *_work) +{ + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); + int ret; + + /* Before distributing the cache update event, first sync + * the cache. + */ + ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->enforce_security); + + /* GID event is notified already for individual GID entries by + * dispatch_gid_change_event(). Hence, notifiy for rest of the + * events. 
+ */ + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) + ib_dispatch_event_clients(&work->event); + + kfree(work); } -static void ib_cache_task(struct work_struct *_work) +static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); - ib_cache_update(work->device, - work->port_num, - work->enforce_security); + ib_dispatch_event_clients(&work->event); kfree(work); } -static void ib_cache_event(struct ib_event_handler *handler, - struct ib_event *event) +static bool is_cache_update_event(const struct ib_event *event) +{ + return (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE); +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_GID_CHANGE) { - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (work) { - INIT_WORK(&work->work, ib_cache_task); - work->device = event->device; - work->port_num = event->element.port_num; - if (event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_GID_CHANGE) - work->enforce_security = true; - else - work->enforce_security = false; - - queue_work(ib_wq, &work->work); - } - } + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + if (is_cache_update_event(event)) + INIT_WORK(&work->work, ib_cache_event_task); + else + INIT_WORK(&work->work, ib_generic_event_task); + + work->event = *event; + if (event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_GID_CHANGE) + work->enforce_security = true; + + queue_work(ib_wq, &work->work); } +EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { @@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) ib_cache_update(device, p, true); - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, - device, ib_cache_event); - ib_register_event_handler(&device->cache.event_handler); return 0; } @@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { - /* The cleanup function unregisters the event handler, - * waits for all in-progress workqueue elements and cleans - * up the GID cache. This function should be called after - * the device was removed from the devices list and all - * clients were removed, so the cache exists but is + /* The cleanup function waits for all in-progress workqueue + * elements and cleans up the GID cache. This function should be + * called after the device was removed from the devices list and + * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. 
*/ - ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3645e092e1c7..d657d90e618b 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +void ib_dispatch_event_clients(struct ib_event *event); #ifdef CONFIG_CGROUP_RDMA void ib_device_register_rdmacg(struct ib_device *device); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 84dd74fe13b8..c38b2b0b078a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -588,6 +588,7 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); + init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be @@ -1931,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data); * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. + * chapter 11 of the InfiniBand Architecture Specification). This + * callback occurs in workqueue context. */ void ib_register_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_register_event_handler); @@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler); */ void ib_unregister_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_unregister_event_handler); -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. 
- */ -void ib_dispatch_event(struct ib_event *event) +void ib_dispatch_event_clients(struct ib_event *event) { - unsigned long flags; struct ib_event_handler *handler; - spin_lock_irqsave(&event->device->event_handler_lock, flags); + down_read(&event->device->event_handler_rwsem); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); + up_read(&event->device->event_handler_rwsem); } -EXPORT_SYMBOL(ib_dispatch_event); static int iw_query_port(struct ib_device *device, u8 port_num, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 42f28d39f28c..f36fb657518f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2154,7 +2154,6 @@ struct ib_port_cache { struct ib_cache { rwlock_t lock; - struct ib_event_handler event_handler; }; struct ib_port_immutable { @@ -2632,7 +2631,11 @@ struct ib_device { struct rcu_head rcu_head; struct list_head event_handler_list; - spinlock_t event_handler_lock; + /* Protects event_handler_list */ + struct rw_semaphore event_handler_rwsem; + + /* Protects QP's event_handler calls and open_qp list */ + spinlock_t event_handler_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; @@ -2947,7 +2950,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); -void ib_dispatch_event(struct ib_event *event); +void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); -- cgit v1.2.3 From 17e1064632512db419cb9bb4555aec1763969b7d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 13:30:23 +0200 Subject: IB/core: Cut down single member ib_cache structure Given that ib_cache structure has only single member now, merge the cache lock directly in the ib_device. 
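The series above moves event dispatch onto ib_wq so that handlers always run after the cache update; the handler registration API itself is unchanged. A minimal, hypothetical sketch of a client-side handler under the new scheme (the demo_* names are invented for illustration and are not part of these patches); note the handler is now invoked from workqueue context and may sleep:

#include <rdma/ib_verbs.h>

static struct ib_event_handler demo_handler;

/* Runs from ib_wq after the GID/Pkey caches have been refreshed. */
static void demo_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	if (event->event == IB_EVENT_GID_CHANGE)
		pr_info("%s: GID table changed on port %u\n",
			dev_name(&event->device->dev),
			event->element.port_num);
}

static void demo_client_add(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&demo_handler, device, demo_event_handler);
	ib_register_event_handler(&demo_handler);
}
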
Link: https://lore.kernel.org/r/20191212113024.336702-4-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 30 +++++++++++++++--------------- include/rdma/ib_verbs.h | 7 ++----- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index e55f345799e4..17bfedd24cc3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1033,7 +1033,7 @@ int ib_get_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1042,7 +1042,7 @@ int ib_get_cached_pkey(struct ib_device *device, else *pkey = cache->table[index]; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1057,9 +1057,9 @@ int ib_get_cached_subnet_prefix(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *sn_pfx = device->port_data[port_num].cache.subnet_prefix; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return 0; } @@ -1079,7 +1079,7 @@ int ib_find_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1100,7 +1100,7 @@ int ib_find_cached_pkey(struct ib_device *device, ret = 0; } - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1119,7 +1119,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); cache = device->port_data[port_num].cache.pkey; @@ -1132,7 +1132,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device, break; } - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1148,9 +1148,9 @@ int ib_get_cached_lmc(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *lmc = device->port_data[port_num].cache.lmc; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1166,9 +1166,9 @@ int ib_get_cached_port_state(struct ib_device *device, if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - read_lock_irqsave(&device->cache.lock, flags); + read_lock_irqsave(&device->cache_lock, flags); *port_state = device->port_data[port_num].cache.port_state; - read_unlock_irqrestore(&device->cache.lock, flags); + read_unlock_irqrestore(&device->cache_lock, flags); return ret; } @@ -1428,7 +1428,7 @@ ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) } } - write_lock_irq(&device->cache.lock); + write_lock_irq(&device->cache_lock); old_pkey_cache = device->port_data[port].cache.pkey; @@ -1437,7 +1437,7 @@ ib_cache_update(struct 
ib_device *device, u8 port, bool enforce_security) device->port_data[port].cache.port_state = tprops->state; device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix; - write_unlock_irq(&device->cache.lock); + write_unlock_irq(&device->cache_lock); if (enforce_security) ib_security_cache_change(device, @@ -1530,7 +1530,7 @@ int ib_cache_setup_one(struct ib_device *device) unsigned int p; int err; - rwlock_init(&device->cache.lock); + rwlock_init(&device->cache_lock); err = gid_table_setup_one(device); if (err) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index f36fb657518f..37dac147a946 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2152,10 +2152,6 @@ struct ib_port_cache { enum ib_port_state port_state; }; -struct ib_cache { - rwlock_t lock; -}; - struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; @@ -2641,7 +2637,8 @@ struct ib_device { struct xarray client_data; struct mutex unregistration_lock; - struct ib_cache cache; + /* Synchronize GID, Pkey cache entries, subnet prefix, LMC */ + rwlock_t cache_lock; /** * port_data is indexed by port number */ -- cgit v1.2.3 From 40adf686128856c4add948fb002d43e2c507d1aa Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Dec 2019 13:30:24 +0200 Subject: IB/core: Rename event_handler_lock to qp_open_list_lock This lock is used to protect the qp->open_list linked list. As a side effect it seems to also globally serialize the qp event_handler, but it isn't clear if that is a deliberate design. Link: https://lore.kernel.org/r/20191212113024.336702-5-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/verbs.c | 12 ++++++------ include/rdma/ib_verbs.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index c38b2b0b078a..e4c3ea727eac 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -587,7 +587,7 @@ struct ib_device *_ib_alloc_device(size_t size) rdma_init_coredev(&device->coredev, device, &init_net); INIT_LIST_HEAD(&device->event_handler_list); - spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->qp_open_list_lock); init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 47d54c31eb2a..78b27aff2846 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1056,11 +1056,11 @@ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) struct ib_qp *qp = context; unsigned long flags; - spin_lock_irqsave(&qp->device->event_handler_lock, flags); + spin_lock_irqsave(&qp->device->qp_open_list_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); - spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) @@ -1097,9 +1097,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, 
flags); list_add(&qp->open_list, &real_qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); return qp; } @@ -1827,9 +1827,9 @@ int ib_close_qp(struct ib_qp *qp) if (real_qp == qp) return -EINVAL; - spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); + spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags); list_del(&qp->open_list); - spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags); atomic_dec(&real_qp->usecnt); if (qp->qp_sec) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 37dac147a946..cea4e198701e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2631,7 +2631,7 @@ struct ib_device { struct rw_semaphore event_handler_rwsem; /* Protects QP's event_handler calls and open_qp list */ - spinlock_t event_handler_lock; + spinlock_t qp_open_list_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; -- cgit v1.2.3 From 8bdf9dd984c18375d1090ddeb1792511f619c5c1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 13 Jan 2020 14:33:10 +0000 Subject: RDMA/uverbs: Remove needs_kfree_rcu from uverbs_obj_type_class After device disassociation the uapi_objects are destroyed and freed, however it is still possible that core code can be holding a kref on the uobject. When it finally goes to uverbs_uobject_free() via the kref_put() it can trigger a use-after-free on the uapi_object. Since needs_kfree_rcu is a micro optimization that only benefits file uobjects, just get rid of it. There is no harm in using kfree_rcu even if it isn't required, and the number of involved objects is small. Link: https://lore.kernel.org/r/20200113143306.GA28717@ziepe.ca Signed-off-by: Michael Guralnik Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 23 +---------------------- include/rdma/uverbs_types.h | 1 - 2 files changed, 1 insertion(+), 23 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6c72773faf29..17bdbe38fdfa 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -49,13 +49,7 @@ void uverbs_uobject_get(struct ib_uobject *uobject) static void uverbs_uobject_free(struct kref *ref) { - struct ib_uobject *uobj = - container_of(ref, struct ib_uobject, ref); - - if (uobj->uapi_object->type_class->needs_kfree_rcu) - kfree_rcu(uobj, rcu); - else - kfree(uobj); + kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu); } void uverbs_uobject_put(struct ib_uobject *uobject) @@ -744,20 +738,6 @@ const struct uverbs_obj_type_class uverbs_idr_class = { .lookup_put = lookup_put_idr_uobject, .destroy_hw = destroy_hw_idr_uobject, .remove_handle = remove_handle_idr_uobject, - /* - * When we destroy an object, we first just lock it for WRITE and - * actually DESTROY it in the finalize stage. So, the problematic - * scenario is when we just started the finalize stage of the - * destruction (nothing was executed yet). Now, the other thread - * fetched the object for READ access, but it didn't lock it yet. - * The DESTROY thread continues and starts destroying the object. - * When the other thread continue - without the RCU, it would - * access freed memory. However, the rcu_read_lock delays the free - * until the rcu_read_lock of the READ operation quits. 
Since the - * exclusive lock of the object is still taken by the DESTROY flow, the - * READ operation will get -EBUSY and it'll just bail out. - */ - .needs_kfree_rcu = true, }; EXPORT_SYMBOL(uverbs_idr_class); @@ -920,7 +900,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .lookup_put = lookup_put_fd_uobject, .destroy_hw = destroy_hw_fd_uobject, .remove_handle = remove_handle_fd_uobject, - .needs_kfree_rcu = false, }; EXPORT_SYMBOL(uverbs_fd_class); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index d57a5ba00c74..0b0f5a5f392d 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -98,7 +98,6 @@ struct uverbs_obj_type_class { enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs); void (*remove_handle)(struct ib_uobject *uobj); - u8 needs_kfree_rcu; }; struct uverbs_obj_type { -- cgit v1.2.3 From 6898d1c661d79f4707d8ba82991b2195822780ca Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:21:53 +0200 Subject: RDMA/mlx5: Use RCU and direct refcounts to keep memory alive dispatch_event_fd() runs from a notifier with minimal locking, and relies on RCU and a file refcount to keep the uobject and eventfd alive. As the next patch wants to remove the file_operations release function from the drivers, re-organize things so that the devx_event_notifier() path uses the existing RCU to manage the lifetime of the uobject and eventfd. Move the refcount puts to a call_rcu so that the objects are guaranteed to exist and remove the indirect file refcount. Link: https://lore.kernel.org/r/1578504126-9400-2-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 11 ++++++----- drivers/infiniband/core/rdma_core.h | 15 --------------- drivers/infiniband/hw/mlx5/devx.c | 34 +++++++++++++++++----------------- include/rdma/uverbs_types.h | 12 ++++++++++++ 4 files changed, 35 insertions(+), 37 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 17bdbe38fdfa..aef6fb823206 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -42,20 +42,21 @@ #include "core_priv.h" #include "rdma_core.h" -void uverbs_uobject_get(struct ib_uobject *uobject) -{ - kref_get(&uobject->ref); -} - static void uverbs_uobject_free(struct kref *ref) { kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu); } +/* + * In order to indicate we no longer needs this uobject, uverbs_uobject_put + * is called. When the reference count is decreased, the uobject is freed. + * For example, this is used when attaching a completion channel to a CQ. + */ void uverbs_uobject_put(struct ib_uobject *uobject) { kref_put(&uobject->ref, uverbs_uobject_free); } +EXPORT_SYMBOL(uverbs_uobject_put); static int uverbs_try_lock_object(struct ib_uobject *uobj, enum rdma_lookup_mode mode) diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index e63fbda25e1d..d5d58a10bb28 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -50,21 +50,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -/* - * uverbs_uobject_get is called in order to increase the reference count on - * an uobject. 
This is useful when a handler wants to keep the uobject's memory - * alive, regardless if this uobject is still alive in the context's objects - * repository. Objects are put via uverbs_uobject_put. - */ -void uverbs_uobject_get(struct ib_uobject *uobject); - -/* - * In order to indicate we no longer needs this uobject, uverbs_uobject_put - * is called. When the reference count is decreased, the uobject is freed. - * For example, this is used when attaching a completion channel to a CQ. - */ -void uverbs_uobject_put(struct ib_uobject *uobject); - /* Indicate this fd is no longer used by this consumer, but its memory isn't * necessarily released yet. When the last reference is put, we release the * memory. After this call is executed, calling uverbs_uobject_get isn't diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 9d0a18cf9e5e..968fff018fc0 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -72,7 +72,6 @@ struct devx_event_subscription { struct rcu_head rcu; u64 cookie; struct devx_async_event_file *ev_file; - struct file *filp; /* Upon hot unplug we need a direct access to */ struct eventfd_ctx *eventfd; }; @@ -2032,6 +2031,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( goto err; list_add_tail(&event_sub->event_list, &sub_list); + uverbs_uobject_get(&ev_file->uobj); if (use_eventfd) { event_sub->eventfd = eventfd_ctx_fdget(redirect_fd); @@ -2045,7 +2045,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( event_sub->cookie = cookie; event_sub->ev_file = ev_file; - event_sub->filp = fd_uobj->object; /* May be needed upon cleanup the devx object/subscription */ event_sub->xa_key_level1 = key_level1; event_sub->xa_key_level2 = obj_id; @@ -2099,7 +2098,7 @@ err: if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); - + uverbs_uobject_put(&event_sub->ev_file->uobj); kfree(event_sub); } @@ -2361,17 +2360,10 @@ static void dispatch_event_fd(struct list_head *fd_list, struct devx_event_subscription *item; list_for_each_entry_rcu(item, fd_list, xa_list) { - if (!get_file_rcu(item->filp)) - continue; - - if (item->eventfd) { + if (item->eventfd) eventfd_signal(item->eventfd, 1); - fput(item->filp); - continue; - } - - deliver_event(item, data); - fput(item->filp); + else + deliver_event(item, data); } } @@ -2653,6 +2645,17 @@ static __poll_t devx_async_event_poll(struct file *filp, return pollflags; } +static void devx_free_subscription(struct rcu_head *rcu) +{ + struct devx_event_subscription *event_sub = + container_of(rcu, struct devx_event_subscription, rcu); + + if (event_sub->eventfd) + eventfd_ctx_put(event_sub->eventfd); + uverbs_uobject_put(&event_sub->ev_file->uobj); + kfree(event_sub); +} + static int devx_async_event_close(struct inode *inode, struct file *filp) { struct devx_async_event_file *ev_file = filp->private_data; @@ -2665,12 +2668,9 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) list_for_each_entry_safe(event_sub, event_sub_tmp, &ev_file->subscribed_events_list, file_list) { devx_cleanup_subscription(dev, event_sub); - if (event_sub->eventfd) - eventfd_ctx_put(event_sub->eventfd); - list_del_rcu(&event_sub->file_list); /* subscription may not be used by the read API any more */ - kfree_rcu(event_sub, rcu); + call_rcu(&event_sub->rcu, devx_free_subscription); } mutex_unlock(&dev->devx_event_table.event_xa_lock); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 0b0f5a5f392d..ca65d4704b4b 100644 --- 
a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -144,6 +144,18 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); +/* + * uverbs_uobject_get is called in order to increase the reference count on + * an uobject. This is useful when a handler wants to keep the uobject's memory + * alive, regardless if this uobject is still alive in the context's objects + * repository. Objects are put via uverbs_uobject_put. + */ +static inline void uverbs_uobject_get(struct ib_uobject *uobject) +{ + kref_get(&uobject->ref); +} +void uverbs_uobject_put(struct ib_uobject *uobject); + struct uverbs_obj_fd_type { /* * In fd based objects, uverbs_obj_type_ops points to generic -- cgit v1.2.3 From f7c8416ccea52b41e29227b3a5066540f51ee471 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:21:54 +0200 Subject: RDMA/core: Simplify destruction of FD uobjects FD uobjects have a weird split between the struct file and uobject world. Simplify this to make them pure uobjects and use a generic release method for all struct file operations. This fixes the control flow so that mlx5_cmd_cleanup_async_ctx() is always called before erasing the linked list contents to make the concurrancy simpler to understand. For this to work the uobject destruction must fence anything that it is cleaning up - the design must not rely on struct file lifetime. Only deliver_event() relies on the struct file to when adding new events to the queue, add a is_destroyed check under lock to block it. Link: https://lore.kernel.org/r/1578504126-9400-3-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 34 +++++---- drivers/infiniband/core/rdma_core.h | 8 --- drivers/infiniband/core/uverbs_main.c | 23 +----- drivers/infiniband/core/uverbs_std_types.c | 23 +++--- drivers/infiniband/core/uverbs_uapi.c | 6 +- drivers/infiniband/hw/mlx5/devx.c | 111 +++++++++++++---------------- include/rdma/uverbs_types.h | 12 ++-- 7 files changed, 94 insertions(+), 123 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index aef6fb823206..0ed0341b8e30 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -353,9 +353,9 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj, uobject = f->private_data; /* - * fget(id) ensures we are not currently running uverbs_close_fd, - * and the caller is expected to ensure that uverbs_close_fd is never - * done while a call top lookup is possible. + * fget(id) ensures we are not currently running + * uverbs_uobject_fd_release(), and the caller is expected to ensure + * that release is never done while a call to lookup is possible. */ if (f->f_op != fd_type->fops) { fput(f); @@ -548,7 +548,7 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj, { const struct uverbs_obj_fd_type *fd_type = container_of( uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); - int ret = fd_type->context_closed(uobj, why); + int ret = fd_type->destroy_object(uobj, why); if (ib_is_destroy_retryable(ret, why, uobj)) return ret; @@ -587,9 +587,9 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) /* * The kref for uobj is moved into filp->private data and put in - * uverbs_close_fd(). 
Once alloc_commit() succeeds uverbs_close_fd() - * must be guaranteed to be called from the provided fops release - * callback. + * uverbs_close_fd(). Once alloc_commit() succeeds + * uverbs_uobject_fd_release() must be guaranteed to be called from + * the provided fops release callback. */ filp = anon_inode_getfile(fd_type->name, fd_type->fops, @@ -600,7 +600,7 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) uobj->object = filp; - /* Matching put will be done in uverbs_close_fd() */ + /* Matching put will be done in uverbs_uobject_fd_release() */ kref_get(&uobj->ufile->ref); /* This shouldn't be used anymore. Use the file object instead */ @@ -608,7 +608,7 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) /* * NOTE: Once we install the file we loose ownership of our kref on - * uobj. It will be put by uverbs_close_fd() + * uobj. It will be put by uverbs_uobject_fd_release() */ fd_install(fd, filp); @@ -676,7 +676,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj, struct file *filp = uobj->object; WARN_ON(mode != UVERBS_LOOKUP_READ); - /* This indirectly calls uverbs_close_fd and free the object */ + /* + * This indirectly calls uverbs_uobject_fd_release() and free the + * object + */ fput(filp); } @@ -742,9 +745,13 @@ const struct uverbs_obj_type_class uverbs_idr_class = { }; EXPORT_SYMBOL(uverbs_idr_class); -void uverbs_close_fd(struct file *f) +/* + * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct + * file_operations release method. + */ +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp) { - struct ib_uobject *uobj = f->private_data; + struct ib_uobject *uobj = filp->private_data; struct ib_uverbs_file *ufile = uobj->ufile; struct uverbs_attr_bundle attrs = { .context = uobj->context, @@ -768,8 +775,9 @@ void uverbs_close_fd(struct file *f) /* Pairs with filp->private_data in alloc_begin_fd_uobject */ uverbs_uobject_put(uobj); + return 0; } -EXPORT_SYMBOL(uverbs_close_fd); +EXPORT_SYMBOL(uverbs_uobject_fd_release); /* * Drop the ucontext off the ufile and completely disconnect it from the diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index d5d58a10bb28..92694253e776 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -50,14 +50,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -/* Indicate this fd is no longer used by this consumer, but its memory isn't - * necessarily released yet. When the last reference is put, we release the - * memory. After this call is executed, calling uverbs_uobject_get isn't - * allowed. - * This must be called from the release file_operations of the file! - */ -void uverbs_close_fd(struct file *f); - /* * Get an ib_uobject that corresponds to the given id from ufile, assuming * the object is from the given type. 
Lock it to the required access when diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 8f5de4dcad97..da56fda259fd 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -373,32 +373,11 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp) return 0; } -static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct ib_uverbs_completion_event_file *file = container_of( - uobj, struct ib_uverbs_completion_event_file, uobj); - struct ib_uverbs_event *entry, *tmp; - - spin_lock_irq(&file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) { - if (entry->counter) - list_del(&entry->obj_list); - kfree(entry); - } - file->ev_queue.is_closed = 1; - spin_unlock_irq(&file->ev_queue.lock); - - uverbs_close_fd(filp); - - return 0; -} - const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_comp_event_read, .poll = ib_uverbs_comp_event_poll, - .release = ib_uverbs_comp_event_close, + .release = uverbs_uobject_fd_release, .fasync = ib_uverbs_comp_event_fasync, .llseek = no_llseek, }; diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 35b2e2c640cc..def038a0fe77 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -202,22 +202,29 @@ static int uverbs_free_pd(struct ib_uobject *uobject, return 0; } -static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int +uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { - struct ib_uverbs_completion_event_file *comp_event_file = + struct ib_uverbs_completion_event_file *file = container_of(uobj, struct ib_uverbs_completion_event_file, uobj); - struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; + struct ib_uverbs_event_queue *event_queue = &file->ev_queue; + struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&event_queue->lock); event_queue->is_closed = 1; spin_unlock_irq(&event_queue->lock); + wake_up_interruptible(&event_queue->poll_wait); + kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); - if (why == RDMA_REMOVE_DRIVER_REMOVE) { - wake_up_interruptible(&event_queue->poll_wait); - kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN); + spin_lock_irq(&event_queue->lock); + list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); } + spin_unlock_irq(&event_queue->lock); return 0; }; @@ -230,7 +237,7 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler); DECLARE_UVERBS_NAMED_OBJECT( UVERBS_OBJECT_COMP_CHANNEL, UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), - uverbs_hot_unplug_completion_event_file, + uverbs_completion_event_file_destroy_uobj, &uverbs_event_fops, "[infinibandevent]", O_RDONLY)); diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 00c547887132..9b84a126187a 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -195,9 +195,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi, * disassociation, and the FD types require the driver to use * struct file_operations.owner to prevent the driver module * code from unloading while the file is open. 
This provides - * enough safety that uverbs_close_fd() will continue to work. - * Drivers using FD are responsible to handle disassociation of - * the device on their own. + * enough safety that uverbs_uobject_fd_release() will + * continue to work. Drivers using FD are responsible to + * handle disassociation of the device on their own. */ if (WARN_ON(is_driver && obj->type_attrs->type_class != &uverbs_idr_class && diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 968fff018fc0..02125d82b96d 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2328,6 +2328,9 @@ static int deliver_event(struct devx_event_subscription *event_sub, return 0; } + /* is_destroyed is ignored here because we don't have any memory + * allocation to clean up for the omit_data case + */ list_add_tail(&event_sub->event_list, &ev_file->event_list); spin_unlock_irqrestore(&ev_file->lock, flags); wake_up_interruptible(&ev_file->poll_wait); @@ -2347,7 +2350,10 @@ static int deliver_event(struct devx_event_subscription *event_sub, memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe)); spin_lock_irqsave(&ev_file->lock, flags); - list_add_tail(&event_data->list, &ev_file->event_list); + if (!ev_file->is_destroyed) + list_add_tail(&event_data->list, &ev_file->event_list); + else + kfree(event_data); spin_unlock_irqrestore(&ev_file->lock, flags); wake_up_interruptible(&ev_file->poll_wait); @@ -2501,23 +2507,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf, return ret; } -static int devx_async_cmd_event_close(struct inode *inode, struct file *filp) -{ - struct ib_uobject *uobj = filp->private_data; - struct devx_async_cmd_event_file *comp_ev_file = container_of( - uobj, struct devx_async_cmd_event_file, uobj); - struct devx_async_data *entry, *tmp; - - spin_lock_irq(&comp_ev_file->ev_queue.lock); - list_for_each_entry_safe(entry, tmp, - &comp_ev_file->ev_queue.event_list, list) - kvfree(entry); - spin_unlock_irq(&comp_ev_file->ev_queue.lock); - - uverbs_close_fd(filp); - return 0; -} - static __poll_t devx_async_cmd_event_poll(struct file *filp, struct poll_table_struct *wait) { @@ -2541,7 +2530,7 @@ static const struct file_operations devx_async_cmd_event_fops = { .owner = THIS_MODULE, .read = devx_async_cmd_event_read, .poll = devx_async_cmd_event_poll, - .release = devx_async_cmd_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; @@ -2656,78 +2645,74 @@ static void devx_free_subscription(struct rcu_head *rcu) kfree(event_sub); } -static int devx_async_event_close(struct inode *inode, struct file *filp) -{ - struct devx_async_event_file *ev_file = filp->private_data; - struct devx_event_subscription *event_sub, *event_sub_tmp; - struct devx_async_event_data *entry, *tmp; - struct mlx5_ib_dev *dev = ev_file->dev; - - mutex_lock(&dev->devx_event_table.event_xa_lock); - /* delete the subscriptions which are related to this FD */ - list_for_each_entry_safe(event_sub, event_sub_tmp, - &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(dev, event_sub); - list_del_rcu(&event_sub->file_list); - /* subscription may not be used by the read API any more */ - call_rcu(&event_sub->rcu, devx_free_subscription); - } - - mutex_unlock(&dev->devx_event_table.event_xa_lock); - - /* free the pending events allocation */ - if (!ev_file->omit_data) { - spin_lock_irq(&ev_file->lock); - list_for_each_entry_safe(entry, tmp, - &ev_file->event_list, list) - kfree(entry); /* read can't 
come any more */ - spin_unlock_irq(&ev_file->lock); - } - - uverbs_close_fd(filp); - put_device(&dev->ib_dev.dev); - return 0; -} - static const struct file_operations devx_async_event_fops = { .owner = THIS_MODULE, .read = devx_async_event_read, .poll = devx_async_event_poll, - .release = devx_async_event_close, + .release = uverbs_uobject_fd_release, .llseek = no_llseek, }; -static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_cmd_event_file *comp_ev_file = container_of(uobj, struct devx_async_cmd_event_file, uobj); struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue; + struct devx_async_data *entry, *tmp; spin_lock_irq(&ev_queue->lock); ev_queue->is_destroyed = 1; spin_unlock_irq(&ev_queue->lock); - - if (why == RDMA_REMOVE_DRIVER_REMOVE) - wake_up_interruptible(&ev_queue->poll_wait); + wake_up_interruptible(&ev_queue->poll_wait); mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx); + + spin_lock_irq(&comp_ev_file->ev_queue.lock); + list_for_each_entry_safe(entry, tmp, + &comp_ev_file->ev_queue.event_list, list) + kvfree(entry); + spin_unlock_irq(&comp_ev_file->ev_queue.lock); return 0; }; -static int devx_hot_unplug_async_event_file(struct ib_uobject *uobj, - enum rdma_remove_reason why) +static int devx_async_event_destroy_uobj(struct ib_uobject *uobj, + enum rdma_remove_reason why) { struct devx_async_event_file *ev_file = container_of(uobj, struct devx_async_event_file, uobj); + struct devx_event_subscription *event_sub, *event_sub_tmp; + struct devx_async_event_data *entry, *tmp; + struct mlx5_ib_dev *dev = ev_file->dev; spin_lock_irq(&ev_file->lock); ev_file->is_destroyed = 1; spin_unlock_irq(&ev_file->lock); - wake_up_interruptible(&ev_file->poll_wait); + + mutex_lock(&dev->devx_event_table.event_xa_lock); + /* delete the subscriptions which are related to this FD */ + list_for_each_entry_safe(event_sub, event_sub_tmp, + &ev_file->subscribed_events_list, file_list) { + devx_cleanup_subscription(dev, event_sub); + list_del_rcu(&event_sub->file_list); + /* subscription may not be used by the read API any more */ + call_rcu(&event_sub->rcu, devx_free_subscription); + } + mutex_unlock(&dev->devx_event_table.event_xa_lock); + + /* free the pending events allocation */ + if (!ev_file->omit_data) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(entry, tmp, + &ev_file->event_list, list) + kfree(entry); /* read can't come any more */ + spin_unlock_irq(&ev_file->lock); + } + + put_device(&dev->ib_dev.dev); return 0; }; @@ -2913,7 +2898,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file), - devx_hot_unplug_async_cmd_event_file, + devx_async_cmd_event_destroy_uobj, &devx_async_cmd_event_fops, "[devx_async_cmd]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)); @@ -2931,7 +2916,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_OBJECT( MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD, UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file), - devx_hot_unplug_async_event_file, + devx_async_event_destroy_uobj, &devx_async_event_fops, "[devx_async_event]", O_RDONLY), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index ca65d4704b4b..b633fa2543e3 100644 --- a/include/rdma/uverbs_types.h +++ 
b/include/rdma/uverbs_types.h @@ -161,11 +161,11 @@ struct uverbs_obj_fd_type { * In fd based objects, uverbs_obj_type_ops points to generic * fd operations. In order to specialize the underlying types (e.g. * completion_channel), we use fops, name and flags for fd creation. - * context_closed is called when the context is closed either when - * the driver is removed or the process terminated. + * destroy_object is called when the uobject is to be destroyed, + * because the driver is removed or the FD is closed. */ struct uverbs_obj_type type; - int (*context_closed)(struct ib_uobject *uobj, + int (*destroy_object)(struct ib_uobject *uobj, enum rdma_remove_reason why); const struct file_operations *fops; const char *name; @@ -174,11 +174,11 @@ struct uverbs_obj_fd_type { extern const struct uverbs_obj_type_class uverbs_idr_class; extern const struct uverbs_obj_type_class uverbs_fd_class; -void uverbs_close_fd(struct file *f); +int uverbs_uobject_fd_release(struct inode *inode, struct file *filp); #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\ +#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \ ((&((const struct uverbs_obj_fd_type) \ {.type = { \ .type_class = &uverbs_fd_class, \ @@ -186,7 +186,7 @@ void uverbs_close_fd(struct file *f); UVERBS_BUILD_BUG_ON((_obj_size) < \ sizeof(struct ib_uobject)), \ }, \ - .context_closed = _context_closed, \ + .destroy_object = _destroy_object, \ .fops = _fops, \ .name = _name, \ .flags = _flags}))->type) -- cgit v1.2.3 From 849e149063bd10eb6211c14617491a0bc9516c2f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:21:56 +0200 Subject: RDMA/core: Do not allow alloc_commit to fail This is a left over from an earlier version that creates a lot of complexity for error unwind, particularly for FD uobjects. The only reason this was done is so that anon_inode_get_file() could be called with the final fops and a fully setup uobject. Both need to be setup since unwinding anon_inode_get_file() via fput will call the driver's release(). Now that the driver does not provide release, we no longer need to worry about this complicated sequence, simply create the struct file at the start and allow the core code's release function to deal with the abort case. This allows all the confusing error paths around commit to be removed. 
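For illustration, the caller-side effect of this change can be sketched as follows; the stub declarations exist only so the fragment stands on its own (they are not the kernel headers), and the verb names are invented. The real call-site conversions appear in the uverbs_cmd.c hunks below.

/* Stub declarations so this sketch is self-contained; not the kernel headers. */
struct ib_uobject;
struct uverbs_attr_bundle;
int uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);          /* old API */
void rdma_alloc_commit_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); /* new API */

/* Before: the commit step itself could fail, so every creating verb carried an error path. */
static int create_verb_old(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
	return uobj_alloc_commit(uobj, attrs);
}

/* After: the commit cannot fail, so the verb simply commits and reports success. */
static int create_verb_new(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
	rdma_alloc_commit_uobject(uobj, attrs);
	return 0;
}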
Link: https://lore.kernel.org/r/1578504126-9400-5-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 118 ++++++++++++++++----------------- drivers/infiniband/core/rdma_core.h | 21 +----- drivers/infiniband/core/uverbs_cmd.c | 40 ++++++----- drivers/infiniband/core/uverbs_ioctl.c | 45 ++++--------- include/rdma/uverbs_std_types.h | 10 --- include/rdma/uverbs_types.h | 6 +- 6 files changed, 99 insertions(+), 141 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 0ed0341b8e30..a9f5263c9559 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -130,7 +130,11 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); - if (uobj->object) { + if (reason == RDMA_REMOVE_ABORT) { + WARN_ON(!list_empty(&uobj->list)); + WARN_ON(!uobj->context); + uobj->uapi_object->type_class->alloc_abort(uobj); + } else if (uobj->object) { ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, attrs); if (ret) { @@ -146,12 +150,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, uobj->object = NULL; } - if (reason == RDMA_REMOVE_ABORT) { - WARN_ON(!list_empty(&uobj->list)); - WARN_ON(!uobj->context); - uobj->uapi_object->type_class->alloc_abort(uobj); - } - uobj->context = NULL; /* @@ -450,22 +448,40 @@ static struct ib_uobject * alloc_begin_fd_uobject(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile) { + const struct uverbs_obj_fd_type *fd_type = + container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); int new_fd; struct ib_uobject *uobj; + struct file *filp; + + if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release)) + return ERR_PTR(-EINVAL); new_fd = get_unused_fd_flags(O_CLOEXEC); if (new_fd < 0) return ERR_PTR(new_fd); uobj = alloc_uobj(ufile, obj); - if (IS_ERR(uobj)) { - put_unused_fd(new_fd); - return uobj; + if (IS_ERR(uobj)) + goto err_fd; + + /* Note that uverbs_uobject_fd_release() is called during abort */ + filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL, + fd_type->flags); + if (IS_ERR(filp)) { + uobj = ERR_CAST(filp); + goto err_uobj; } + uobj->object = filp; uobj->id = new_fd; uobj->ufile = ufile; + return uobj; +err_uobj: + uverbs_uobject_put(uobj); +err_fd: + put_unused_fd(new_fd); return uobj; } @@ -539,6 +555,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj) static void alloc_abort_fd_uobject(struct ib_uobject *uobj) { + struct file *filp = uobj->object; + + fput(filp); put_unused_fd(uobj->id); } @@ -560,7 +579,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj) { } -static int alloc_commit_idr_uobject(struct ib_uobject *uobj) +static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { struct ib_uverbs_file *ufile = uobj->ufile; void *old; @@ -574,31 +593,12 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj) */ old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL); WARN_ON(old != NULL); - - return 0; } -static int alloc_commit_fd_uobject(struct ib_uobject *uobj) +static void alloc_commit_fd_uobject(struct ib_uobject *uobj) { - const struct uverbs_obj_fd_type *fd_type = container_of( - uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type); int fd = uobj->id; - struct file *filp; - - /* - * The kref for uobj is moved into filp->private data and put in - * 
uverbs_close_fd(). Once alloc_commit() succeeds - * uverbs_uobject_fd_release() must be guaranteed to be called from - * the provided fops release callback. - */ - filp = anon_inode_getfile(fd_type->name, - fd_type->fops, - uobj, - fd_type->flags); - if (IS_ERR(filp)) - return PTR_ERR(filp); - - uobj->object = filp; + struct file *filp = uobj->object; /* Matching put will be done in uverbs_uobject_fd_release() */ kref_get(&uobj->ufile->ref); @@ -610,9 +610,8 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * NOTE: Once we install the file we loose ownership of our kref on * uobj. It will be put by uverbs_uobject_fd_release() */ + filp->private_data = uobj; fd_install(fd, filp); - - return 0; } /* @@ -620,19 +619,13 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj) * caller can no longer assume uobj is valid. If this function fails it * destroys the uboject, including the attached HW object. */ -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs) +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs) { struct ib_uverbs_file *ufile = attrs->ufile; - int ret; /* alloc_commit consumes the uobj kref */ - ret = uobj->uapi_object->type_class->alloc_commit(uobj); - if (ret) { - uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); - up_read(&ufile->hw_destroy_rwsem); - return ret; - } + uobj->uapi_object->type_class->alloc_commit(uobj); /* kref is held so long as the uobj is on the uobj list. */ uverbs_uobject_get(uobj); @@ -645,8 +638,6 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); - - return 0; } /* @@ -658,7 +649,6 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, { struct ib_uverbs_file *ufile = uobj->ufile; - uobj->object = NULL; uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ @@ -751,14 +741,23 @@ EXPORT_SYMBOL(uverbs_idr_class); */ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp) { - struct ib_uobject *uobj = filp->private_data; - struct ib_uverbs_file *ufile = uobj->ufile; - struct uverbs_attr_bundle attrs = { - .context = uobj->context, - .ufile = ufile, - }; + struct ib_uverbs_file *ufile; + struct ib_uobject *uobj; + + /* + * This can only happen if the fput came from alloc_abort_fd_uobject() + */ + if (!filp->private_data) + return 0; + uobj = filp->private_data; + ufile = uobj->ufile; if (down_read_trylock(&ufile->hw_destroy_rwsem)) { + struct uverbs_attr_bundle attrs = { + .context = uobj->context, + .ufile = ufile, + }; + /* * lookup_get_fd_uobject holds the kref on the struct file any * time a FD uobj is locked, which prevents this release @@ -770,7 +769,7 @@ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp) up_read(&ufile->hw_destroy_rwsem); } - /* Matches the get in alloc_begin_fd_uobject */ + /* Matches the get in alloc_commit_fd_uobject() */ kref_put(&ufile->ref, ib_uverbs_release_file); /* Pairs with filp->private_data in alloc_begin_fd_uobject */ @@ -938,12 +937,10 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, } } -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs) +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs) { - int ret = 0; - /* * 
refcounts should be handled at the object level and not at the * uobject level. Refcounts of the objects themselves are done in @@ -963,14 +960,11 @@ int uverbs_finalize_object(struct ib_uobject *uobj, break; case UVERBS_ACCESS_NEW: if (commit) - ret = rdma_alloc_commit_uobject(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); else rdma_alloc_abort_uobject(uobj, attrs); break; default: WARN_ON(true); - ret = -EOPNOTSUPP; } - - return ret; } diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 92694253e776..29f905e8c2a8 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -63,24 +63,9 @@ struct ib_uobject * uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, s64 id, struct uverbs_attr_bundle *attrs); -/* - * Note that certain finalize stages could return a status: - * (a) alloc_commit could return a failure if the object is committed at the - * same time when the context is destroyed. - * (b) remove_commit could fail if the object wasn't destroyed successfully. - * Since multiple objects could be finalized in one transaction, it is very NOT - * recommended to have several finalize actions which have side effects. - * For example, it's NOT recommended to have a certain action which has both - * a commit action and a destroy action or two destroy objects in the same - * action. The rule of thumb is to have one destroy or commit action with - * multiple lookups. - * The first non zero return value of finalize_object is returned from this - * function. For example, this could happen when we couldn't destroy an - * object. - */ -int uverbs_finalize_object(struct ib_uobject *uobj, - enum uverbs_obj_access access, bool commit, - struct uverbs_attr_bundle *attrs); +void uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, bool commit, + struct uverbs_attr_bundle *attrs); int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 06ed32c8662f..74f6ae475ca0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -446,7 +446,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) if (ret) goto err_copy; - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dealloc_pd_user(pd, uverbs_get_cleared_udata(attrs)); @@ -642,7 +643,8 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs) mutex_unlock(&ibudev->xrcd_tree_mutex); - return uobj_alloc_commit(&obj->uobject, attrs); + rdma_alloc_commit_uobject(&obj->uobject, attrs); + return 0; err_copy: if (inode) { @@ -774,7 +776,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs)); @@ -928,7 +931,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: uverbs_dealloc_mw(mw); @@ -980,7 +984,8 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs) return ret; } - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; } static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, @@ 
-1049,9 +1054,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - ret = uobj_alloc_commit(&obj->uobject, attrs); - if (ret) - return ERR_PTR(ret); + rdma_alloc_commit_uobject(&obj->uobject, attrs); return obj; err_cb: @@ -1491,7 +1494,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ind_tbl) uobj_put_obj_read(ind_tbl); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_cb: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -1623,7 +1627,8 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) qp->uobject = &obj->uevent.uobject; uobj_put_read(xrcd_uobj); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_destroy: ib_destroy_qp_user(qp, uverbs_get_cleared_udata(attrs)); @@ -2465,7 +2470,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: rdma_destroy_ah_user(ah, RDMA_DESTROY_AH_SLEEPABLE, @@ -2977,7 +2983,8 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) uobj_put_obj_read(pd); uobj_put_obj_read(cq); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); @@ -3151,7 +3158,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) for (j = 0; j < num_read_wqs; j++) uobj_put_obj_read(wqs[j]); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: ib_destroy_rwq_ind_table(rwq_ind_tbl); @@ -3329,7 +3337,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); - return uobj_alloc_commit(uobj, attrs); + rdma_alloc_commit_uobject(uobj, attrs); + return 0; err_copy: if (!qp->device->ops.destroy_flow(flow_id)) atomic_dec(&qp->usecnt); @@ -3477,7 +3486,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_obj_read(attr.ext.cq); uobj_put_obj_read(pd); - return uobj_alloc_commit(&obj->uevent.uobject, attrs); + rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); + return 0; err_copy: ib_destroy_srq_user(srq, uverbs_get_cleared_udata(attrs)); diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 269938f59d3f..538affbc517e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -220,24 +220,17 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle, return ret; } -static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, - struct uverbs_objs_arr_attr *attr, - bool commit, struct uverbs_attr_bundle *attrs) +static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi, + struct uverbs_objs_arr_attr *attr, + bool commit, + struct uverbs_attr_bundle *attrs) { const struct uverbs_attr_spec *spec = &attr_uapi->spec; - int current_ret; - int ret = 0; size_t i; - for (i = 0; i != attr->len; i++) { - current_ret = uverbs_finalize_object(attr->uobjects[i], - spec->u2.objs_arr.access, - commit, attrs); - if (!ret) - ret = current_ret; - } - - return ret; + for (i = 0; i != attr->len; i++) + uverbs_finalize_object(attr->uobjects[i], + 
spec->u2.objs_arr.access, commit, attrs); } static int uverbs_process_attr(struct bundle_priv *pbundle, @@ -495,26 +488,22 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle, return ret; } -static int bundle_destroy(struct bundle_priv *pbundle, bool commit) +static void bundle_destroy(struct bundle_priv *pbundle, bool commit) { unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len; struct bundle_alloc_head *memblock; unsigned int i; - int ret = 0; /* fast path for simple uobjects */ i = -1; while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, i + 1)) < key_bitmap_len) { struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; - int current_ret; - current_ret = uverbs_finalize_object( + uverbs_finalize_object( attr->obj_attr.uobject, attr->obj_attr.attr_elm->spec.u.obj.access, commit, &pbundle->bundle); - if (!ret) - ret = current_ret; } i = -1; @@ -523,7 +512,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) struct uverbs_attr *attr = &pbundle->bundle.attrs[i]; const struct uverbs_api_attr *attr_uapi; void __rcu **slot; - int current_ret; slot = uapi_get_attr_for_method( pbundle, @@ -534,11 +522,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) attr_uapi = rcu_dereference_protected(*slot, true); if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) { - current_ret = uverbs_free_idrs_array( - attr_uapi, &attr->objs_arr_attr, commit, - &pbundle->bundle); - if (!ret) - ret = current_ret; + uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr, + commit, &pbundle->bundle); } } @@ -548,8 +533,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit) memblock = memblock->next; kvfree(tmp); } - - return ret; } static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, @@ -562,7 +545,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, struct bundle_priv *pbundle; struct bundle_priv onstack; void __rcu **slot; - int destroy_ret; int ret; if (unlikely(hdr->driver_id != uapi->driver_id)) @@ -610,10 +592,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile, memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize)); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); - destroy_ret = bundle_destroy(pbundle, ret == 0); - if (unlikely(destroy_ret && !ret)) - return destroy_ret; - + bundle_destroy(pbundle, ret == 0); return ret; } diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 05eabfd5d0d3..c6bcaad4a8e0 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -104,16 +104,6 @@ static inline void uobj_put_write(struct ib_uobject *uobj) rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); } -static inline int __must_check -uobj_alloc_commit(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) -{ - int ret = rdma_alloc_commit_uobject(uobj, attrs); - - if (ret) - return ret; - return 0; -} - static inline void uobj_alloc_abort(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) { diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index b633fa2543e3..01d59df4e9e7 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -85,7 +85,7 @@ struct uverbs_obj_type_class { struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile); /* This consumes the kref on uobj */ - int (*alloc_commit)(struct ib_uobject *uobj); + void (*alloc_commit)(struct ib_uobject *uobj); /* This does not consume the kref on uobj */ void 
(*alloc_abort)(struct ib_uobject *uobj); @@ -141,8 +141,8 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj, - struct uverbs_attr_bundle *attrs); +void rdma_alloc_commit_uobject(struct ib_uobject *uobj, + struct uverbs_attr_bundle *attrs); /* * uverbs_uobject_get is called in order to increase the reference count on -- cgit v1.2.3 From 5bd48c18c8cea0154800b40ec75201fa71684312 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:21:58 +0200 Subject: RDMA/core: Do not erase the type of ib_cq.uobject This is a struct ib_ucq_object pointer, instead of using container_of() all over the place just store it with its actual type. Link: https://lore.kernel.org/r/1578504126-9400-7-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/nldev.c | 3 +- drivers/infiniband/core/uverbs_cmd.c | 40 +++++++++++++++++---------- drivers/infiniband/core/uverbs_main.c | 7 ++--- drivers/infiniband/core/uverbs_std_types_cq.c | 2 +- include/rdma/ib_verbs.h | 4 ++- 5 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index cbf6041a5d4a..37b433aa7306 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -41,6 +41,7 @@ #include "core_priv.h" #include "cma_priv.h" #include "restrack.h" +#include "uverbs.h" typedef int (*res_fill_func_t)(struct sk_buff*, bool, struct rdma_restrack_entry*, uint32_t); @@ -599,7 +600,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, goto err; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, - cq->uobject->context->res.id)) + cq->uobject->uevent.uobject.context->res.id)) goto err; if (fill_res_name_pid(msg, res)) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3a2a27830ae2..b08679a5da25 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1029,7 +1029,7 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs, goto err_file; } cq->device = ib_dev; - cq->uobject = &obj->uevent.uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; @@ -1134,7 +1134,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_response(attrs, &resp, sizeof(resp)); out: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1217,7 +1218,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs) ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT); out_put: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -1238,8 +1240,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs) ib_req_notify_cq(cq, cmd.solicited_only ? 
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return 0; } @@ -1484,9 +1486,11 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) uobj_put_obj_read(srq); if (ind_tbl) @@ -1503,9 +1507,11 @@ err_put: if (pd) uobj_put_obj_read(pd); if (scq) - uobj_put_obj_read(scq); + rdma_lookup_put_uobject(&scq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (rcq && rcq != scq) - uobj_put_obj_read(rcq); + rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (srq) uobj_put_obj_read(srq); if (ind_tbl) @@ -2980,14 +2986,16 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto err_copy; uobj_put_obj_read(pd); - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); return 0; err_copy: ib_destroy_wq(wq, uverbs_get_cleared_udata(attrs)); err_put_cq: - uobj_put_obj_read(cq); + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_pd: uobj_put_obj_read(pd); err_uobj: @@ -3481,7 +3489,8 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, uobj_put_read(xrcd_uobj); if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); uobj_put_obj_read(pd); rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); @@ -3498,7 +3507,8 @@ err_put: err_put_cq: if (ib_srq_has_cq(cmd->srq_type)) - uobj_put_obj_read(attr.ext.cq); + rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { @@ -3714,8 +3724,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs) ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period); - uobj_put_obj_read(cq); - + rdma_lookup_put_uobject(&cq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 940bb291a57d..2849bf40bc97 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -408,9 +408,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) return; } - uobj = container_of(cq->uobject, struct ib_ucq_object, uevent.uobject); + uobj = cq->uobject; - entry->desc.comp.cq_handle = cq->uobject->user_handle; + entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &ev_queue->event_list); @@ -457,8 +457,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj = container_of( - event->element.cq->uobject, struct ib_uevent_object, uobject); + struct ib_uevent_object *uobj = &event->element.cq->uobject->uevent; ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle, event->event, &uobj->event_list, diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index fbc605a95e51..a41c758042cc 100644 --- 
a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -116,7 +116,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( } cq->device = ib_dev; - cq->uobject = &obj->uevent.uobject; + cq->uobject = obj; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cea4e198701e..e9ab986ab323 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -77,6 +77,8 @@ extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; extern struct workqueue_struct *ib_comp_unbound_wq; +struct ib_ucq_object; + __printf(3, 4) __cold void ibdev_printk(const char *level, const struct ib_device *ibdev, const char *format, ...); @@ -1544,7 +1546,7 @@ enum ib_poll_context { struct ib_cq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_ucq_object *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; -- cgit v1.2.3 From 620d3f8176cbb3a9c0c7962a05fb15310a9998d4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:21:59 +0200 Subject: RDMA/core: Do not erase the type of ib_qp.uobject This is a struct ib_uqp_object pointer, instead of using container_of() all over the place just store it with its actual type. Link: https://lore.kernel.org/r/1578504126-9400-8-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/core_priv.h | 2 +- drivers/infiniband/core/uverbs_cmd.c | 34 +++++++++++++++++++++------------- drivers/infiniband/core/uverbs_main.c | 3 +-- include/rdma/ib_verbs.h | 3 ++- 4 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index d657d90e618b..b1457b3464d3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -321,7 +321,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, struct ib_pd *pd, struct ib_qp_init_attr *attr, struct ib_udata *udata, - struct ib_uobject *uobj) + struct ib_uqp_object *uobj) { enum ib_qp_type qp_type = attr->qp_type; struct ib_qp *qp; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b08679a5da25..4d84d083847c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1424,7 +1424,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, qp = ib_create_qp(pd, &attr); else qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata, - &obj->uevent.uobject); + obj); if (IS_ERR(qp)) { ret = PTR_ERR(qp); @@ -1457,7 +1457,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, atomic_inc(&ind_tbl->usecnt); } else { /* It is done in _ib_create_qp for other QP types */ - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; } obj->uevent.uobject.object = qp; @@ -1628,7 +1628,7 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs) obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); - qp->uobject = &obj->uevent.uobject; + qp->uobject = obj; uobj_put_read(xrcd_uobj); rdma_alloc_commit_uobject(&obj->uevent.uobject, attrs); @@ -1693,7 +1693,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs) ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); - 
uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) goto out; @@ -1930,7 +1931,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, &attrs->driver_udata); release_qp: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); out: kfree(attr); @@ -2194,7 +2196,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs) ret = ret2; out_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); while (wr) { if (is_ud && ud_wr(wr)->ah) @@ -2336,7 +2339,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) { for (next = wr; next; next = next->next) { ++resp.bad_wr; @@ -2517,7 +2521,7 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2544,7 +2548,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -2566,7 +2571,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) if (!qp) return -EINVAL; - obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + obj = qp->uobject; mutex_lock(&obj->mcast_lock); list_for_each_entry(mcast, &obj->mcast_list, list) @@ -2587,7 +2592,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs) out_put: mutex_unlock(&obj->mcast_lock); - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3339,7 +3345,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) if (err) goto err_copy; - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); @@ -3353,7 +3360,8 @@ err_free: err_free_flow_attr: kfree(flow_attr); err_put: - uobj_put_obj_read(qp); + rdma_lookup_put_uobject(&qp->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_uobj: uobj_alloc_abort(uobj, attrs); err_free_attr: diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 2849bf40bc97..12da68fca1c1 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -472,8 +472,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) if (!event->element.qp->uobject) return; - uobj = container_of(event->element.qp->uobject, - struct ib_uevent_object, uobject); + uobj = &event->element.qp->uobject->uevent; ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e9ab986ab323..b4dbc5f9636a 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -72,6 +72,7 @@ #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN struct ib_umem_odp; +struct ib_uqp_object; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -1735,7 +1736,7 @@ struct ib_qp { atomic_t usecnt; 
struct list_head open_list; struct ib_qp *real_qp; - struct ib_uobject *uobject; + struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ -- cgit v1.2.3 From 9fbe334c6a67c3c09f187e4b9b0e6eaf0ad31429 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:22:00 +0200 Subject: RDMA/core: Do not erase the type of ib_srq.uobject This is a struct ib_usrq_object pointer, instead of using container_of() all over the place just store it with its actual type. Link: https://lore.kernel.org/r/1578504126-9400-9-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 17 +++++++++++------ drivers/infiniband/core/uverbs_main.c | 3 +-- include/rdma/ib_verbs.h | 3 ++- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4d84d083847c..8350e023d3b5 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1492,7 +1492,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs, rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); @@ -1513,7 +1514,8 @@ err_put: rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject, UVERBS_LOOKUP_READ); if (srq) - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ind_tbl) uobj_put_obj_read(ind_tbl); @@ -2390,7 +2392,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs) resp.bad_wr = 0; ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) for (next = wr; next; next = next->next) { @@ -3458,7 +3461,7 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs, srq->device = pd->device; srq->pd = pd; srq->srq_type = cmd->srq_type; - srq->uobject = &obj->uevent.uobject; + srq->uobject = obj; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; @@ -3584,7 +3587,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs) ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3607,7 +3611,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs) ret = ib_query_srq(srq, &attr); - uobj_put_obj_read(srq); + rdma_lookup_put_uobject(&srq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); if (ret) return ret; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 12da68fca1c1..a501f76bc2f9 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -493,8 +493,7 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; - uobj = container_of(event->element.srq->uobject, - struct ib_uevent_object, uobject); + uobj = &event->element.srq->uobject->uevent; ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b4dbc5f9636a..7990b55b1b40 100644 --- 
a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -73,6 +73,7 @@ struct ib_umem_odp; struct ib_uqp_object; +struct ib_usrq_object; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -1575,7 +1576,7 @@ struct ib_cq { struct ib_srq { struct ib_device *device; struct ib_pd *pd; - struct ib_uobject *uobject; + struct ib_usrq_object *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; -- cgit v1.2.3 From e04dd13159b0ddc0ff7f5e110bf99af3c65fabd3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:22:01 +0200 Subject: RDMA/core: Do not erase the type of ib_wq.uobject This is a struct ib_uwq_object pointer, instead of using container_of() all over the place just store it with its actual type. Link: https://lore.kernel.org/r/1578504126-9400-10-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 13 ++++++++----- drivers/infiniband/core/uverbs_main.c | 3 +-- include/rdma/ib_verbs.h | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8350e023d3b5..66f86b4e5e81 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2971,7 +2971,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) goto err_put_cq; } - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; wq->wq_type = wq_init_attr.wq_type; wq->cq = cq; @@ -2981,7 +2981,7 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs) atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); - wq->uobject = &obj->uevent.uobject; + wq->uobject = obj; obj->uevent.uobject.object = wq; memset(&resp, 0, sizeof(resp)); @@ -3070,7 +3070,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs) } ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask, &attrs->driver_udata); - uobj_put_obj_read(wq); + rdma_lookup_put_uobject(&wq->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); return ret; } @@ -3171,7 +3172,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); rdma_alloc_commit_uobject(uobj, attrs); return 0; @@ -3182,7 +3184,8 @@ err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: for (j = 0; j < num_read_wqs; j++) - uobj_put_obj_read(wqs[j]); + rdma_lookup_put_uobject(&wqs[j]->uobject->uevent.uobject, + UVERBS_LOOKUP_READ); err_free: kfree(wqs_handles); kfree(wqs); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index a501f76bc2f9..87f7a94adf57 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -481,8 +481,7 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { - struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, - struct ib_uevent_object, uobject); + struct ib_uevent_object *uobj = &event->element.wq->uobject->uevent; ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 
7990b55b1b40..d8031f6f327e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -74,6 +74,7 @@ struct ib_umem_odp; struct ib_uqp_object; struct ib_usrq_object; +struct ib_uwq_object; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -1621,7 +1622,7 @@ enum ib_wq_state { struct ib_wq { struct ib_device *device; - struct ib_uobject *uobject; + struct ib_uwq_object *uobject; void *wq_context; void (*event_handler)(struct ib_event *, void *); struct ib_pd *pd; -- cgit v1.2.3 From 39e83af817610e80a9582df94d44173bcb6f85e4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jan 2020 19:22:04 +0200 Subject: RDMA/core: Remove the ufile arg from rdma_alloc_begin_uobject Now that all callers provide a non-NULL attrs the ufile is redundant. Adjust things so that the context handling is done inside alloc_uobj, and the ib_uverbs_get_ucontext_file() is avoided if we already have the context. Link: https://lore.kernel.org/r/1578504126-9400-13-git-send-email-yishaih@mellanox.com Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/rdma_core.c | 36 +++++++++++++++++++----------------- include/rdma/uverbs_std_types.h | 3 +-- include/rdma/uverbs_types.h | 3 +-- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index a9f5263c9559..50251da123f7 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -256,15 +256,20 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, } /* alloc_uobj must be undone by uverbs_destroy_uobject() */ -static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, +static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs, const struct uverbs_api_object *obj) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *uobj; - struct ib_ucontext *ucontext; - ucontext = ib_uverbs_get_ucontext_file(ufile); - if (IS_ERR(ucontext)) - return ERR_CAST(ucontext); + if (!attrs->context) { + struct ib_ucontext *ucontext = + ib_uverbs_get_ucontext_file(ufile); + + if (IS_ERR(ucontext)) + return ERR_CAST(ucontext); + attrs->context = ucontext; + } uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL); if (!uobj) @@ -274,7 +279,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile, * The object is added to the list in the commit stage. 
*/ uobj->ufile = ufile; - uobj->context = ucontext; + uobj->context = attrs->context; INIT_LIST_HEAD(&uobj->list); uobj->uapi_object = obj; /* @@ -417,12 +422,12 @@ free: static struct ib_uobject * alloc_begin_idr_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { int ret; struct ib_uobject *uobj; - uobj = alloc_uobj(ufile, obj); + uobj = alloc_uobj(attrs, obj); if (IS_ERR(uobj)) return uobj; @@ -438,7 +443,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj, return uobj; remove: - xa_erase(&ufile->idr, uobj->id); + xa_erase(&attrs->ufile->idr, uobj->id); uobj_put: uverbs_uobject_put(uobj); return ERR_PTR(ret); @@ -446,7 +451,7 @@ uobj_put: static struct ib_uobject * alloc_begin_fd_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile) + struct uverbs_attr_bundle *attrs) { const struct uverbs_obj_fd_type *fd_type = container_of(obj->type_attrs, struct uverbs_obj_fd_type, type); @@ -461,7 +466,7 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj, if (new_fd < 0) return ERR_PTR(new_fd); - uobj = alloc_uobj(ufile, obj); + uobj = alloc_uobj(attrs, obj); if (IS_ERR(uobj)) goto err_fd; @@ -475,7 +480,6 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj, uobj->object = filp; uobj->id = new_fd; - uobj->ufile = ufile; return uobj; err_uobj: @@ -486,9 +490,9 @@ err_fd: } struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs) { + struct ib_uverbs_file *ufile = attrs->ufile; struct ib_uobject *ret; if (IS_ERR(obj)) @@ -502,13 +506,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, if (!down_read_trylock(&ufile->hw_destroy_rwsem)) return ERR_PTR(-EIO); - ret = obj->type_class->alloc_begin(obj, ufile); + ret = obj->type_class->alloc_begin(obj, attrs); if (IS_ERR(ret)) { up_read(&ufile->hw_destroy_rwsem); return ret; } - if (attrs) - attrs->context = ret->context; return ret; } @@ -930,7 +932,7 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access, return rdma_lookup_get_uobject(obj, attrs->ufile, id, UVERBS_LOOKUP_WRITE, attrs); case UVERBS_ACCESS_NEW: - return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); + return rdma_alloc_begin_uobject(obj, attrs); default: WARN_ON(true); return ERR_PTR(-EOPNOTSUPP); diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index c6bcaad4a8e0..1b28ce1aba07 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -114,8 +114,7 @@ static inline struct ib_uobject * __uobj_alloc(const struct uverbs_api_object *obj, struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev) { - struct ib_uobject *uobj = - rdma_alloc_begin_uobject(obj, attrs->ufile, attrs); + struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs); if (!IS_ERR(uobj)) *ib_dev = attrs->context->device; diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 01d59df4e9e7..f1cbdae67250 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -83,7 +83,7 @@ enum rdma_lookup_mode { */ struct uverbs_obj_type_class { struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile); + struct uverbs_attr_bundle *attrs); /* This consumes the kref on uobj */ void (*alloc_commit)(struct ib_uobject *uobj); /* This does not consume the kref on uobj */ @@ -137,7 +137,6 @@ struct ib_uobject 
*rdma_lookup_get_uobject(const struct uverbs_api_object *obj, void rdma_lookup_put_uobject(struct ib_uobject *uobj, enum rdma_lookup_mode mode); struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj, - struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *attrs); void rdma_alloc_abort_uobject(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs); -- cgit v1.2.3 From ca95c1411198c2d87217c19d44571052cdc94725 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 8 Jan 2020 20:05:35 +0200 Subject: RDMA/uverbs: Verify MR access flags Verify that the MR access flags passed from userspace are all supported ones; otherwise an error is returned. Fixes: 4fca03778351 ("IB/uverbs: Move ib_access_flags and ib_read_counters_flags to uapi") Link: https://lore.kernel.org/r/1578506740-22188-6-git-send-email-yishaih@mellanox.com Signed-off-by: Michael Guralnik Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d8031f6f327e..b20b89e93c62 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4310,6 +4310,9 @@ static inline int ib_check_mr_access(int flags) !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; + if (flags & ~IB_ACCESS_SUPPORTED) + return -EINVAL; + return 0; } -- cgit v1.2.3 From 68d384b906cfc850b65561fd846adbb8b406d9e5 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 8 Jan 2020 20:05:36 +0200 Subject: RDMA/core: Add optional access flags range Define a range of access flags that are defined to be optional. Both uverbs and drivers should accept these flags and use them where applicable. This will be used, for example, for the relaxed ordering access flag, which drivers that do not support it can ignore.
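For illustration, a minimal stand-alone C sketch of the range arithmetic; the numeric values are copied from the uapi hunk introduced below, the shortened macro names are not the real ones, and the masking shown is only an assumption about how a driver that does not implement an optional flag might behave.

#include <stdio.h>

/* Values as defined in the uapi change below; names shortened for this sketch. */
#define OPTIONAL_FIRST (1u << 20)
#define OPTIONAL_LAST  (1u << 29)
#define OPTIONAL_RANGE (((OPTIONAL_LAST << 1) - 1) & ~(OPTIONAL_FIRST - 1))

int main(void)
{
	unsigned int user_flags = (1u << 0) | (1u << 20); /* LOCAL_WRITE plus one optional bit */

	/* A driver that does not understand an optional bit may simply drop it. */
	unsigned int effective = user_flags & ~OPTIONAL_RANGE;

	printf("optional range mask = 0x%08x\n", OPTIONAL_RANGE); /* 0x3ff00000: bits 20..29 */
	printf("effective flags     = 0x%08x\n", effective);      /* 0x00000001 */
	return 0;
}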
Link: https://lore.kernel.org/r/1578506740-22188-7-git-send-email-yishaih@mellanox.com Signed-off-by: Michael Guralnik Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 4 +++- include/uapi/rdma/ib_user_ioctl_verbs.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b20b89e93c62..ed6cf11612b3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1419,7 +1419,9 @@ enum ib_access_flags { IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, - IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) + IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, + IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL, }; /* diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 9019b2d906ea..76dbbd9b55fc 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -41,6 +41,9 @@ #define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name #endif +#define IB_UVERBS_ACCESS_OPTIONAL_FIRST (1 << 20) +#define IB_UVERBS_ACCESS_OPTIONAL_LAST (1 << 29) + enum ib_uverbs_access_flags { IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0, IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1, @@ -50,6 +53,10 @@ enum ib_uverbs_access_flags { IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5, IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, IB_UVERBS_ACCESS_HUGETLB = 1 << 7, + + IB_UVERBS_ACCESS_OPTIONAL_RANGE = ((IB_UVERBS_ACCESS_OPTIONAL_LAST << 1) - 1) & ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1) }; enum ib_uverbs_query_port_cap_flags { -- cgit v1.2.3 From 2233c6609c11146ed1a26eec2e4335131077a608 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 8 Jan 2020 20:05:38 +0200 Subject: RDMA/uverbs: Add new relaxed ordering memory region access flag Add a new relaxed ordering access flag for memory regions. Using memory regions with relaxed ordering set can enhance performance. This access flag is handled in a best-effort manner: drivers should ignore it if they do not support setting relaxed ordering.
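For illustration, a hedged sketch of the best-effort handling described above; the helper name and the hw_supports_relaxed_ordering parameter are hypothetical, and only the bit value (the first optional bit, 1 << 20) comes from the uapi hunk below.

/* Hypothetical driver-side filter; IB_UVERBS_ACCESS_RELAXED_ORDERING is the
 * first optional bit (1 << 20) per the uapi change below. */
#define IB_UVERBS_ACCESS_RELAXED_ORDERING (1u << 20)

static unsigned int drv_filter_mr_access(unsigned int access, int hw_supports_relaxed_ordering)
{
	/* Best effort: silently clear the bit instead of failing the registration. */
	if (!hw_supports_relaxed_ordering)
		access &= ~IB_UVERBS_ACCESS_RELAXED_ORDERING;
	return access;
}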
Link: https://lore.kernel.org/r/1578506740-22188-9-git-send-email-yishaih@mellanox.com Signed-off-by: Michael Guralnik Signed-off-by: Yishai Hadas Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 1 + include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ed6cf11612b3..6506df9f31ae 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1418,6 +1418,7 @@ enum ib_access_flags { IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED, IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND, IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB, + IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING, IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE, IB_ACCESS_SUPPORTED = diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 76dbbd9b55fc..2a165f40ee38 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -54,6 +54,7 @@ enum ib_uverbs_access_flags { IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6, IB_UVERBS_ACCESS_HUGETLB = 1 << 7, + IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST, IB_UVERBS_ACCESS_OPTIONAL_RANGE = ((IB_UVERBS_ACCESS_OPTIONAL_LAST << 1) - 1) & ~(IB_UVERBS_ACCESS_OPTIONAL_FIRST - 1) -- cgit v1.2.3 From d05d4ac4c9316a2e2ea5be434a7f7a021ec44a3c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 16 Jan 2020 13:00:32 -0400 Subject: RDMA/cm: Add SET/GET implementations to hide IBA wire format There is no separation between the RDMA-CM wire format as it is declared in IBTA and the kernel logic which implements the needed support. This situation leads to many mistakes in the conversion between big-endian (wire format) and the CPU format used by the kernel. It also mixes the RDMA core code with a combination of uXX and beXX variables. The idea is that all accesses to IBA definitions will go through special GET/SET macros to ensure that no conversion mistakes are made. The shifting and masking required to read the value is automatically deduced using the field offset description from the tables in the IBA specification. This starts with the CM MADs described in IBTA release 1.3 volume 1. To confirm that the new macros behave the same as the old accessors, a self-test is included in this patch. Each macro replacing a straightforward struct field compile-time tests that the new field has the same offsetof() and width as the old field. For the fields with accessor functions there is a runtime test: the 'all ones' value is placed in a dummy message and read back in several ways to confirm that both approaches give identical results. Later patches in this series delete the self test. This creates a tested table of new field names, old field name(s) and some meta information like BE coding for the functions, which will be used in the next patches.
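To make the accessor scheme concrete, here is a small usage sketch. The CM_REQ_* field locations are the ones declared in ibta_vol1_c12.h further down in this series; the formatting function and the values used are illustrative only.

/*
 * Illustrative only: set a few REQ fields and read one back.  Each macro takes
 * a field location (struct, byte offset, mask, width) plus a pointer to the
 * message, and all byte swapping, shifting and masking happens inside
 * IBA_SET()/IBA_GET(), so the caller only ever sees CPU-order values.
 */
static void example_format_req(struct cm_req_msg *req_msg, u32 comm_id, u32 qpn)
{
	IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg, comm_id);	/* 32-bit field */
	IBA_SET(CM_REQ_LOCAL_QPN, req_msg, qpn);		/* 24-bit field */
	IBA_SET(CM_REQ_RETRY_COUNT, req_msg, 7);		/* 3-bit field */

	/* comes back already shifted and masked, as a plain CPU-order value */
	pr_debug("local qpn = 0x%x\n", IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
}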
Link: https://lore.kernel.org/r/20200116170037.30109-3-jgg@ziepe.ca Link: https://lore.kernel.org/r/20191212093830.316934-5-leon@kernel.org Signed-off-by: Leon Romanovsky Tested-by: Leon Romanovsky Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 267 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/cm_msgs.h | 1 + include/rdma/iba.h | 146 +++++++++++++++++++++ include/rdma/ibta_vol1_c12.h | 208 +++++++++++++++++++++++++++++ 4 files changed, 622 insertions(+) create mode 100644 include/rdma/iba.h create mode 100644 include/rdma/ibta_vol1_c12.h (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index d396b987b0f2..7f609979e4de 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4253,10 +4253,277 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) kfree(cm_dev); } +/* + * Check at compile time that the byte offset and length of field old_name in + * the struct matches the byte offset and length in the new macro. + */ +#define _IBA_CHECK_OFF(old_name, field_struct, field_offset, mask, bits) \ + static_assert(offsetof(field_struct, old_name) == (field_offset)); \ + static_assert(bits == sizeof(((field_struct *)0)->old_name) * 8) +#define IBA_CHECK_OFF(field, old_name) _IBA_CHECK_OFF(old_name, field) + +IBA_CHECK_OFF(CM_REQ_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_REQ_SERVICE_ID, service_id); +IBA_CHECK_OFF(CM_REQ_LOCAL_CA_GUID, local_ca_guid); +IBA_CHECK_OFF(CM_REQ_LOCAL_Q_KEY, local_qkey); +IBA_CHECK_OFF(CM_REQ_PARTITION_KEY, pkey); +IBA_CHECK_OFF(CM_REQ_PRIMARY_LOCAL_PORT_LID, primary_local_lid); +IBA_CHECK_OFF(CM_REQ_PRIMARY_REMOTE_PORT_LID, primary_remote_lid); +IBA_CHECK_OFF(CM_REQ_PRIMARY_LOCAL_PORT_GID, primary_local_gid); +IBA_CHECK_OFF(CM_REQ_PRIMARY_REMOTE_PORT_GID, primary_remote_gid); +IBA_CHECK_OFF(CM_REQ_PRIMARY_TRAFFIC_CLASS, primary_traffic_class); +IBA_CHECK_OFF(CM_REQ_PRIMARY_HOP_LIMIT, primary_hop_limit); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_LOCAL_PORT_LID, alt_local_lid); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_REMOTE_PORT_LID, alt_remote_lid); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_LOCAL_PORT_GID, alt_local_gid); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_REMOTE_PORT_GID, alt_remote_gid); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_TRAFFIC_CLASS, alt_traffic_class); +IBA_CHECK_OFF(CM_REQ_ALTERNATE_HOP_LIMIT, alt_hop_limit); +IBA_CHECK_OFF(CM_REQ_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_MRA_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_MRA_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_MRA_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_REJ_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_REJ_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_REJ_REASON, reason); +IBA_CHECK_OFF(CM_REJ_ARI, ari); +IBA_CHECK_OFF(CM_REJ_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_REP_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_REP_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_REP_LOCAL_Q_KEY, local_qkey); +IBA_CHECK_OFF(CM_REP_RESPONDER_RESOURCES, resp_resources); +IBA_CHECK_OFF(CM_REP_INITIATOR_DEPTH, initiator_depth); +IBA_CHECK_OFF(CM_REP_LOCAL_CA_GUID, local_ca_guid); +IBA_CHECK_OFF(CM_REP_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_RTU_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_RTU_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_RTU_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_DREQ_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_DREQ_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_DREQ_PRIVATE_DATA, private_data); 
+IBA_CHECK_OFF(CM_DREP_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_DREP_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_DREP_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_LAP_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_LAP_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_LAP_ALTERNATE_LOCAL_PORT_LID, alt_local_lid); +IBA_CHECK_OFF(CM_LAP_ALTERNATE_REMOTE_PORT_LID, alt_remote_lid); +IBA_CHECK_OFF(CM_LAP_ALTERNATE_LOCAL_PORT_GID, alt_local_gid); +IBA_CHECK_OFF(CM_LAP_ALTERNATE_REMOTE_PORT_GID, alt_remote_gid); +IBA_CHECK_OFF(CM_LAP_ALTERNATE_HOP_LIMIT, alt_hop_limit); +IBA_CHECK_OFF(CM_LAP_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_APR_LOCAL_COMM_ID, local_comm_id); +IBA_CHECK_OFF(CM_APR_REMOTE_COMM_ID, remote_comm_id); +IBA_CHECK_OFF(CM_APR_ADDITIONAL_INFORMATION_LENGTH, info_length); +IBA_CHECK_OFF(CM_APR_AR_STATUS, ap_status); +IBA_CHECK_OFF(CM_APR_ADDITIONAL_INFORMATION, info); +IBA_CHECK_OFF(CM_APR_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_SIDR_REQ_REQUESTID, request_id); +IBA_CHECK_OFF(CM_SIDR_REQ_PARTITION_KEY, pkey); +IBA_CHECK_OFF(CM_SIDR_REQ_SERVICEID, service_id); +IBA_CHECK_OFF(CM_SIDR_REQ_PRIVATE_DATA, private_data); +IBA_CHECK_OFF(CM_SIDR_REP_REQUESTID, request_id); +IBA_CHECK_OFF(CM_SIDR_REP_STATUS, status); +IBA_CHECK_OFF(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH, info_length); +IBA_CHECK_OFF(CM_SIDR_REP_SERVICEID, service_id); +IBA_CHECK_OFF(CM_SIDR_REP_Q_KEY, qkey); +IBA_CHECK_OFF(CM_SIDR_REP_ADDITIONAL_INFORMATION, info); +IBA_CHECK_OFF(CM_SIDR_REP_PRIVATE_DATA, private_data); + +/* + * Check that the new macro gets the same bits as the old get function. + * - IBA_SET() IBA_GET and old get_fn all agree on the field width. + * The field width should match what IBA_SET truncates to + * - Reading from an all ones data should not return extra bits + * - Setting '1' should be the same (ie no endian problems) + */ +/* defeat builtin_constant checks */ +u64 cm_global_all_ones = 0xffffffffffffffffULL; +#define _IBA_CHECK_GET(fn, field_struct, field_offset, mask, bits) \ + ({ \ + field_struct *lmsg = (field_struct *)msg; \ + unsigned long long all_ones; \ + static_assert(sizeof(*lmsg) <= sizeof(msg)); \ + \ + bitmap_zero(msg, nbits); \ + _IBA_SET(field_struct, field_offset, mask, bits, lmsg, \ + cm_global_all_ones); \ + all_ones = (1ULL << bitmap_weight(msg, nbits)) - 1; \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + all_ones) { \ + printk("Failed #1 line=%u\n", __LINE__); \ + return; \ + } \ + if (fn != all_ones) { \ + printk("Failed #2 line=%u\n", __LINE__); \ + return; \ + } \ + \ + bitmap_fill(msg, nbits); \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + all_ones) { \ + printk("Failed #3 line=%u\n", __LINE__); \ + return; \ + } \ + if (fn != all_ones) { \ + printk("Failed #4 line=%u\n", __LINE__); \ + return; \ + } \ + \ + _IBA_SET(field_struct, field_offset, mask, bits, lmsg, 0); \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + 0) { \ + printk("Failed #5 line=%u\n", __LINE__); \ + return; \ + } \ + if (fn != 0) { \ + printk("Failed #6 line=%u\n", __LINE__); \ + return; \ + } \ + _IBA_SET(field_struct, field_offset, mask, bits, lmsg, 1); \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + 1) { \ + printk("Failed #7 line=%u\n", __LINE__); \ + return; \ + } \ + if (fn != 1) { \ + printk("Failed #8 line=%u\n", __LINE__); \ + return; \ + } \ + }) +#define IBA_CHECK_GET(field, fn_name) _IBA_CHECK_GET(fn_name(lmsg), field) +#define IBA_CHECK_GET_BE(field, fn_name) 
_IBA_CHECK_GET(be32_to_cpu(fn_name(lmsg)), field) + +/* + * Write the all ones value using the old setter and check that the new getter + * reads it back. + */ +#define _IBA_CHECK_SET(fn, field_struct, field_offset, mask, bits) \ + ({ \ + field_struct *lmsg = (field_struct *)msg; \ + unsigned long long all_ones; \ + static_assert(sizeof(*lmsg) <= sizeof(msg)); \ + \ + bitmap_zero(msg, nbits); \ + _IBA_SET(field_struct, field_offset, mask, bits, lmsg, \ + cm_global_all_ones); \ + all_ones = (1ULL << bitmap_weight(msg, nbits)) - 1; \ + bitmap_zero(msg, nbits); \ + fn; \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + all_ones) { \ + printk("Failed #9 line=%u\n", __LINE__); \ + return; \ + } \ + all_ones = 1; \ + fn; \ + if (_IBA_GET(field_struct, field_offset, mask, bits, lmsg) != \ + 1) { \ + printk("Failed #10 line=%u\n", __LINE__); \ + return; \ + } \ + }) + +#define IBA_CHECK_SET(field, fn_name) _IBA_CHECK_SET(fn_name(lmsg, all_ones), field) +#define IBA_CHECK_SET_BE(field, fn_name) \ + _IBA_CHECK_SET(fn_name(lmsg, cpu_to_be32(all_ones)), field) + +static void self_test(void) +{ + unsigned long msg[256/4]; + const unsigned int nbits = sizeof(msg) * 8; + + printk("Running CM extractor self test\n"); + IBA_CHECK_GET_BE(CM_REQ_LOCAL_QPN, cm_req_get_local_qpn); + IBA_CHECK_SET_BE(CM_REQ_LOCAL_QPN, cm_req_set_local_qpn); + IBA_CHECK_GET(CM_REQ_RESPONDER_RESOURCES, cm_req_get_resp_res); + IBA_CHECK_SET(CM_REQ_RESPONDER_RESOURCES, cm_req_set_resp_res); + IBA_CHECK_GET(CM_REQ_INITIATOR_DEPTH, cm_req_get_init_depth); + IBA_CHECK_SET(CM_REQ_INITIATOR_DEPTH, cm_req_set_init_depth); + IBA_CHECK_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, cm_req_get_remote_resp_timeout); + IBA_CHECK_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, cm_req_set_remote_resp_timeout); + IBA_CHECK_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, cm_req_get_transport_type); + IBA_CHECK_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, cm_req_set_transport_type); + IBA_CHECK_GET(CM_REQ_END_TO_END_FLOW_CONTROL, cm_req_get_flow_ctrl); + IBA_CHECK_SET(CM_REQ_END_TO_END_FLOW_CONTROL, cm_req_set_flow_ctrl); + IBA_CHECK_GET_BE(CM_REQ_STARTING_PSN, cm_req_get_starting_psn); + IBA_CHECK_SET_BE(CM_REQ_STARTING_PSN, cm_req_set_starting_psn); + IBA_CHECK_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, cm_req_get_local_resp_timeout); + IBA_CHECK_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, cm_req_set_local_resp_timeout); + IBA_CHECK_GET(CM_REQ_RETRY_COUNT, cm_req_get_retry_count); + IBA_CHECK_SET(CM_REQ_RETRY_COUNT, cm_req_set_retry_count); + IBA_CHECK_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, cm_req_get_path_mtu); + IBA_CHECK_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, cm_req_set_path_mtu); + IBA_CHECK_GET(CM_REQ_RNR_RETRY_COUNT, cm_req_get_rnr_retry_count); + IBA_CHECK_SET(CM_REQ_RNR_RETRY_COUNT, cm_req_set_rnr_retry_count); + IBA_CHECK_GET(CM_REQ_MAX_CM_RETRIES, cm_req_get_max_cm_retries); + IBA_CHECK_SET(CM_REQ_MAX_CM_RETRIES, cm_req_set_max_cm_retries); + IBA_CHECK_GET(CM_REQ_SRQ, cm_req_get_srq); + IBA_CHECK_SET(CM_REQ_SRQ, cm_req_set_srq); + IBA_CHECK_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, cm_req_get_transport_type_ex); + IBA_CHECK_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, cm_req_set_transport_type_ex); + IBA_CHECK_GET_BE(CM_REQ_PRIMARY_FLOW_LABEL, cm_req_get_primary_flow_label); + IBA_CHECK_SET_BE(CM_REQ_PRIMARY_FLOW_LABEL, cm_req_set_primary_flow_label); + IBA_CHECK_GET(CM_REQ_PRIMARY_PACKET_RATE, cm_req_get_primary_packet_rate); + IBA_CHECK_SET(CM_REQ_PRIMARY_PACKET_RATE, cm_req_set_primary_packet_rate); + IBA_CHECK_GET(CM_REQ_PRIMARY_SL, cm_req_get_primary_sl); + 
IBA_CHECK_SET(CM_REQ_PRIMARY_SL, cm_req_set_primary_sl); + IBA_CHECK_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, cm_req_get_primary_subnet_local); + IBA_CHECK_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, cm_req_set_primary_subnet_local); + IBA_CHECK_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, cm_req_get_primary_local_ack_timeout); + IBA_CHECK_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, cm_req_set_primary_local_ack_timeout); + IBA_CHECK_GET_BE(CM_REQ_ALTERNATE_FLOW_LABEL, cm_req_get_alt_flow_label); + IBA_CHECK_SET_BE(CM_REQ_ALTERNATE_FLOW_LABEL, cm_req_set_alt_flow_label); + IBA_CHECK_GET(CM_REQ_ALTERNATE_PACKET_RATE, cm_req_get_alt_packet_rate); + IBA_CHECK_SET(CM_REQ_ALTERNATE_PACKET_RATE, cm_req_set_alt_packet_rate); + IBA_CHECK_GET(CM_REQ_ALTERNATE_SL, cm_req_get_alt_sl); + IBA_CHECK_SET(CM_REQ_ALTERNATE_SL, cm_req_set_alt_sl); + IBA_CHECK_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, cm_req_get_alt_subnet_local); + IBA_CHECK_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, cm_req_set_alt_subnet_local); + IBA_CHECK_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, cm_req_get_alt_local_ack_timeout); + IBA_CHECK_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, cm_req_set_alt_local_ack_timeout); + IBA_CHECK_GET(CM_MRA_MESSAGE_MRAED, cm_mra_get_msg_mraed); + IBA_CHECK_SET(CM_MRA_MESSAGE_MRAED, cm_mra_set_msg_mraed); + IBA_CHECK_GET(CM_MRA_SERVICE_TIMEOUT, cm_mra_get_service_timeout); + IBA_CHECK_SET(CM_MRA_SERVICE_TIMEOUT, cm_mra_set_service_timeout); + IBA_CHECK_GET(CM_REJ_MESSAGE_REJECTED, cm_rej_get_msg_rejected); + IBA_CHECK_SET(CM_REJ_MESSAGE_REJECTED, cm_rej_set_msg_rejected); + IBA_CHECK_GET(CM_REJ_REJECTED_INFO_LENGTH, cm_rej_get_reject_info_len); + IBA_CHECK_SET(CM_REJ_REJECTED_INFO_LENGTH, cm_rej_set_reject_info_len); + IBA_CHECK_GET_BE(CM_REP_LOCAL_QPN, cm_rep_get_local_qpn); + IBA_CHECK_SET_BE(CM_REP_LOCAL_QPN, cm_rep_set_local_qpn); + IBA_CHECK_GET_BE(CM_REP_LOCAL_EE_CONTEXT_NUMBER, cm_rep_get_local_eecn); + IBA_CHECK_SET_BE(CM_REP_LOCAL_EE_CONTEXT_NUMBER, cm_rep_set_local_eecn); + IBA_CHECK_GET_BE(CM_REP_STARTING_PSN, cm_rep_get_starting_psn); + IBA_CHECK_SET_BE(CM_REP_STARTING_PSN, cm_rep_set_starting_psn); + IBA_CHECK_GET(CM_REP_TARGET_ACK_DELAY, cm_rep_get_target_ack_delay); + IBA_CHECK_SET(CM_REP_TARGET_ACK_DELAY, cm_rep_set_target_ack_delay); + IBA_CHECK_GET(CM_REP_FAILOVER_ACCEPTED, cm_rep_get_failover); + IBA_CHECK_SET(CM_REP_FAILOVER_ACCEPTED, cm_rep_set_failover); + IBA_CHECK_GET(CM_REP_END_TO_END_FLOW_CONTROL, cm_rep_get_flow_ctrl); + IBA_CHECK_SET(CM_REP_END_TO_END_FLOW_CONTROL, cm_rep_set_flow_ctrl); + IBA_CHECK_GET(CM_REP_RNR_RETRY_COUNT, cm_rep_get_rnr_retry_count); + IBA_CHECK_SET(CM_REP_RNR_RETRY_COUNT, cm_rep_set_rnr_retry_count); + IBA_CHECK_GET(CM_REP_SRQ, cm_rep_get_srq); + IBA_CHECK_SET(CM_REP_SRQ, cm_rep_set_srq); + IBA_CHECK_GET_BE(CM_DREQ_REMOTE_QPN_EECN, cm_dreq_get_remote_qpn); + IBA_CHECK_SET_BE(CM_DREQ_REMOTE_QPN_EECN, cm_dreq_set_remote_qpn); + IBA_CHECK_GET_BE(CM_LAP_ALTERNATE_FLOW_LABEL, cm_lap_get_flow_label); + IBA_CHECK_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, cm_lap_get_traffic_class); + IBA_CHECK_GET(CM_LAP_ALTERNATE_PACKET_RATE, cm_lap_get_packet_rate); + IBA_CHECK_GET(CM_LAP_ALTERNATE_SL, cm_lap_get_sl); + IBA_CHECK_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, cm_lap_get_local_ack_timeout); + IBA_CHECK_GET_BE(CM_SIDR_REP_QPN, cm_sidr_rep_get_qpn); + IBA_CHECK_SET_BE(CM_SIDR_REP_QPN, cm_sidr_rep_set_qpn); + printk("Success!\n"); +} + static int __init ib_cm_init(void) { int ret; + self_test(); + INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); diff --git 
a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 9af9a3212074..bf62461d801f 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -8,6 +8,7 @@ #ifndef CM_MSGS_H #define CM_MSGS_H +#include #include #include diff --git a/include/rdma/iba.h b/include/rdma/iba.h new file mode 100644 index 000000000000..6a1115b02a0d --- /dev/null +++ b/include/rdma/iba.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + */ +#ifndef _IBA_DEFS_H_ +#define _IBA_DEFS_H_ + +#include +#include +#include + +static inline u32 _iba_get8(const u8 *ptr) +{ + return *ptr; +} + +static inline void _iba_set8(u8 *ptr, u32 mask, u32 prep_value) +{ + *ptr = (*ptr & ~mask) | prep_value; +} + +static inline u16 _iba_get16(const __be16 *ptr) +{ + return be16_to_cpu(*ptr); +} + +static inline void _iba_set16(__be16 *ptr, u16 mask, u16 prep_value) +{ + *ptr = cpu_to_be16((be16_to_cpu(*ptr) & ~mask) | prep_value); +} + +static inline u32 _iba_get32(const __be32 *ptr) +{ + return be32_to_cpu(*ptr); +} + +static inline void _iba_set32(__be32 *ptr, u32 mask, u32 prep_value) +{ + *ptr = cpu_to_be32((be32_to_cpu(*ptr) & ~mask) | prep_value); +} + +static inline u64 _iba_get64(const __be64 *ptr) +{ + /* + * The mads are constructed so that 32 bit and smaller are naturally + * aligned, everything larger has a max alignment of 4 bytes. + */ + return be64_to_cpu(get_unaligned(ptr)); +} + +static inline void _iba_set64(__be64 *ptr, u64 mask, u64 prep_value) +{ + put_unaligned(cpu_to_be64((_iba_get64(ptr) & ~mask) | prep_value), ptr); +} + +#define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \ + ({ \ + field_struct *_ptr = ptr; \ + _iba_set##num_bits((void *)_ptr + (field_offset), field_mask, \ + FIELD_PREP(field_mask, value)); \ + }) +#define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value) + +#define _IBA_GET_MEM_PTR(field_struct, field_offset, type, num_bits, ptr) \ + ({ \ + field_struct *_ptr = ptr; \ + (type *)((void *)_ptr + (field_offset)); \ + }) +#define IBA_GET_MEM_PTR(field, ptr) _IBA_GET_MEM_PTR(field, ptr) + +/* FIXME: A set should always set the entire field, meaning we should zero the trailing bytes */ +#define _IBA_SET_MEM(field_struct, field_offset, type, num_bits, ptr, in, \ + bytes) \ + ({ \ + const type *_in_ptr = in; \ + WARN_ON(bytes * 8 > num_bits); \ + if (in && bytes) \ + memcpy(_IBA_GET_MEM_PTR(field_struct, field_offset, \ + type, num_bits, ptr), \ + _in_ptr, bytes); \ + }) +#define IBA_SET_MEM(field, ptr, in, bytes) _IBA_SET_MEM(field, ptr, in, bytes) + +#define _IBA_GET(field_struct, field_offset, field_mask, num_bits, ptr) \ + ({ \ + const field_struct *_ptr = ptr; \ + (u##num_bits) FIELD_GET( \ + field_mask, _iba_get##num_bits((const void *)_ptr + \ + (field_offset))); \ + }) +#define IBA_GET(field, ptr) _IBA_GET(field, ptr) + +#define _IBA_GET_MEM(field_struct, field_offset, type, num_bits, ptr, out, \ + bytes) \ + ({ \ + type *_out_ptr = out; \ + WARN_ON(bytes * 8 > num_bits); \ + if (out && bytes) \ + memcpy(_out_ptr, \ + _IBA_GET_MEM_PTR(field_struct, field_offset, \ + type, num_bits, ptr), \ + bytes); \ + }) +#define IBA_GET_MEM(field, ptr, out, bytes) _IBA_GET_MEM(field, ptr, out, bytes) + +/* + * The generated list becomes the parameters to the macros, the order is: + * - struct this applies to + * - starting offset of the max + * - GENMASK or GENMASK_ULL in CPU order + * - The width of data the mask 
operations should work on, in bits + */ + +/* + * Extraction using a tabular description like table 106. bit_offset is from + * the Byte[Bit] notation. + */ +#define IBA_FIELD_BLOC(field_struct, byte_offset, bit_offset, num_bits) \ + field_struct, byte_offset, \ + GENMASK(7 - (bit_offset), 7 - (bit_offset) - (num_bits - 1)), \ + 8 +#define IBA_FIELD8_LOC(field_struct, byte_offset, num_bits) \ + IBA_FIELD_BLOC(field_struct, byte_offset, 0, num_bits) + +#define IBA_FIELD16_LOC(field_struct, byte_offset, num_bits) \ + field_struct, (byte_offset)&0xFFFE, \ + GENMASK(15 - (((byte_offset) % 2) * 8), \ + 15 - (((byte_offset) % 2) * 8) - (num_bits - 1)), \ + 16 + +#define IBA_FIELD32_LOC(field_struct, byte_offset, num_bits) \ + field_struct, (byte_offset)&0xFFFC, \ + GENMASK(31 - (((byte_offset) % 4) * 8), \ + 31 - (((byte_offset) % 4) * 8) - (num_bits - 1)), \ + 32 + +#define IBA_FIELD64_LOC(field_struct, byte_offset) \ + field_struct, byte_offset, GENMASK_ULL(63, 0), 64 +/* + * In IBTA spec, everything that is more than 64bits is multiple + * of bytes without leftover bits. + */ +#define IBA_FIELD_MLOC(field_struct, byte_offset, num_bits, type) \ + field_struct, byte_offset, type, num_bits + +#endif /* _IBA_DEFS_H_ */ diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h new file mode 100644 index 000000000000..916db5c27dc5 --- /dev/null +++ b/include/rdma/ibta_vol1_c12.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. + * + * This file is IBTA volume 1, chapter 12 declarations: + * CHAPTER 12: COMMUNICATION MANAGEMENT + */ +#ifndef _IBTA_VOL1_C12_H_ +#define _IBTA_VOL1_C12_H_ + +#include + +#define CM_FIELD_BLOC(field_struct, byte_offset, bits_offset, width) \ + IBA_FIELD_BLOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), bits_offset, \ + width) +#define CM_FIELD8_LOC(field_struct, byte_offset, width) \ + IBA_FIELD8_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD16_LOC(field_struct, byte_offset, width) \ + IBA_FIELD16_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD32_LOC(field_struct, byte_offset, width) \ + IBA_FIELD32_LOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width) +#define CM_FIELD64_LOC(field_struct, byte_offset) \ + IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr))) +#define CM_FIELD_MLOC(field_struct, byte_offset, width, type) \ + IBA_FIELD_MLOC(field_struct, \ + (byte_offset + sizeof(struct ib_mad_hdr)), width, type) +#define CM_STRUCT(field_struct, total_len) \ + static_assert((total_len) % 32 == 0); + +/* Table 106 REQ Message Contents */ +#define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) +#define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8) +#define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16) +#define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32) +#define CM_REQ_LOCAL_QPN CM_FIELD32_LOC(struct cm_req_msg, 32, 24) +#define CM_REQ_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_req_msg, 35, 8) +#define CM_REQ_LOCAL_EECN CM_FIELD32_LOC(struct cm_req_msg, 36, 24) +#define CM_REQ_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_req_msg, 39, 8) +#define CM_REQ_REMOTE_EECN CM_FIELD32_LOC(struct cm_req_msg, 40, 24) +#define CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT \ + CM_FIELD8_LOC(struct cm_req_msg, 43, 5) +#define CM_REQ_TRANSPORT_SERVICE_TYPE CM_FIELD_BLOC(struct cm_req_msg, 43, 5, 2) 
+#define CM_REQ_END_TO_END_FLOW_CONTROL \ + CM_FIELD_BLOC(struct cm_req_msg, 43, 7, 1) +#define CM_REQ_STARTING_PSN CM_FIELD32_LOC(struct cm_req_msg, 44, 24) +#define CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 47, 5) +#define CM_REQ_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 47, 5, 3) +#define CM_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_req_msg, 48, 16) +#define CM_REQ_PATH_PACKET_PAYLOAD_MTU CM_FIELD8_LOC(struct cm_req_msg, 50, 4) +#define CM_REQ_RDC_EXISTS CM_FIELD_BLOC(struct cm_req_msg, 50, 4, 1) +#define CM_REQ_RNR_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 50, 5, 3) +#define CM_REQ_MAX_CM_RETRIES CM_FIELD8_LOC(struct cm_req_msg, 51, 4) +#define CM_REQ_SRQ CM_FIELD_BLOC(struct cm_req_msg, 51, 4, 1) +#define CM_REQ_EXTENDED_TRANSPORT_TYPE \ + CM_FIELD_BLOC(struct cm_req_msg, 51, 5, 3) +#define CM_REQ_PRIMARY_LOCAL_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 52, 16) +#define CM_REQ_PRIMARY_REMOTE_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 54, 16) +#define CM_REQ_PRIMARY_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 56, 128, union ib_gid) +#define CM_REQ_PRIMARY_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 72, 128, union ib_gid) +#define CM_REQ_PRIMARY_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 88, 20) +#define CM_REQ_PRIMARY_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 91, 2, 6) +#define CM_REQ_PRIMARY_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 92, 8) +#define CM_REQ_PRIMARY_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 93, 8) +#define CM_REQ_PRIMARY_SL CM_FIELD8_LOC(struct cm_req_msg, 94, 4) +#define CM_REQ_PRIMARY_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_req_msg, 94, 4, 1) +#define CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 95, 5) +#define CM_REQ_ALTERNATE_LOCAL_PORT_LID \ + CM_FIELD16_LOC(struct cm_req_msg, 96, 16) +#define CM_REQ_ALTERNATE_REMOTE_PORT_LID \ + CM_FIELD16_LOC(struct cm_req_msg, 98, 16) +#define CM_REQ_ALTERNATE_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 100, 128, union ib_gid) +#define CM_REQ_ALTERNATE_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_req_msg, 116, 128, union ib_gid) +#define CM_REQ_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 132, 20) +#define CM_REQ_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 135, 2, 6) +#define CM_REQ_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 136, 8) +#define CM_REQ_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 137, 8) +#define CM_REQ_ALTERNATE_SL CM_FIELD8_LOC(struct cm_req_msg, 138, 4) +#define CM_REQ_ALTERNATE_SUBNET_LOCAL \ + CM_FIELD_BLOC(struct cm_req_msg, 138, 4, 1) +#define CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT \ + CM_FIELD8_LOC(struct cm_req_msg, 139, 5) +#define CM_REQ_SAP_SUPPORTED CM_FIELD_BLOC(struct cm_req_msg, 139, 5, 1) +#define CM_REQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_req_msg, 140, 736, void) +CM_STRUCT(struct cm_req_msg, 140 * 8 + 736); + +/* Table 107 MRA Message Contents */ +#define CM_MRA_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 0, 32) +#define CM_MRA_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 4, 32) +#define CM_MRA_MESSAGE_MRAED CM_FIELD8_LOC(struct cm_mra_msg, 8, 2) +#define CM_MRA_SERVICE_TIMEOUT CM_FIELD8_LOC(struct cm_mra_msg, 9, 5) +#define CM_MRA_PRIVATE_DATA CM_FIELD_MLOC(struct cm_mra_msg, 10, 1776, void) +CM_STRUCT(struct cm_mra_msg, 10 * 8 + 1776); + +/* Table 108 REJ Message Contents */ +#define CM_REJ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 0, 32) +#define CM_REJ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 4, 32) +#define CM_REJ_MESSAGE_REJECTED 
CM_FIELD8_LOC(struct cm_rej_msg, 8, 2) +#define CM_REJ_REJECTED_INFO_LENGTH CM_FIELD8_LOC(struct cm_rej_msg, 9, 7) +#define CM_REJ_REASON CM_FIELD16_LOC(struct cm_rej_msg, 10, 16) +#define CM_REJ_ARI CM_FIELD_MLOC(struct cm_rej_msg, 12, 576, void) +#define CM_REJ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rej_msg, 84, 1184, void) +CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184); + +/* Table 110 REP Message Contents */ +#define CM_REP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 0, 32) +#define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32) +#define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32) +#define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24) +#define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24) +#define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24) +#define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8) +#define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8) +#define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5) +#define CM_REP_FAILOVER_ACCEPTED CM_FIELD_BLOC(struct cm_rep_msg, 26, 5, 2) +#define CM_REP_END_TO_END_FLOW_CONTROL \ + CM_FIELD_BLOC(struct cm_rep_msg, 26, 7, 1) +#define CM_REP_RNR_RETRY_COUNT CM_FIELD8_LOC(struct cm_rep_msg, 27, 3) +#define CM_REP_SRQ CM_FIELD_BLOC(struct cm_rep_msg, 27, 3, 1) +#define CM_REP_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_rep_msg, 28) +#define CM_REP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rep_msg, 36, 1568, void) +CM_STRUCT(struct cm_rep_msg, 36 * 8 + 1568); + +/* Table 111 RTU Message Contents */ +#define CM_RTU_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 0, 32) +#define CM_RTU_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 4, 32) +#define CM_RTU_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rtu_msg, 8, 1792, void) +CM_STRUCT(struct cm_rtu_msg, 8 * 8 + 1792); + +/* Table 112 DREQ Message Contents */ +#define CM_DREQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 0, 32) +#define CM_DREQ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 4, 32) +#define CM_DREQ_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_dreq_msg, 8, 24) +#define CM_DREQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_dreq_msg, 12, 1760, void) +CM_STRUCT(struct cm_dreq_msg, 12 * 8 + 1760); + +/* Table 113 DREP Message Contents */ +#define CM_DREP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 0, 32) +#define CM_DREP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 4, 32) +#define CM_DREP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_drep_msg, 8, 1792, void) +CM_STRUCT(struct cm_drep_msg, 8 * 8 + 1792); + +/* Table 115 LAP Message Contents */ +#define CM_LAP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 0, 32) +#define CM_LAP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 4, 32) +#define CM_LAP_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_lap_msg, 12, 24) +#define CM_LAP_REMOTE_CM_RESPONSE_TIMEOUT \ + CM_FIELD8_LOC(struct cm_lap_msg, 15, 5) +#define CM_LAP_ALTERNATE_LOCAL_PORT_LID \ + CM_FIELD16_LOC(struct cm_lap_msg, 20, 16) +#define CM_LAP_ALTERNATE_REMOTE_PORT_LID \ + CM_FIELD16_LOC(struct cm_lap_msg, 22, 16) +#define CM_LAP_ALTERNATE_LOCAL_PORT_GID \ + CM_FIELD_MLOC(struct cm_lap_msg, 24, 128, union ib_gid) +#define CM_LAP_ALTERNATE_REMOTE_PORT_GID \ + CM_FIELD_MLOC(struct cm_lap_msg, 40, 128, union ib_gid) +#define CM_LAP_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_lap_msg, 56, 20) +#define CM_LAP_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_lap_msg, 59, 8) +#define CM_LAP_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_lap_msg, 60, 8) +#define 
CM_LAP_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_lap_msg, 61, 2, 6) +#define CM_LAP_ALTERNATE_SL CM_FIELD8_LOC(struct cm_lap_msg, 62, 4) +#define CM_LAP_ALTERNATE_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_lap_msg, 62, 4, 1) +#define CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT \ + CM_FIELD8_LOC(struct cm_lap_msg, 63, 5) +#define CM_LAP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_lap_msg, 64, 1344, void) +CM_STRUCT(struct cm_lap_msg, 64 * 8 + 1344); + +/* Table 116 APR Message Contents */ +#define CM_APR_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 0, 32) +#define CM_APR_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 4, 32) +#define CM_APR_ADDITIONAL_INFORMATION_LENGTH \ + CM_FIELD8_LOC(struct cm_apr_msg, 8, 8) +#define CM_APR_AR_STATUS CM_FIELD8_LOC(struct cm_apr_msg, 9, 8) +#define CM_APR_ADDITIONAL_INFORMATION \ + CM_FIELD_MLOC(struct cm_apr_msg, 12, 576, void) +#define CM_APR_PRIVATE_DATA CM_FIELD_MLOC(struct cm_apr_msg, 84, 1184, void) +CM_STRUCT(struct cm_apr_msg, 84 * 8 + 1184); + +/* Table 119 SIDR_REQ Message Contents */ +#define CM_SIDR_REQ_REQUESTID CM_FIELD32_LOC(struct cm_sidr_req_msg, 0, 32) +#define CM_SIDR_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_sidr_req_msg, 4, 16) +#define CM_SIDR_REQ_SERVICEID CM_FIELD64_LOC(struct cm_sidr_req_msg, 8) +#define CM_SIDR_REQ_PRIVATE_DATA \ + CM_FIELD_MLOC(struct cm_sidr_req_msg, 16, 1728, void) +CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728); + +/* Table 120 SIDR_REP Message Contents */ +#define CM_SIDR_REP_REQUESTID CM_FIELD32_LOC(struct cm_sidr_rep_msg, 0, 32) +#define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8) +#define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH \ + CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8) +#define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24) +#define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12) +#define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32) +#define CM_SIDR_REP_ADDITIONAL_INFORMATION \ + CM_FIELD_MLOC(struct cm_sidr_rep_msg, 24, 576, void) +#define CM_SIDR_REP_PRIVATE_DATA \ + CM_FIELD_MLOC(struct cm_sidr_rep_msg, 96, 1088, void) +CM_STRUCT(struct cm_sidr_rep_msg, 96 * 8 + 1088); + +#endif /* _IBTA_VOL1_C12_H_ */ -- cgit v1.2.3 From 13e0af1801f2e74639b4eadb69ed1fad2cf802f7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 16 Jan 2020 13:00:37 -0400 Subject: RDMA/cm: Remove CM message structs All accesses now use the new IBA acessor scheme, so delete the structs entirely and generate the structures from the schema file. Link: https://lore.kernel.org/r/20200116170037.30109-8-jgg@ziepe.ca Tested-by: Leon Romanovsky Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 77 -------------- drivers/infiniband/core/cm_msgs.h | 211 -------------------------------------- include/rdma/ibta_vol1_c12.h | 7 +- 3 files changed, 6 insertions(+), 289 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 5ccd59f1ebb8..68cc1b2d6824 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4426,83 +4426,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) kfree(cm_dev); } -/* - * Check at compile time that the byte offset and length of field old_name in - * the struct matches the byte offset and length in the new macro. 
- */ -#define _IBA_CHECK_OFF(old_name, field_struct, field_offset, mask, bits) \ - static_assert(offsetof(field_struct, old_name) == (field_offset)); \ - static_assert(bits == sizeof(((field_struct *)0)->old_name) * 8) -#define IBA_CHECK_OFF(field, old_name) _IBA_CHECK_OFF(old_name, field) - -IBA_CHECK_OFF(CM_REQ_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_REQ_SERVICE_ID, service_id); -IBA_CHECK_OFF(CM_REQ_LOCAL_CA_GUID, local_ca_guid); -IBA_CHECK_OFF(CM_REQ_LOCAL_Q_KEY, local_qkey); -IBA_CHECK_OFF(CM_REQ_PARTITION_KEY, pkey); -IBA_CHECK_OFF(CM_REQ_PRIMARY_LOCAL_PORT_LID, primary_local_lid); -IBA_CHECK_OFF(CM_REQ_PRIMARY_REMOTE_PORT_LID, primary_remote_lid); -IBA_CHECK_OFF(CM_REQ_PRIMARY_LOCAL_PORT_GID, primary_local_gid); -IBA_CHECK_OFF(CM_REQ_PRIMARY_REMOTE_PORT_GID, primary_remote_gid); -IBA_CHECK_OFF(CM_REQ_PRIMARY_TRAFFIC_CLASS, primary_traffic_class); -IBA_CHECK_OFF(CM_REQ_PRIMARY_HOP_LIMIT, primary_hop_limit); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_LOCAL_PORT_LID, alt_local_lid); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_REMOTE_PORT_LID, alt_remote_lid); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_LOCAL_PORT_GID, alt_local_gid); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_REMOTE_PORT_GID, alt_remote_gid); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_TRAFFIC_CLASS, alt_traffic_class); -IBA_CHECK_OFF(CM_REQ_ALTERNATE_HOP_LIMIT, alt_hop_limit); -IBA_CHECK_OFF(CM_REQ_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_MRA_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_MRA_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_MRA_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_REJ_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_REJ_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_REJ_REASON, reason); -IBA_CHECK_OFF(CM_REJ_ARI, ari); -IBA_CHECK_OFF(CM_REJ_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_REP_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_REP_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_REP_LOCAL_Q_KEY, local_qkey); -IBA_CHECK_OFF(CM_REP_RESPONDER_RESOURCES, resp_resources); -IBA_CHECK_OFF(CM_REP_INITIATOR_DEPTH, initiator_depth); -IBA_CHECK_OFF(CM_REP_LOCAL_CA_GUID, local_ca_guid); -IBA_CHECK_OFF(CM_REP_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_RTU_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_RTU_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_RTU_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_DREQ_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_DREQ_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_DREQ_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_DREP_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_DREP_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_DREP_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_LAP_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_LAP_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_LAP_ALTERNATE_LOCAL_PORT_LID, alt_local_lid); -IBA_CHECK_OFF(CM_LAP_ALTERNATE_REMOTE_PORT_LID, alt_remote_lid); -IBA_CHECK_OFF(CM_LAP_ALTERNATE_LOCAL_PORT_GID, alt_local_gid); -IBA_CHECK_OFF(CM_LAP_ALTERNATE_REMOTE_PORT_GID, alt_remote_gid); -IBA_CHECK_OFF(CM_LAP_ALTERNATE_HOP_LIMIT, alt_hop_limit); -IBA_CHECK_OFF(CM_LAP_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_APR_LOCAL_COMM_ID, local_comm_id); -IBA_CHECK_OFF(CM_APR_REMOTE_COMM_ID, remote_comm_id); -IBA_CHECK_OFF(CM_APR_ADDITIONAL_INFORMATION_LENGTH, info_length); -IBA_CHECK_OFF(CM_APR_AR_STATUS, ap_status); -IBA_CHECK_OFF(CM_APR_ADDITIONAL_INFORMATION, info); -IBA_CHECK_OFF(CM_APR_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_SIDR_REQ_REQUESTID, request_id); -IBA_CHECK_OFF(CM_SIDR_REQ_PARTITION_KEY, pkey); 
-IBA_CHECK_OFF(CM_SIDR_REQ_SERVICEID, service_id); -IBA_CHECK_OFF(CM_SIDR_REQ_PRIVATE_DATA, private_data); -IBA_CHECK_OFF(CM_SIDR_REP_REQUESTID, request_id); -IBA_CHECK_OFF(CM_SIDR_REP_STATUS, status); -IBA_CHECK_OFF(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH, info_length); -IBA_CHECK_OFF(CM_SIDR_REP_SERVICEID, service_id); -IBA_CHECK_OFF(CM_SIDR_REP_Q_KEY, qkey); -IBA_CHECK_OFF(CM_SIDR_REP_ADDITIONAL_INFORMATION, info); -IBA_CHECK_OFF(CM_SIDR_REP_PRIVATE_DATA, private_data); - static int __init ib_cm_init(void) { int ret; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 86ab6952d5d8..0cc40656b5c5 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -19,62 +19,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -struct cm_req_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 rsvd4; - __be64 service_id; - __be64 local_ca_guid; - __be32 rsvd24; - __be32 local_qkey; - /* local QPN:24, responder resources:8 */ - __be32 offset32; - /* local EECN:24, initiator depth:8 */ - __be32 offset36; - /* - * remote EECN:24, remote CM response timeout:5, - * transport service type:2, end-to-end flow control:1 - */ - __be32 offset40; - /* starting PSN:24, local CM response timeout:5, retry count:3 */ - __be32 offset44; - __be16 pkey; - /* path MTU:4, RDC exists:1, RNR retry count:3. */ - u8 offset50; - /* max CM Retries:4, SRQ:1, extended transport type:3 */ - u8 offset51; - - __be16 primary_local_lid; - __be16 primary_remote_lid; - union ib_gid primary_local_gid; - union ib_gid primary_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 primary_offset88; - u8 primary_traffic_class; - u8 primary_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 primary_offset94; - /* local ACK timeout:5, rsvd:3 */ - u8 primary_offset95; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:6, packet rate:6 */ - __be32 alt_offset132; - u8 alt_traffic_class; - u8 alt_hop_limit; - /* SL:4, subnet local:1, rsvd:3 */ - u8 alt_offset138; - /* local ACK timeout:5, rsvd:3 */ - u8 alt_offset139; - - u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; - -} __packed; - static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) { u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg); @@ -113,60 +57,6 @@ enum cm_msg_response { CM_MSG_RESPONSE_OTHER = 0x2 }; - struct cm_mra_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message MRAed:2, rsvd:6 */ - u8 offset8; - /* service timeout:5, rsvd:3 */ - u8 offset9; - - u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_rej_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* message REJected:2, rsvd:6 */ - u8 offset8; - /* reject info length:7, rsvd:1. 
*/ - u8 offset9; - __be16 reason; - u8 ari[IB_CM_REJ_ARI_LENGTH]; - - u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_rep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - __be32 local_qkey; - /* local QPN:24, rsvd:8 */ - __be32 offset12; - /* local EECN:24, rsvd:8 */ - __be32 offset16; - /* starting PSN:24 rsvd:8 */ - __be32 offset20; - u8 resp_resources; - u8 initiator_depth; - /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ - u8 offset26; - /* RNR retry count:3, SRQ:1, rsvd:5 */ - u8 offset27; - __be64 local_ca_guid; - - u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; - -} __packed; - static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) { return (qp_type == IB_QPT_XRC_INI) ? @@ -175,105 +65,4 @@ static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg)); } -struct cm_rtu_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_dreq_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - /* remote QPN/EECN:24, rsvd:8 */ - __be32 offset8; - - u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_drep_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; - -} __packed; - -struct cm_lap_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - __be32 rsvd8; - /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ - __be32 offset12; - __be32 rsvd16; - - __be16 alt_local_lid; - __be16 alt_remote_lid; - union ib_gid alt_local_gid; - union ib_gid alt_remote_gid; - /* flow label:20, rsvd:4, traffic class:8 */ - __be32 offset56; - u8 alt_hop_limit; - /* rsvd:2, packet rate:6 */ - u8 offset61; - /* SL:4, subnet local:1, rsvd:3 */ - u8 offset62; - /* local ACK timeout:5, rsvd:3 */ - u8 offset63; - - u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; -} __packed; - -struct cm_apr_msg { - struct ib_mad_hdr hdr; - - __be32 local_comm_id; - __be32 remote_comm_id; - - u8 info_length; - u8 ap_status; - __be16 rsvd; - u8 info[IB_CM_APR_INFO_LENGTH]; - - u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; -} __packed; - -struct cm_sidr_req_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - __be16 pkey; - __be16 rsvd; - __be64 service_id; - - u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; -} __packed; - -struct cm_sidr_rep_msg { - struct ib_mad_hdr hdr; - - __be32 request_id; - u8 status; - u8 info_length; - __be16 rsvd; - /* QPN:24, rsvd:8 */ - __be32 offset8; - __be64 service_id; - __be32 qkey; - u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; - - u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; -} __packed; - #endif /* CM_MSGS_H */ diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h index 916db5c27dc5..269904425d3f 100644 --- a/include/rdma/ibta_vol1_c12.h +++ b/include/rdma/ibta_vol1_c12.h @@ -29,7 +29,12 @@ IBA_FIELD_MLOC(field_struct, \ (byte_offset + sizeof(struct ib_mad_hdr)), width, type) #define CM_STRUCT(field_struct, total_len) \ - static_assert((total_len) % 32 == 0); + field_struct \ + { \ + struct ib_mad_hdr hdr; \ + u32 _data[(total_len) / 32 + \ + BUILD_BUG_ON_ZERO((total_len) % 32 != 0)]; \ + } /* Table 106 REQ Message Contents */ #define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32) -- cgit v1.2.3 From 
8889f6fa35884d09f24734e10fea0c9ddcbc6429 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 30 Jan 2020 11:21:21 -0400 Subject: RDMA/core: Make the entire API tree static Compilation of mlx5 driver without CONFIG_INFINIBAND_USER_ACCESS generates the following error. on x86_64: ld: drivers/infiniband/hw/mlx5/main.o: in function `mlx5_ib_handler_MLX5_IB_METHOD_VAR_OBJ_ALLOC': main.c:(.text+0x186d): undefined reference to `ib_uverbs_get_ucontext_file' ld: drivers/infiniband/hw/mlx5/main.o:(.rodata+0x2480): undefined reference to `uverbs_idr_class' ld: drivers/infiniband/hw/mlx5/main.o:(.rodata+0x24d8): undefined reference to `uverbs_destroy_def_handler' This is happening because some parts of the UAPI description are not static. This is a hold over from earlier code that relied on struct pointers to refer to object types, now object types are referenced by number. Remove the unused globals and add statics to the remaining UAPI description elements. Remove the redundent #ifdefs around mlx5_ib_*defs and obsolete mlx5_ib_get_devx_tree(). The compiler now trims alot more unused code, including the above problematic definitions when !CONFIG_INFINIBAND_USER_ACCESS. Fixes: 7be76bef320b ("IB/mlx5: Introduce VAR object and its alloc/destroy methods") Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs.h | 17 ----------------- drivers/infiniband/hw/mlx5/main.c | 2 -- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +++---- include/rdma/uverbs_named_ioctl.h | 6 +++--- 4 files changed, 6 insertions(+), 26 deletions(-) (limited to 'include/rdma') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 4d4cec46d251..7df71983212d 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -271,23 +271,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, size_t kern_filter_sz, union ib_flow_spec *ib_spec); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM); -extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS); - /* * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the * PortInfo CapabilityMask, but was extended with unique bits. 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 01fc09f3ddd3..0ca958143280 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6247,10 +6247,8 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( enum mlx5_ib_uapi_flow_action_flags)); static const struct uapi_definition mlx5_ib_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN(mlx5_ib_devx_defs), UAPI_DEF_CHAIN(mlx5_ib_flow_defs), -#endif UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, &mlx5_ib_flow_action), diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7b019bd4de4b..d9bffcc93587 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1379,14 +1379,14 @@ int mlx5_ib_fill_res_entry(struct sk_buff *msg, int mlx5_ib_fill_stat_entry(struct sk_buff *msg, struct rdma_restrack_entry *res); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; + #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev); void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev); -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); -extern const struct uapi_definition mlx5_ib_devx_defs[]; -extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, @@ -1394,7 +1394,6 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h index 3447bfe356d6..6ae6cf8e4c2e 100644 --- a/include/rdma/uverbs_named_ioctl.h +++ b/include/rdma/uverbs_named_ioctl.h @@ -76,7 +76,7 @@ #define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ - const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .type_attrs = &_type_attrs, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ @@ -88,10 +88,10 @@ * identify all uapi methods with a (object,method) tuple. However, they have * no type pointer. */ -#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ +#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \ _object_id)[] = { __VA_ARGS__ }; \ - const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ + static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \ .id = _object_id, \ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \ .methods = &UVERBS_OBJECT_METHODS(_object_id) \ -- cgit v1.2.3
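For context on why external linkage is no longer needed: after this series a driver hands its whole UAPI description to the core as chained tables of struct uapi_definition, with objects referenced by number rather than by struct pointer. Below is a condensed sketch of that pattern, modelled on the mlx5_ib_defs table touched above; the example_ name and the empty terminator are shown for completeness and are not part of the diff.

/*
 * Everything is reachable from one static table, so the individual
 * uverbs_object_def / uverbs_method_def symbols can stay file-local.
 */
static const struct uapi_definition example_mlx5_defs[] = {
	UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
	UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
				&mlx5_ib_flow_action),
	{},
};

When CONFIG_INFINIBAND_USER_ACCESS is disabled such a table is never consumed, and with the definitions now static the compiler can trim them as unused code, which is how the undefined references quoted in the commit message go away.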