From ceb26569af5bc33a8f47b755fd64f12c81801ce8 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 12 May 2017 09:19:36 -0700 Subject: IB/hfi1: Remove unnecessary initialization from tx request The tx request is unnecessarily initialized in the hot code path with memset(), however, there's no need to do this as most fields are initialized later on. this initialization shows to be costly in the profile. Remove unnecessary initialization from tx request and make sure all variables are initialized properly. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d55339f5d73b..16fd519216dc 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -607,12 +607,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx); req = pq->reqs + info.comp_idx; - memset(req, 0, sizeof(*req)); req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ + req->data_len = 0; req->pq = pq; req->cq = cq; req->status = -1; req->ahg_idx = -1; + req->iov_idx = 0; + req->sent = 0; + req->seqnum = 0; + req->seqcomp = 0; + req->seqsubmitted = 0; + req->flags = 0; + req->tids = NULL; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -701,12 +708,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* Save all the IO vector structures */ for (i = 0; i < req->data_iovs; i++) { + req->iovs[i].offset = 0; INIT_LIST_HEAD(&req->iovs[i].list); memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(req->iovs[i].iov)); ret = pin_vector_pages(req, &req->iovs[i]); if (ret) { + req->data_iovs = i; req->status = ret; goto free_req; } @@ -749,6 +758,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->tids = tmp; req->n_tids = ntids; + req->tididx = 0; idx++; } -- cgit v1.2.3 From aa560df381f7199fafa4e7f71382de28f0400270 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 12 May 2017 09:19:47 -0700 Subject: IB/hfi1: Remove unused mk_qpn function Leftover function that is not used. Remove it. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..e91be05062e6 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); -static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, unsigned off) -{ - return (map - qpt->map) * RVT_BITS_PER_PAGE + off; -} - const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { [IB_WR_RDMA_WRITE] = { .length = sizeof(struct ib_rdma_wr), -- cgit v1.2.3 From 7dafbab3753fcf59bc81748e5b2c5bf04e1c62c7 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:19:55 -0700 Subject: IB/hfi1: Add functions to parse BTH/IB headers Improve code readablity by adding inline functions to read specific BTH/IB fields without knowledge of byte offsets. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 6 +-- drivers/infiniband/hw/hfi1/rc.c | 8 ++-- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 6 +-- include/rdma/ib_hdrs.h | 84 +++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/rdmavt_qp.h | 2 +- 8 files changed, 101 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e455a3..0583479f2576 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -286,7 +286,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; } /* Get the destination QP number. */ - qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(ohdr); if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; @@ -438,7 +438,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, case IB_QPT_GSI: case IB_QPT_UD: rlid = ib_get_slid(hdr); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + rqpn = ib_get_sqpn(ohdr); svc_type = IB_CC_SVCTYPE_UD; is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -461,7 +461,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]); + u16 pkey = ib_bth_get_pkey(ohdr); return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); } diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1080778a1f7c..66e6843aab48 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, ohdr->u.aeth = rvt_compute_aeth(qp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; hdr.lrh[0] = cpu_to_be16(lrh0); @@ -1009,7 +1009,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) return; } - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); reset_sending_psn(qp, psn); /* @@ -1943,7 +1943,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) is_fecn = process_ecn(qp, packet, false); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* @@ -2388,7 +2388,7 @@ void hfi1_rc_hdrerr( if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) return; - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Only deal with RDMA Writes for now */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 5da1e4546543..2a5650f8aee0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -319,7 +319,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) process_ecn(qp, packet, true); - psn = be32_to_cpu(ohdr->bth[2]); + psn = ib_bth_get_psn(ohdr); opcode = ib_bth_get_opcode(ohdr); /* Compare the PSN verses the expected PSN. */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6a4e95cefae5..49fe179ad3ae 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -549,7 +549,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, hdr.lrh[3] = cpu_to_be16(slid); plen = 2 /* PBC */ + hwords; - pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { @@ -689,8 +689,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u16 slid; u8 extra_bytes; - qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; + qkey = ib_get_qkey(ohdr); + src_qp = ib_get_sqpn(ohdr); dlid = ib_get_dlid(hdr); bth1 = be32_to_cpu(ohdr->bth[1]); slid = ib_get_slid(hdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 90e7b77d68e8..128d2917a2d9 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -595,7 +595,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; + qp_num = ib_bth_get_qpn(packet->ohdr); lid = ib_get_dlid(hdr); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -863,7 +863,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /* No vl15 here */ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); @@ -999,7 +999,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 opcode = get_opcode(&tx->phdr.hdr); /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) pbc = hfi1_fault_tx(qp, opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h index 5519f31f043a..c124d515f7d5 100644 --- a/include/rdma/ib_hdrs.h +++ b/include/rdma/ib_hdrs.h @@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) #define IB_LNH_MASK 3 #define IB_SC_MASK 0xf #define IB_SC_SHIFT 12 +#define IB_SC5_MASK 0x10 #define IB_SL_MASK 0xf #define IB_SL_SHIFT 4 +#define IB_SL_SHIFT 4 +#define IB_LVER_MASK 0xf +#define IB_LVER_SHIFT 8 static inline u8 ib_get_lnh(struct ib_header *hdr) { @@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr) return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK); } +static inline bool ib_is_sc5(u16 sc5) +{ + return !!(sc5 & IB_SC5_MASK); +} + static inline u8 ib_get_sl(struct ib_header *hdr) { return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK); @@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr) return (be16_to_cpu(hdr->lrh[3])); } +static inline u8 ib_get_lver(struct ib_header *hdr) +{ + return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) & + IB_LVER_MASK); +} + +static inline u16 ib_get_len(struct ib_header *hdr) +{ + return (u16)(be16_to_cpu(hdr->lrh[2])); +} + +static inline u32 ib_get_qkey(struct ib_other_headers *ohdr) +{ + return be32_to_cpu(ohdr->u.ud.deth[0]); +} + +static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr) +{ + return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK); +} + /* * BTH */ @@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr) #define IB_BTH_PAD_MASK 3 #define IB_BTH_PKEY_MASK 0xffff #define IB_BTH_PAD_SHIFT 20 +#define IB_BTH_A_MASK 1 +#define IB_BTH_A_SHIFT 31 +#define IB_BTH_M_MASK 1 +#define IB_BTH_M_SHIFT 22 +#define IB_BTH_SE_MASK 1 +#define IB_BTH_SE_SHIFT 23 +#define IB_BTH_TVER_MASK 0xf +#define IB_BTH_TVER_SHIFT 16 static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr) { @@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr) IB_BTH_OPCODE_MASK); } +static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) & + IB_BTH_A_MASK); +} + +static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) & + IB_BTH_M_MASK); +} + +static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) & + IB_BTH_SE_MASK); +} + +static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr) +{ + return (u32)(be32_to_cpu(ohdr->bth[2])); +} + +static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr) +{ + return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK); +} + +static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & + IB_BECN_MASK); +} + +static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & + IB_FECN_MASK); +} + +static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) & + IB_BTH_TVER_MASK); +} + #endif /* IB_HDRS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index ba8314ec5768..8f1ce4e27bbd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -664,6 +664,8 @@ union rdma_network_hdr { }; }; +#define IB_QPN_MASK 0xFFFFFF + enum { IB_MULTICAST_QPN = 0xffffff }; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index be6472e5b06b..13f43b3527a8 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -396,7 +396,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) -#define RVT_QPN_MASK 0xFFFFFF +#define RVT_QPN_MASK IB_QPN_MASK /* * QPN-map pages start out as NULL, they get allocated upon -- cgit v1.2.3 From 228d2af1b723deedee38f03d144b7d25b39f6f86 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:08 -0700 Subject: IB/hfi1: Separate input/output header tracing Calls to trace incoming packets will now receive the packet context as parameter. This enables trace support for future packet types. Header trace output is in the format : which makes parsing easier. input_ibhdr trace before change: -0 [001] d.h. 5904.250925: input_ibhdr: [0000:05:00.0] vl 0 lver 0 sl 0 lnh 2,LRH_BTH dlid 0002 len 18 slid 0001 op 0x64,UD_SEND_ONLY se 0 m 0 pad 0 tver 0 pkey 0xffff f 0 b 0 qpn 0x000001 a 0 psn 0x000001b2 deth qkey 0x80010000 sqpn 0x000001 input_ibhdr trace after change: -0 [001] d.h. 6655.714488: input_ibhdr: [0000:05:00.0] (IB) len:124 sc:0 dlid:0x0001 slid:0x0002 lnh:2,LRH_BTH lver:0 sl:0 age:0 becn:0 fecn:0 l4:0 rc:0 entropy:0 op:0x64,UD_SEND_ONLY se:0 m:0 pad:0 tver:0 pkey:0x7fff f:0 b:0 qpn:0x000001 a:0 psn:0x00000036 hlen:8 deth qkey:0x80010000 sqpn:0x000001 Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 49 ----- drivers/infiniband/hw/hfi1/rc.c | 3 +- drivers/infiniband/hw/hfi1/trace.c | 58 +++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 322 ++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/trace_rx.h | 9 + drivers/infiniband/hw/hfi1/verbs.c | 7 +- 6 files changed, 279 insertions(+), 169 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 414a04a481c2..3b76631cbcbd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2086,53 +2086,4 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) - -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - -#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } -#define show_ib_opcode(opcode) \ -__print_symbolic(opcode, \ - ib_opcode_name(RC_SEND_FIRST), \ - ib_opcode_name(RC_SEND_MIDDLE), \ - ib_opcode_name(RC_SEND_LAST), \ - ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_SEND_ONLY), \ - ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_FIRST), \ - ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(RC_RDMA_WRITE_LAST), \ - ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY), \ - ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(RC_RDMA_READ_REQUEST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ - ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ - ib_opcode_name(RC_ACKNOWLEDGE), \ - ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ - ib_opcode_name(RC_COMPARE_SWAP), \ - ib_opcode_name(RC_FETCH_ADD), \ - ib_opcode_name(UC_SEND_FIRST), \ - ib_opcode_name(UC_SEND_MIDDLE), \ - ib_opcode_name(UC_SEND_LAST), \ - ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_SEND_ONLY), \ - ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_FIRST), \ - ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ - ib_opcode_name(UC_RDMA_WRITE_LAST), \ - ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY), \ - ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(UD_SEND_ONLY), \ - ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ - ib_opcode_name(CNP)) #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 66e6843aab48..b443c1e01543 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; } - trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); + trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), + &hdr, ib_is_sc5(sc5)); /* write the pbc and data */ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index eafae487face..b80b74d0c252 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct ib_header *hdr) +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } -#define IMM_PRN "imm %d" -#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x" -#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" -#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" -#define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %llx" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +{ + return "IB"; +} + +#define IMM_PRN "imm:%d" +#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" +#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" +#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" +#define IETH_PRN "ieth rkey:0x%.8x" +#define ATOMICACKETH_PRN "origdata:%llx" +#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op @@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome) return ""; } +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *becn = ib_bth_get_becn(ohdr); + *fecn = ib_bth_get_fecn(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *se = ib_bth_get_se(ohdr); + *pad = ib_bth_get_pad(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *tver = ib_bth_get_tver(ohdr); + *pkey = ib_bth_get_pkey(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid) +{ + *lnh = ib_get_lnh(hdr); + *lver = ib_get_lver(hdr); + *sl = ib_get_sl(hdr); + *sc = ib_get_sc(hdr) | (sc5 << 4); + *len = ib_get_len(hdr); + *dlid = ib_get_dlid(hdr); + *slid = ib_get_slid(hdr); + + if (*lnh == HFI1_LRH_BTH) + *ohdr = &hdr->u.oth; + else + *ohdr = &hdr->u.l.oth; +} + const char *parse_everbs_hdrs( struct trace_seq *p, u8 opcode, diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 090f6b506953..0f2d2da057ec 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,8 +55,57 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct ib_header *hdr); +#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode } +#define show_ib_opcode(opcode) \ +__print_symbolic(opcode, \ + ib_opcode_name(RC_SEND_FIRST), \ + ib_opcode_name(RC_SEND_MIDDLE), \ + ib_opcode_name(RC_SEND_LAST), \ + ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_SEND_ONLY), \ + ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_FIRST), \ + ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(RC_RDMA_WRITE_LAST), \ + ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY), \ + ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(RC_RDMA_READ_REQUEST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \ + ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \ + ib_opcode_name(RC_ACKNOWLEDGE), \ + ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \ + ib_opcode_name(RC_COMPARE_SWAP), \ + ib_opcode_name(RC_FETCH_ADD), \ + ib_opcode_name(UC_SEND_FIRST), \ + ib_opcode_name(UC_SEND_MIDDLE), \ + ib_opcode_name(UC_SEND_LAST), \ + ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_SEND_ONLY), \ + ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_FIRST), \ + ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \ + ib_opcode_name(UC_RDMA_WRITE_LAST), \ + ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY), \ + ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(UD_SEND_ONLY), \ + ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ + ib_opcode_name(CNP)) + const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, + struct ib_other_headers **ohdr, + u8 *lnh, u8 *lver, u8 *sl, u8 *sc, + u16 *len, u32 *dlid, u32 *slid); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -66,139 +115,198 @@ __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x" +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " #define BTH_PRN \ - "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \ - "f %d b %d qpn 0x%.6x a %d psn 0x%.8x" -#define EHDR_PRN "%s" + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define EHDR_PRN "hlen:%d %s" -DECLARE_EVENT_CLASS(hfi1_ibhdr_template, +DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr), - TP_ARGS(dd, hdr), + struct hfi1_packet *packet, + bool sc5), + TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - /* LRH */ - __field(u8, vl) + __field(u8, lnh) __field(u8, lver) __field(u8, sl) + __field(u16, len) + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) + __field(u8, opcode) + __field(u8, se) + __field(u8, mig) + __field(u8, pad) + __field(u8, tver) + __field(u16, pkey) + __field(u8, fecn) + __field(u8, becn) + __field(u32, qpn) + __field(u8, ack) + __field(u32, psn) + /* extended headers */ + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(packet->hdr)) + ), + TP_fast_assign( + struct ib_other_headers *ohdr; + + DD_DEV_ASSIGN(dd); + + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &ohdr, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), &ohdr->u, + __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) +); + +DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct hfi1_packet *packet, bool sc5), + TP_ARGS(dd, packet, sc5)); + +DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, + bool sc5), + TP_ARGS(dd, hdr, sc5), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) __field(u8, lnh) - __field(u16, dlid) + __field(u8, lver) + __field(u8, sl) __field(u16, len) - __field(u16, slid) - /* BTH */ + __field(u32, dlid) + __field(u8, sc) + __field(u32, slid) __field(u8, opcode) __field(u8, se) - __field(u8, m) + __field(u8, mig) __field(u8, pad) __field(u8, tver) __field(u16, pkey) - __field(u8, f) - __field(u8, b) + __field(u8, fecn) + __field(u8, becn) __field(u32, qpn) - __field(u8, a) + __field(u8, ack) __field(u32, psn) /* extended headers */ - __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) + __dynamic_array(u8, ehdrs, + hfi1_trace_ib_hdr_len(hdr)) ), - TP_fast_assign( + TP_fast_assign( struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); - /* LRH */ - __entry->vl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 12); - __entry->lver = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf; - __entry->sl = - (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->lnh = - (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - __entry->dlid = - be16_to_cpu(hdr->lrh[1]); - /* allow for larger len */ - __entry->len = - be16_to_cpu(hdr->lrh[2]); - __entry->slid = - be16_to_cpu(hdr->lrh[3]); - /* BTH */ - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - __entry->opcode = - (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->se = - (be32_to_cpu(ohdr->bth[0]) >> 23) & 1; - __entry->m = - (be32_to_cpu(ohdr->bth[0]) >> 22) & 1; - __entry->pad = - (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; - __entry->tver = - (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf; - __entry->pkey = - be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->f = - (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) & - IB_FECN_MASK; - __entry->b = - (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) & - IB_BECN_MASK; - __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->a = - (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; - /* allow for larger PSN */ - __entry->psn = - be32_to_cpu(ohdr->bth[2]) & 0x7fffffff; + + hfi1_trace_parse_9b_hdr(hdr, sc5, + &ohdr, &__entry->lnh, + &__entry->lver, &__entry->sl, + &__entry->sc, &__entry->len, + &__entry->dlid, &__entry->slid); + + hfi1_trace_parse_bth(ohdr, &__entry->ack, + &__entry->becn, &__entry->fecn, + &__entry->mig, &__entry->se, + &__entry->pad, &__entry->opcode, + &__entry->tver, &__entry->pkey, + &__entry->psn, &__entry->qpn); + /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - ibhdr_exhdr_len(hdr)); - ), - TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN, - __get_str(dev), - /* LRH */ - __entry->vl, - __entry->lver, - __entry->sl, - __entry->lnh, show_lnh(__entry->lnh), - __entry->dlid, - __entry->len, - __entry->slid, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->m, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->f, - __entry->b, - __entry->qpn, - __entry->a, - __entry->psn, - /* extended headers */ - __parse_ib_ehdrs( - __entry->opcode, - (void *)__get_dynamic_array(ehdrs)) - ) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); + ), + TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " + BTH_PRN " " EHDR_PRN, + __get_str(dev), + __entry->len, + __entry->sc, + __entry->dlid, + __entry->slid, + __entry->lnh, show_lnh(__entry->lnh), + __entry->lver, + __entry->sl, + /* BTH */ + __entry->opcode, show_ib_opcode(__entry->opcode), + __entry->se, + __entry->mig, + __entry->pad, + __entry->tver, + __entry->pkey, + __entry->fecn, + __entry->becn, + __entry->qpn, + __entry->ack, + __entry->psn, + /* extended headers */ + __get_dynamic_array_len(ehdrs), + __parse_ib_ehdrs( + __entry->opcode, + (void *)__get_dynamic_array(ehdrs)) + ) ); -DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); +DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, + TP_PROTO(struct hfi1_devdata *dd, + struct ib_header *hdr, bool sc5), + TP_ARGS(dd, hdr, sc5)); -DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), - TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f77e59fb43fe..05fc6d68ffe8 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -55,6 +55,15 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 128d2917a2d9..5f4be35f31b6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -589,8 +589,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) goto drop; } - trace_input_ibhdr(rcd->dd, hdr); - + trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); opcode = ib_bth_get_opcode(packet->ohdr); inc_opstats(tlen, &rcd->opstats->stats[opcode]); @@ -885,7 +884,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, return ret; } trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; bail_ecomm: @@ -1058,7 +1057,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &ps->s_txreq->phdr.hdr); + &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); pio_bail: if (qp->s_wqe) { -- cgit v1.2.3 From 9039746cdf39dcbf2ddfcc4a68f729cbbbc853df Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 12 May 2017 09:20:20 -0700 Subject: IB/hfi1: Setup common IB fields in hfi1_packet struct We move many common IB fields into the hfi1_packet structure and set them up in a single function. This allows us to set the fields in a single place and not deal with them throughout the driver. Reviewed-by: Brian Welty Reviewed-by: Dasaratharaman Chandramouli Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 -- drivers/infiniband/hw/hfi1/chip.h | 2 - drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 158 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/hfi.h | 21 ++++- drivers/infiniband/hw/hfi1/rc.c | 33 +++----- drivers/infiniband/hw/hfi1/ruc.c | 89 ++++++++++---------- drivers/infiniband/hw/hfi1/uc.c | 16 +--- drivers/infiniband/hw/hfi1/ud.c | 23 ++---- drivers/infiniband/hw/hfi1/verbs.c | 89 +++++++++----------- drivers/infiniband/hw/hfi1/verbs.h | 6 +- 11 files changed, 236 insertions(+), 211 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2ba00b89df6a..5dbee3c1bd45 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9810,15 +9810,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) -{ - u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - - return (struct ib_header *) - (rhf_addr - dd->rhf_offset + offset); -} - static const char * const ib_cfg_name_strings[] = { "HFI1_IB_CFG_LIDLMC", "HFI1_IB_CFG_LWID_DG_ENB", diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index cbe455d9ab8b..0b4f418ba0ac 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1347,8 +1347,6 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct ib_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr); void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 995d62c7f9a7..ba9ab971ced9 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -325,6 +325,7 @@ struct diag_pkt { #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ /* misc. */ +#define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define LIM_MGMT_P_KEY 0x7FFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0583479f2576..2a1022e374a5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -224,6 +224,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf, (offset * RCV_BUF_BLOCK_SIZE)); } +static inline void *hfi1_get_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); + + return (void *)(rhf_addr - dd->rhf_offset + offset); +} + +static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct ib_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode the a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -249,7 +263,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - int lnh = ib_get_lnh(rhdr); + u8 lnh = ib_get_lnh(rhdr); + bool has_grh = false; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -257,37 +272,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + has_grh = true; + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } + if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = ib_get_dlid(rhdr); + u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 rcv_flags = 0; + u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (lnh == HFI1_LRH_BTH) { - ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { + if (has_grh) { u32 vtf; + struct ib_grh *grh = packet->grh; - ohdr = &rhdr->u.l.oth; - if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + if (grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; - rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; } + /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(ohdr); - if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { + qp_num = ib_bth_get_qpn(packet->ohdr); + if (dlid < mlid_base) { struct rvt_qp *qp; unsigned long flags; @@ -312,11 +332,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (qp->ibqp.qp_type) { case IB_QPT_RC: - hfi1_rc_hdrerr( - rcd, - rhdr, - rcv_flags, - qp); + hfi1_rc_hdrerr(rcd, packet, qp); break; default: /* For now don't handle any other QP types */ @@ -332,9 +348,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, switch (rte) { case RHF_RTE_ERROR_OP_CODE_ERR: { - u32 opcode; void *ebuf = NULL; - __be32 *bth = NULL; + u8 opcode; if (rhf_use_egr_bfr(packet->rhf)) ebuf = packet->ebuf; @@ -342,16 +357,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (!ebuf) goto drop; /* this should never happen */ - if (lnh == HFI1_LRH_BTH) - bth = (__be32 *)ebuf; - else if (lnh == HFI1_LRH_GRH) - bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh)); - else - goto drop; - - opcode = be32_to_cpu(bth[0]) >> 24; - opcode &= 0xff; - + opcode = ib_bth_get_opcode(packet->ohdr); if (opcode == IB_OPCODE_CNP) { /* * Only in pre-B0 h/w is the CNP_OPCODE handled @@ -365,7 +371,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; + lqpn = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (!qp) { @@ -415,7 +421,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhqoff = rcd->head; packet->numpkt = 0; - packet->rcv_flags = 0; } void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, @@ -424,15 +429,12 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; - struct ib_grh *grh = NULL; + struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; u16 rlid, dlid = ib_get_dlid(hdr); u8 sc, svc_type; bool is_mcast = false; - if (pkt->rcv_flags & HFI1_HAS_GRH) - grh = &hdr->u.l.grh; - switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: @@ -591,9 +593,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (lnh == HFI1_LRH_BTH) { packet->ohdr = &hdr->u.oth; + packet->grh = NULL; } else if (lnh == HFI1_LRH_GRH) { packet->ohdr = &hdr->u.l.oth; - packet->rcv_flags |= HFI1_HAS_GRH; + packet->grh = &hdr->u.l.grh; } else { goto next; /* just in case */ } @@ -698,10 +701,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; - packet->hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; packet->etype = rhf_rcv_type(packet->rhf); + + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ @@ -759,7 +761,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet) packet->etail, 0, 0); packet->updegr = 0; } - packet->rcv_flags = 0; + packet->grh = NULL; } static inline void finish_packet(struct hfi1_packet *packet) @@ -896,12 +898,15 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); + u8 sc = SC15_PACKET; - if (etype == RHF_RCV_TYPE_IB && - hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB) { + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } + if (sc != SC15_PACKET) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -1321,6 +1326,58 @@ bail: return ret; } +static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) +{ + packet->hdr = (struct hfi1_ib_message_header *) + hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; +} + +static int hfi1_setup_9B_packet(struct hfi1_packet *packet) +{ + struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); + struct ib_header *hdr; + u8 lnh; + + hfi1_setup_ib_header(packet); + hdr = packet->hdr; + + lnh = ib_get_lnh(hdr); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + packet->grh = NULL; + } else if (lnh == HFI1_LRH_GRH) { + u32 vtf; + + packet->ohdr = &hdr->u.l.oth; + packet->grh = &hdr->u.l.grh; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->slid = ib_get_slid(hdr); + packet->dlid = ib_get_dlid(hdr); + packet->sl = ib_get_sl(hdr); + packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); + packet->pad = ib_bth_get_pad(packet->ohdr); + packet->extra_byte = 0; + packet->fecn = ib_bth_get_fecn(packet->ohdr); + packet->becn = ib_bth_get_becn(packet->ohdr); + + return 0; +drop: + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1351,6 +1408,9 @@ int process_receive_ib(struct hfi1_packet *packet) if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + if (hfi1_setup_9B_packet(packet)) + return RHF_RCV_CONTINUE; + trace_hfi1_rcvhdr(packet->rcd->ppd->dd, packet->rcd->ctxt, rhf_err_flags(packet->rhf), @@ -1422,6 +1482,7 @@ int process_receive_error(struct hfi1_packet *packet) rhf_rcv_type_err(packet->rhf) == 3)) return RHF_RCV_CONTINUE; + hfi1_setup_ib_header(packet); handle_eflags(packet); if (unlikely(rhf_err_flags(packet->rhf))) @@ -1435,6 +1496,8 @@ int kdeth_process_expected(struct hfi1_packet *packet) { if (unlikely(hfi1_dbg_fault_packet(packet))) return RHF_RCV_CONTINUE; + + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1445,6 +1508,7 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { + hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); if (unlikely(hfi1_dbg_fault_packet(packet))) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3b76631cbcbd..9c6c73448461 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -356,17 +356,26 @@ struct hfi1_packet { __le32 *rhf_addr; struct rvt_qp *qp; struct ib_other_headers *ohdr; + struct ib_grh *grh; u64 rhf; u32 maxcnt; u32 rhqoff; + u32 dlid; + u32 slid; u16 tlen; s16 etail; u8 hlen; u8 numpkt; u8 rsize; u8 updegr; - u8 rcv_flags; u8 etype; + u8 extra_byte; + u8 pad; + u8 sc; + u8 sl; + u8 opcode; + bool becn; + bool fecn; }; struct rvt_sge_state; @@ -2086,4 +2095,14 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) + +/* + * hfi1_check_mcast- Check if the given lid is + * in the IB multicast range. + */ +static inline bool hfi1_check_mcast(u16 lid) +{ + return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (lid != be16_to_cpu(IB_LID_PERMISSIVE))); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index b443c1e01543..baa67bf0772b 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 bth0; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; int diff; @@ -1938,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) u32 rkey; lockdep_assert_held(&qp->r_lock); + bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -2075,8 +2073,6 @@ no_immediate_data: wc.wc_flags = 0; wc.ex.imm_data = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -2369,28 +2365,19 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp) { - int has_grh = rcv_flags & HFI1_HAS_GRH; - struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = rcd_to_iport(rcd); int diff; u32 opcode; - u32 psn, bth0; - - /* Check for GRH */ - ohdr = &hdr->u.oth; - if (has_grh) - ohdr = &hdr->u.l.oth; + u32 psn; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; - psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); + psn = ib_bth_get_psn(packet->ohdr); + opcode = ib_bth_get_opcode(packet->ohdr); /* Only deal with RDMA Writes for now */ if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 3a17daba28a9..9cc9c7be9dd4 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0) +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) { __be64 guid; unsigned long flags; + struct rvt_qp *qp = packet->qp; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - - if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { - if (!has_grh) { + u32 dlid = packet->dlid; + u32 slid = packet->slid; + u32 sl = packet->sl; + int migrated; + u32 bth0, bth1; + + bth0 = be32_to_cpu(packet->ohdr->bth[0]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); + migrated = bth0 & IB_BTH_MIG_REQ; + + if (qp->s_mig_state == IB_MIG_ARMED && migrated) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->alt_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->alt_ah_attr) || + if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) || ppd_from_ibp(ibp)->port != rdma_ah_get_port_num(&qp->alt_ah_attr)) - goto err; + return 1; spin_lock_irqsave(&qp->s_lock, flags); hfi1_migrate_qp(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } else { - if (!has_grh) { + if (!packet->grh) { if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - goto err; + return 1; } else { const struct ib_global_route *grh; if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) - goto err; + return 1; grh = rdma_ah_read_grh(&qp->remote_ah_attr); guid = get_sguid(ibp, grh->sgid_index); - if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix, + if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, guid)) - goto err; + return 1; if (!gid_ok( - &hdr->u.l.grh.sgid, + &packet->grh->sgid, grh->dgid.global.subnet_prefix, grh->dgid.global.interface_id)) - goto err; + return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, - ib_get_slid(hdr)))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - (u16)bth0, - ib_get_sl(hdr), - 0, qp->ibqp.qp_num, - ib_get_slid(hdr), - ib_get_dlid(hdr)); - goto err; + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + sc5, slid))) { + hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); + return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ - if (ib_get_slid(hdr) != - rdma_ah_get_dlid(&qp->remote_ah_attr) || + if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) || ppd_from_ibp(ibp)->port != qp->port_num) - goto err; - if (qp->s_mig_state == IB_MIG_REARM && - !(bth0 & IB_BTH_MIG_REQ)) + return 1; + if (qp->s_mig_state == IB_MIG_REARM && !migrated) qp->s_mig_state = IB_MIG_ARMED; } return 0; - -err: - return 1; } /** diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 2a5650f8aee0..76c2451a53d7 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,31 +297,25 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0, opcode; + u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; u32 psn; - u32 pad; + u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; struct ib_reth *reth; - int has_grh = rcv_flags & HFI1_HAS_GRH; int ret; - bth0 = be32_to_cpu(ohdr->bth[0]); - if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) + if (hfi1_ruc_check_hdr(ibp, packet)) return; process_ecn(qp, packet, true); psn = ib_bth_get_psn(ohdr); - opcode = ib_bth_get_opcode(ohdr); - /* Compare the PSN verses the expected PSN. */ if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) { /* @@ -432,8 +426,6 @@ no_immediate_data: wc.ex.imm_data = 0; wc.wc_flags = 0; send_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) @@ -527,8 +519,6 @@ rdma_first: rdma_last_imm: wc.wc_flags = IB_WC_WITH_IMM; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 49fe179ad3ae..c995aa58c36a 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -668,36 +668,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, void hfi1_ud_rcv(struct hfi1_packet *packet) { struct ib_other_headers *ohdr = packet->ohdr; - int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; u32 qkey; u32 src_qp; - u16 dlid, pkey; + u16 pkey; int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - bool has_grh = rcv_flags & HFI1_HAS_GRH; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); u32 bth1; - u8 sl_from_sc, sl; - u16 slid; - u8 extra_bytes; + u8 sl_from_sc; + u8 extra_bytes = packet->pad; + u8 opcode = packet->opcode; + u8 sl = packet->sl; + u32 dlid = packet->dlid; + u32 slid = packet->slid; + bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - dlid = ib_get_dlid(hdr); - bth1 = be32_to_cpu(ohdr->bth[1]); - slid = ib_get_slid(hdr); pkey = ib_bth_get_pkey(ohdr); - opcode = ib_bth_get_opcode(ohdr); - sl = ib_get_sl(hdr); - extra_bytes = ib_bth_get_pad(ohdr); extra_bytes += (SIZE_OF_CRC << 2); sl_from_sc = ibp->sc_to_sl[sc5]; @@ -811,7 +806,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) qp->r_flags |= RVT_R_REUSE_SGE; goto drop; } - if (has_grh) { + if (packet->grh) { hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 5f4be35f31b6..af54d3f4696a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -508,13 +508,14 @@ again: /* * Make sure the QP is ready and able to accept the given opcode. */ -static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) +static inline opcode_handler qp_ok(struct hfi1_packet *packet) { if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) return NULL; - if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || - (opcode == IB_OPCODE_CNP)) - return opcode_handler_tbl[opcode]; + if (((packet->opcode & RVT_OPCODE_QP_MASK) == + packet->qp->allowed_ops) || + (packet->opcode == IB_OPCODE_CNP)) + return opcode_handler_tbl[packet->opcode]; return NULL; } @@ -548,68 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } -/** - * hfi1_ib_rcv - process an incoming packet - * @packet: data packet information - * - * This is called to process an incoming packet at interrupt level. - * - * Tlen is the length of the header + data + CRC in bytes. - */ -void hfi1_ib_rcv(struct hfi1_packet *packet) +static inline void hfi1_handle_packet(struct hfi1_packet *packet, + bool is_mcast) { + u32 qp_num; struct hfi1_ctxtdata *rcd = packet->rcd; - struct ib_header *hdr = packet->hdr; - u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; opcode_handler packet_handler; unsigned long flags; - u32 qp_num; - int lnh; - u8 opcode; - u16 lid; - - /* Check for GRH */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &hdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - u32 vtf; - - packet->ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) - goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); - if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) - goto drop; - packet->rcv_flags |= HFI1_HAS_GRH; - } else { - goto drop; - } - trace_input_ibhdr(rcd->dd, packet, !!(packet->rhf & RHF_DC_INFO_SMASK)); - opcode = ib_bth_get_opcode(packet->ohdr); - inc_opstats(tlen, &rcd->opstats->stats[opcode]); + inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]); - /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); - lid = ib_get_dlid(hdr); - if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { + if (unlikely(is_mcast)) { struct rvt_mcast *mcast; struct rvt_mcast_qp *p; - if (lnh != HFI1_LRH_GRH) + if (!packet->grh) goto drop; - mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid); + mcast = rvt_mcast_find(&ibp->rvp, + &packet->grh->dgid, + packet->dlid); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -623,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) if (atomic_dec_return(&mcast->refcount) <= 1) wake_up(&mcast->wait); } else { + /* Get the destination QP number. */ + qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; } - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode, + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, true))) { rcu_read_unlock(); goto drop; } spin_lock_irqsave(&packet->qp->r_lock, flags); - packet_handler = qp_ok(opcode, packet); + packet_handler = qp_ok(packet); if (likely(packet_handler)) packet_handler(packet); else @@ -644,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) rcu_read_unlock(); } return; - drop: ibp->rvp.n_pkt_drops++; } +/** + * hfi1_ib_rcv - process an incoming packet + * @packet: data packet information + * + * This is called to process an incoming packet at interrupt level. + */ +void hfi1_ib_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + bool is_mcast = false; + + if (unlikely(hfi1_check_mcast(packet->dlid))) + is_mcast = true; + + trace_input_ibhdr(rcd->dd, packet, + !!(packet->rhf & RHF_DC_INFO_SMASK)); + hfi1_handle_packet(packet, is_mcast); +} + /* * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index cd635d0c1d3b..17b38cd5f654 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct ib_header *hdr, - u32 rcv_flags, + struct hfi1_packet *packet, struct rvt_qp *qp); u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); @@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, - int has_grh, struct rvt_qp *qp, u32 bth0); +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, const struct ib_global_route *grh, u32 hwords, u32 nwords); -- cgit v1.2.3 From 14fe13fcd3afb96b06809f280b586be1c998332c Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:20:31 -0700 Subject: IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok() SGEs that are contiguous needlessly consume driver dependent TX resources. The lkey validation logic is enhanced to compress the SGE that ends up in the send wqe when consecutive addresses are detected. The lkey validation API used to return 1 (success) or 0 (fail). The return value is now an -errno, 0 (compressed), or 1 (uncompressed). A additional argument is added to pass the last SQE for the compression. Loopback callers always pass a NULL to last_sge since the optimization is of little benefit in that situation. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Venkata Sandeep Dhanalakota Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 2 +- drivers/infiniband/hw/qib/qib_ruc.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 51 +++++++++++++++++++++++---- drivers/infiniband/sw/rdmavt/qp.c | 23 ++++++------ drivers/infiniband/sw/rdmavt/trace_mr.h | 62 +++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_tx.h | 11 +++--- include/rdma/rdma_vt.h | 3 +- 7 files changed, 130 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cc9c7be9dd4..476fe5da2992 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index bd09de7c6e56..88d84cbf7e5a 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index aa5f9ea318e4..ea95672d9675 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -777,24 +777,55 @@ out: return ret; } +/** + * rvt_sge_adjacent - is isge compressible + * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written + * @sge: SGE to check + * + * If adjacent will update last_sge to add length. + * + * Return: true if isge is adjacent to last sge + */ +static inline bool rvt_sge_adjacent(struct rvt_sge *isge, + struct rvt_sge *last_sge, + struct ib_sge *sge) +{ + if (last_sge && sge->lkey == last_sge->mr->lkey && + ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) { + if (sge->lkey) { + if (unlikely((sge->addr - last_sge->mr->user_base + + sge->length > last_sge->mr->length))) + return false; /* overrun, caller will catch */ + } else { + last_sge->length += sge->length; + } + last_sge->sge_length += sge->length; + trace_rvt_sge_adjacent(last_sge, sge); + return true; + } + return false; +} + /** * rvt_lkey_ok - check IB SGE for validity and initialize * @rkt: table containing lkey to check SGE against * @pd: protection domain * @isge: outgoing internal SGE + * @last_sge: last outgoing SGE written * @sge: SGE to check * @acc: access flags * * Check the IB SGE for validity and initialize our internal version * of it. * - * Return: 1 if valid and successful, otherwise returns 0. - * - * increments the reference count upon success + * Increments the reference count when a new sge is stored. * + * Return: 0 if compressed, 1 if added , otherwise returns -errno. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc) + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc) { struct rvt_mregion *mr; unsigned n, m; @@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, * We use LKEY == zero for kernel virtual addresses * (see rvt_get_dma_mr() and dma_virt_ops). */ - rcu_read_lock(); if (sge->lkey == 0) { struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); if (pd->user) - goto bail; + return -EINVAL; + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; @@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } + if (rvt_sge_adjacent(isge, last_sge, sge)) + return 0; + rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (!mr) goto bail; @@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->m = m; isge->n = n; ok: + trace_rvt_sge_new(isge, sge); return 1; bail_unref: rvt_put_mr(mr); bail: rcu_read_unlock(); - return 0; + return -EINVAL; } EXPORT_SYMBOL(rvt_lkey_ok); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..a3dd1e536860 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1646,7 +1646,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; - int ret; + int ret, incr; size_t cplen; bool reserved_op; int local_ops_delayed = 0; @@ -1719,22 +1719,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->length = 0; j = 0; if (wr->num_sge) { + struct rvt_sge *last_sge = NULL; + acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0; for (i = 0; i < wr->num_sge; i++) { u32 length = wr->sg_list[i].length; - int ok; if (length == 0) continue; - ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], - &wr->sg_list[i], acc); - if (!ok) { - ret = -EINVAL; - goto bail_inval_free; - } + incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, + &wr->sg_list[i], acc); + if (unlikely(incr < 0)) + goto bail_lkey_error; wqe->length += length; - j++; + if (incr) + last_sge = &wqe->sg_list[j]; + j += incr; } wqe->wr.num_sge = j; } @@ -1781,12 +1782,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp, wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; qp->s_avail--; } - trace_rvt_post_one_wr(qp, wqe); + trace_rvt_post_one_wr(qp, wqe, wr->num_sge); smp_wmb(); /* see request builders */ qp->s_head = next; return 0; +bail_lkey_error: + ret = incr; bail_inval_free: /* release mr holds */ while (j) { diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 3318a6c36373..976e482930a3 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -103,6 +103,68 @@ DEFINE_EVENT( TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), TP_ARGS(mr, m, n, v, len)); +DECLARE_EVENT_CLASS( + rvt_sge_template, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device)) + __field(struct rvt_mregion *, mr) + __field(struct rvt_sge *, sge) + __field(struct ib_sge *, isge) + __field(void *, vaddr) + __field(u64, ivaddr) + __field(u32, lkey) + __field(u32, sge_length) + __field(u32, length) + __field(u32, ilength) + __field(int, user) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device)); + __entry->mr = sge->mr; + __entry->sge = sge; + __entry->isge = isge; + __entry->vaddr = sge->vaddr; + __entry->ivaddr = isge->addr; + __entry->lkey = sge->mr->lkey; + __entry->sge_length = sge->sge_length; + __entry->length = sge->length; + __entry->ilength = isge->length; + __entry->m = sge->m; + __entry->n = sge->m; + __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user; + ), + TP_printk( + "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u", + __get_str(dev), + __entry->mr, + __entry->sge, + __entry->isge, + __entry->vaddr, + __entry->ivaddr, + __entry->lkey, + __entry->sge_length, + __entry->length, + __entry->ilength, + __entry->m, + __entry->n, + __entry->user + ) +); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_adjacent, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + +DEFINE_EVENT( + rvt_sge_template, rvt_sge_new, + TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), + TP_ARGS(sge, isge)); + #endif /* __RVT_TRACE_MR_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h index a613a2223751..0ef25fc49f25 100644 --- a/drivers/infiniband/sw/rdmavt/trace_tx.h +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -84,12 +84,12 @@ __print_symbolic(opcode, \ wr_opcode_name(RESERVED10)) #define POS_PRN \ -"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u" +"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u" TRACE_EVENT( rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge), + TP_ARGS(qp, wqe, wr_num_sge), TP_STRUCT__entry( RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) __field(u64, wr_id) @@ -108,6 +108,7 @@ TRACE_EVENT( __field(int, send_flags) __field(pid_t, pid) __field(int, num_sge) + __field(int, wr_num_sge) ), TP_fast_assign( RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) @@ -127,6 +128,7 @@ TRACE_EVENT( __entry->ssn = wqe->ssn; __entry->send_flags = wqe->wr.send_flags; __entry->num_sge = wqe->wr.num_sge; + __entry->wr_num_sge = wr_num_sge; ), TP_printk( POS_PRN, @@ -146,7 +148,8 @@ TRACE_EVENT( __entry->head, __entry->last, __entry->pid, - __entry->num_sge + __entry->num_sge, + __entry->wr_num_sge ) ); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..d0b9f91e5f4d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -515,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, - struct rvt_sge *isge, struct ib_sge *sge, int acc); + struct rvt_sge *isge, struct rvt_sge *last_sge, + struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid, u16 lid); -- cgit v1.2.3 From 7be85676f1d13c77a7e0c72e04903bfd39580d4f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:12 -0700 Subject: IB/hfi1: Don't remove RB entry when not needed. An RB tree is used for the SDMA pinning cache. Cache entries are extracted and reinserted from the tree in case the address range for it changes. However, if the address range for the entry doesn't change, deleting the entry from the RB tree is not necessary. This affects performance since the tree needs to be rebalanced for each insertion, and this happens in the hot path. Optimize RB search by not removing entries when it's not needed. Reviewed-by: Mike Marciniszyn Reviewed-by: Mitko Haralanov Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mmu_rb.c | 14 ++++++++++---- drivers/infiniband/hw/hfi1/mmu_rb.h | 5 +++-- drivers/infiniband/hw/hfi1/user_sdma.c | 23 ++++++++++++++++------- 3 files changed, 29 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index ccbf52c8ff6f..d41fd87a39f2 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len) +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node) { struct mmu_rb_node *node; unsigned long flags; + bool ret = false; spin_lock_irqsave(&handler->lock, flags); node = __mmu_rb_search(handler, addr, len); if (node) { + if (node->addr == addr && node->len == len) + goto unlock; __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ + ret = true; } +unlock: spin_unlock_irqrestore(&handler->lock, flags); - - return node; + *rb_node = node; + return ret; } void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 754f6ebf13fb..f04cec1e99d1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); -struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler, - unsigned long addr, unsigned long len); +bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler, + unsigned long addr, unsigned long len, + struct mmu_rb_node **rb_node); #endif /* _HFI1_MMU_RB_H */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 16fd519216dc..79450cf2a3d5 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1165,14 +1165,23 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; - - rb_node = hfi1_mmu_rb_extract(pq->handler, - (unsigned long)iovec->iov.iov_base, - iovec->iov.iov_len); - if (rb_node) + bool extracted; + + extracted = + hfi1_mmu_rb_remove_unless_exact(pq->handler, + (unsigned long) + iovec->iov.iov_base, + iovec->iov.iov_len, &rb_node); + if (rb_node) { node = container_of(rb_node, struct sdma_mmu_node, rb); - else - rb_node = NULL; + if (!extracted) { + atomic_inc(&node->refcount); + iovec->pages = node->pages; + iovec->npages = node->npages; + iovec->node = node; + return 0; + } + } if (!node) { node = kzalloc(sizeof(*node), GFP_KERNEL); -- cgit v1.2.3 From e3304b7cc4f14d365f46d6847a35563ae8b017f7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:18 -0700 Subject: IB/hfi1: Optimize cachelines for user SDMA request structure The current user SDMA request structure layout has holes. The cachelines can be reduced to improve cacheline trading. Separate fields in the following categories: mostly read, writable and shared with interrupt. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 106 ++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 48 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 79450cf2a3d5..92517cebb4c7 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -117,6 +117,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -204,25 +205,42 @@ struct evict_data { }; struct user_sdma_request { - struct sdma_req_info info; - struct hfi1_user_sdma_pkt_q *pq; - struct hfi1_user_sdma_comp_q *cq; /* This is the original header from user space */ struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; /* * Pointer to the SDMA engine for this request. * Since different request could be on different VLs, * each request will need it's own engine pointer. */ struct sdma_engine *sde; - s8 ahg_idx; - u32 ahg[9]; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors */ - u32 koffset; + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + unsigned long flags; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; /* * KDETH.OFFSET (TID) field * The offset can cover multiple packets, depending on the @@ -230,29 +248,19 @@ struct user_sdma_request { */ u32 tidoffset; /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. */ - unsigned data_iovs; - /* total length of the data in the request */ - u32 data_len; + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; /* progress index moving along the iovs array */ - unsigned iov_idx; + u8 iov_idx; + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; - /* number of elements copied to the tids array */ - u16 n_tids; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - u16 tididx; - u32 sent; - u64 seqnum; - u64 seqcomp; - u64 seqsubmitted; - struct list_head txps; - unsigned long flags; - /* status of the last txreq completed */ - int status; -}; +} ____cacheline_aligned_in_smp; /* * A single txreq could span up to 3 physical pages when the MTU @@ -1034,11 +1042,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) datalen); if (changes < 0) goto free_tx; - sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, changes, - req->ahg, sizeof(req->hdr), - user_sdma_txreq_cb); } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + @@ -1442,21 +1445,22 @@ done: } static int set_txreq_header_ahg(struct user_sdma_request *req, - struct user_sdma_txreq *tx, u32 len) + struct user_sdma_txreq *tx, u32 datalen) { + u32 ahg[AHG_KDETH_ARRAY_SIZE]; int diff = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); - u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len)); + u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(req->ahg, diff, 0, 0, 12, + AHG_HEADER_SET(ahg, diff, 0, 0, 12, cpu_to_le16(LRH2PBC(lrhlen))); /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(req->ahg, diff, 3, 0, 16, + AHG_HEADER_SET(ahg, diff, 3, 0, 16, cpu_to_be16(lrhlen >> 2)); } @@ -1468,13 +1472,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); + AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); /* KDETH.Offset */ - AHG_HEADER_SET(req->ahg, diff, 15, 0, 16, + AHG_HEADER_SET(ahg, diff, 15, 0, 16, cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(req->ahg, diff, 15, 16, 16, - cpu_to_le16(req->koffset >> 16)); + AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1492,9 +1495,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, * we have to check again. */ if (++req->tididx > req->n_tids - 1 || - !req->tids[req->tididx]) { + !req->tids[req->tididx]) return -EINVAL; - } tidval = req->tids[req->tididx]; } omfactor = ((EXP_TID_GET(tidval, LEN) * @@ -1502,7 +1504,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(req->ahg, diff, 7, 0, 16, + AHG_HEADER_SET(ahg, diff, 7, 0, 16, ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) & 0x7fff))); @@ -1522,12 +1524,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); + AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); } + if (diff < 0) + return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, req->ahg, diff, tidval); + req->ahg_idx, ahg, diff, tidval); + sdma_txinit_ahg(&tx->txreq, + SDMA_TXREQ_F_USE_AHG, + datalen, req->ahg_idx, diff, + ahg, sizeof(req->hdr), + user_sdma_txreq_cb); + return diff; } -- cgit v1.2.3 From 721c462123b4b53745c1ccd99619b16e8f4e091b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:25 -0700 Subject: IB/hfi1: Name function prototype parameters for affinity module To improve the readability of function prototypes, give the parameters names in the affinity module. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index e78c7aa094e0..2a1e374169c0 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -75,24 +75,26 @@ struct hfi1_msix_entry; /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void); /* Initialize driver affinity data */ -int hfi1_dev_affinity_init(struct hfi1_devdata *); +int hfi1_dev_affinity_init(struct hfi1_devdata *dd); /* * Set IRQ affinity to a CPU. The function will determine the * CPU and set the affinity to it. */ -int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Remove the IRQ's CPU affinity. This function also updates * any internal CPU tracking data */ -void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *); +void hfi1_put_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix); /* * Determine a CPU affinity for a user process, if the process does not * have an affinity set yet. */ -int hfi1_get_proc_affinity(int); +int hfi1_get_proc_affinity(int node); /* Release a CPU used by a user process. */ -void hfi1_put_proc_affinity(int); +void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; -- cgit v1.2.3 From bb7dde8784913c06ccd1456bed6dcc5ebd0b3c24 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 26 May 2017 05:35:31 -0700 Subject: IB/hfi1: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 18 +++--- drivers/infiniband/hw/hfi1/chip.c | 105 +++++++++++++++++----------------- drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/pcie.c | 80 ++++++-------------------- 4 files changed, 78 insertions(+), 131 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index e2cd2cd3b28a..a97055dd4fbd 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) sde->cpu = cpu; cpumask_clear(&msix->mask); cpumask_set_cpu(cpu, &msix->mask); - dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], sde->this_idx, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); /* * Set the new cpu in the hfi1_affinity_node and clean @@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) { struct irq_affinity_notify *notify = &msix->notify; - notify->irq = msix->msix.vector; + notify->irq = msix->irq; notify->notify = hfi1_irq_notifier_notify; notify->release = hfi1_irq_notifier_release; @@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd, } cpumask_set_cpu(cpu, &msix->mask); - dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", - msix->msix.vector, irq_type_names[msix->type], + dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n", + msix->irq, irq_type_names[msix->type], extra, cpu); - irq_set_affinity_hint(msix->msix.vector, &msix->mask); + irq_set_affinity_hint(msix->irq, &msix->mask); if (msix->type == IRQ_SDMA) { sde->cpu = cpu; @@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, } } - irq_set_affinity_hint(msix->msix.vector, NULL); + irq_set_affinity_hint(msix->irq, NULL); cpumask_clear(&msix->mask); mutex_unlock(&node_affinity.lock); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5dbee3c1bd45..9118618d28e5 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12800,30 +12800,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) for (i = 0; i < dd->num_msix_entries; i++, me++) { if (!me->arg) /* => no irq, no affinity */ continue; - hfi1_put_irq_affinity(dd, &dd->msix_entries[i]); - free_irq(me->msix.vector, me->arg); + hfi1_put_irq_affinity(dd, me); + free_irq(me->irq, me->arg); } + + /* clean structures */ + kfree(dd->msix_entries); + dd->msix_entries = NULL; + dd->num_msix_entries = 0; } else { /* INTx */ if (dd->requested_intx_irq) { free_irq(dd->pcidev->irq, dd); dd->requested_intx_irq = 0; } - } - - /* turn off interrupts */ - if (dd->num_msix_entries) { - /* MSI-X */ - pci_disable_msix(dd->pcidev); - } else { - /* INTx */ disable_intx(dd->pcidev); } - /* clean structures */ - kfree(dd->msix_entries); - dd->msix_entries = NULL; - dd->num_msix_entries = 0; + pci_free_irq_vectors(dd->pcidev); } /* @@ -12972,13 +12966,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd) continue; /* make sure the name is terminated */ me->name[sizeof(me->name) - 1] = 0; + me->irq = pci_irq_vector(dd->pcidev, i); + /* + * On err return me->irq. Don't need to clear this + * because 'arg' has not been set, and cleanup will + * do the right thing. + */ + if (me->irq < 0) + return me->irq; - ret = request_threaded_irq(me->msix.vector, handler, thread, 0, + ret = request_threaded_irq(me->irq, handler, thread, 0, me->name, arg); if (ret) { dd_dev_err(dd, - "unable to allocate %s interrupt, vector %d, index %d, err %d\n", - err_info, me->msix.vector, idx, ret); + "unable to allocate %s interrupt, irq %d, index %d, err %d\n", + err_info, me->irq, idx, ret); return ret; } /* @@ -12989,8 +12991,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ret = hfi1_get_irq_affinity(dd, me); if (ret) - dd_dev_err(dd, - "unable to pin IRQ %d\n", ret); + dd_dev_err(dd, "unable to pin IRQ %d\n", ret); } return ret; @@ -13009,7 +13010,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; - synchronize_irq(me->msix.vector); + synchronize_irq(me->irq); } } @@ -13022,7 +13023,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); me->arg = NULL; } @@ -13050,14 +13051,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) DRIVER_NAME "_%d kctxt%d", dd->unit, idx); me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; - + me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); + if (me->irq < 0) { + dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", + idx, me->irq); + return; + } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->msix.vector, receive_context_interrupt, + ret = request_threaded_irq(me->irq, receive_context_interrupt, receive_context_thread, 0, me->name, arg); if (ret) { - dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n", - me->msix.vector, idx, ret); + dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", + me->irq, idx, ret); return; } /* @@ -13070,7 +13076,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->msix.vector, me->arg); + free_irq(me->irq, me->arg); } } @@ -13093,9 +13099,8 @@ static void reset_interrupts(struct hfi1_devdata *dd) static int set_up_interrupts(struct hfi1_devdata *dd) { - struct hfi1_msix_entry *entries; - u32 total, request; - int i, ret; + u32 total; + int ret, request; int single_interrupt = 0; /* we expect to have all the interrupts */ /* @@ -13107,39 +13112,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd) */ total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; - entries = kcalloc(total, sizeof(*entries), GFP_KERNEL); - if (!entries) { - ret = -ENOMEM; - goto fail; - } - /* 1-1 MSI-X entry assignment */ - for (i = 0; i < total; i++) - entries[i].msix.entry = i; - /* ask for MSI-X interrupts */ - request = total; - request_msix(dd, &request, entries); - - if (request == 0) { + request = request_msix(dd, total); + if (request < 0) { + ret = request; + goto fail; + } else if (request == 0) { /* using INTx */ /* dd->num_msix_entries already zero */ - kfree(entries); single_interrupt = 1; dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n"); + } else if (request < total) { + /* using MSI-X, with reduced interrupts */ + dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n", + total, request); + ret = -EINVAL; + goto fail; } else { - /* using MSI-X */ - dd->num_msix_entries = request; - dd->msix_entries = entries; - - if (request != total) { - /* using MSI-X, with reduced interrupts */ - dd_dev_err( - dd, - "cannot handle reduced interrupt case, want %u, got %u\n", - total, request); - ret = -EINVAL; + dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), + GFP_KERNEL); + if (!dd->msix_entries) { + ret = -ENOMEM; goto fail; } + /* using MSI-X */ + dd->num_msix_entries = total; dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total); } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9c6c73448461..8f74cf6d6c9a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -521,7 +521,7 @@ static inline void incr_cntr32(u32 *cntr) #define MAX_NAME_SIZE 64 struct hfi1_msix_entry { enum irq_type type; - struct msix_entry msix; + int irq; void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; @@ -1838,9 +1838,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev); int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry); -void hfi1_enable_intx(struct pci_dev *pdev); +int request_msix(struct hfi1_devdata *dd, u32 msireq); void restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 6a9f6f9819e1..f01841b51946 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) iounmap(dd->piobase); } -static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt, - struct hfi1_msix_entry *hfi1_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - /* - * We can't pass hfi1_msix_entry array to msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the hfi1_msix_entry array. - */ - msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) { - ret = -ENOMEM; - goto do_intx; - } - - for (i = 0; i < nvec; i++) - msix_entry[i] = hfi1_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - nvec = ret; - - for (i = 0; i < nvec; i++) - hfi1_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - hfi1_enable_intx(dd->pcidev); -} - /* return the PCIe link speed from the given link status */ static u32 extract_speed(u16 linkstat) { @@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd) } /* - * Returns in *nent: - * - actual number of interrupts allocated + * Returns: + * - actual number of interrupts allocated or * - 0 if fell back to INTx. + * - error */ -void request_msix(struct hfi1_devdata *dd, u32 *nent, - struct hfi1_msix_entry *entry) +int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int pos; + int nvec; - pos = dd->pcidev->msix_cap; - if (*nent && pos) { - msix_setup(dd, pos, nent, entry); - /* did it, either MSI-X or INTx */ - } else { - *nent = 0; - hfi1_enable_intx(dd->pcidev); + nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, + PCI_IRQ_MSIX | PCI_IRQ_LEGACY); + if (nvec < 0) { + dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); + return nvec; } tune_pcie_caps(dd); -} -void hfi1_enable_intx(struct pci_dev *pdev) -{ - /* first, turn on INTx */ - pci_intx(pdev, 1); - /* then turn off MSI-X */ - pci_disable_msix(pdev); + /* check for legacy IRQ */ + if (nvec == 1 && !dd->pcidev->msix_enabled) + return 0; + + return nvec; } /* restore command and BARs after a reset has wiped them out */ -- cgit v1.2.3 From cb49366f3616fdf197893c24a5b2677b8c26ce29 Mon Sep 17 00:00:00 2001 From: "Vishwanathapura, Niranjana" Date: Thu, 1 Jun 2017 17:04:02 -0700 Subject: IB/core,rdmavt,hfi1,opa-vnic: Send OPA cap_mask3 in trap Provide the ability for IB clients to modify the OPA specific capability mask and include this mask in the subsequent trap data. Reviewed-by: Niranjana Vishwanathapura Signed-off-by: Michael N. Henry Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 7 ++----- drivers/infiniband/hw/hfi1/mad.h | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 6 +++++- drivers/infiniband/sw/rdmavt/vt.c | 9 +++++++-- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 27 ++++++++++++++++++++++++- include/rdma/ib_verbs.h | 3 ++- include/rdma/rdma_vt.h | 1 + 7 files changed, 44 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673a52d4..70831ad621b0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -260,6 +260,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) data.issuer_lid = cpu_to_be32(lid); data.ntc_144.lid = data.issuer_lid; data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); send_trap(ibp, &data, sizeof(data)); } @@ -704,11 +705,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT; pi->buffer_units = cpu_to_be32(buffer_units); - pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported | - OPA_CAP_MASK3_IsEthOnFabricSupported); - /* Driver does not support mcast/collective configuration */ - pi->opa_cap_mask &= - cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported); + pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) << 3 | (HFI1_MCAST_NR & 0x7)); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 5aa3fd1be653..a4e2506bd5ca 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -115,7 +115,7 @@ struct opa_mad_notice_attr { __be32 lid; /* LID where change occurred */ __be32 new_cap_mask; /* new capability mask */ __be16 reserved2; - __be16 cap_mask; + __be16 cap_mask3; __be16 change_flags; /* low 4 bits only */ } __packed ntc_144; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index af54d3f4696a..2d7759f0c6b4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1537,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; ibp->rvp.sm_lid = 0; - /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ + /* + * Below should only set bits defined in OPA PortInfo.CapabilityMask + * and PortInfo.CapabilityMask3 + */ ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | IB_PORT_CAP_MASK_NOTICE_SUP; + ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 0d7c6bb551d9..64bdd442078a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, return -EINVAL; rvp = rdi->ports[port_index]; - rvp->port_cap_flags |= props->set_port_cap_mask; - rvp->port_cap_flags &= ~props->clr_port_cap_mask; + if (port_modify_mask & IB_PORT_OPA_MASK_CHG) { + rvp->port_cap3_flags |= props->set_port_cap_mask; + rvp->port_cap3_flags &= ~props->clr_port_cap_mask; + } else { + rvp->port_cap_flags |= props->set_port_cap_mask; + rvp->port_cap_flags &= ~props->clr_port_cap_mask; + } if (props->set_port_cap_mask || props->clr_port_cap_mask) rdi->driver_f.cap_mask_chg(rdi, port_num); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 875694f9a7f9..32cdd7a35415 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -52,7 +52,9 @@ #include #include -#include +#include +#include +#include #include "opa_vnic_internal.h" @@ -979,6 +981,27 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) return 0; } +/** + * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM + * by way of ib_modify_port to indicate support for ethernet on the + * fabric. + * @cport: pointer to control port + * @en: enable or disable ethernet on fabric support + */ +static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en) +{ + struct ib_port_modify pm = { 0 }; + int i; + + if (en) + pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + else + pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported; + + for (i = 1; i <= cport->num_ports; i++) + ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm); +} + /** * opa_vnic_vema_add_one -- Handle new ib device * @device: ib device pointer @@ -1007,6 +1030,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device) c_info("VNIC client initialized\n"); ib_set_client_data(device, &opa_vnic_client, cport); + opa_vnic_ctrl_config_dev(cport, true); } /** @@ -1025,6 +1049,7 @@ static void opa_vnic_vema_rem_one(struct ib_device *device, return; c_info("removing VNIC client\n"); + opa_vnic_ctrl_config_dev(cport, false); vema_unregister(cport); kfree(cport); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8f1ce4e27bbd..9d4d2a74c95e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -577,7 +577,8 @@ struct ib_device_modify { enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), - IB_PORT_RESET_QKEY_CNTR = (1<<3) + IB_PORT_RESET_QKEY_CNTR = (1<<3), + IB_PORT_OPA_MASK_CHG = (1<<4) }; struct ib_port_modify { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index d0b9f91e5f4d..0f18ffd98dd7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -75,6 +75,7 @@ struct rvt_ibport { __be64 mkey; u64 tid; u32 port_cap_flags; + u16 port_cap3_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; -- cgit v1.2.3 From b888429c202197916c8c549811a2dd62f090280d Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:44 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_SEND_DONE bit operation The atomic SDMA_REQ_SEND_DONE bit is set by the process-level code, and then the same process-level code uses the bit to test that all packets have been submitted incurring a costly atomic read. Use a bool type with a READ_ONCE/WRITE_ONCE pairing for this bit, and use the same condition that is used to set the bit to test that all packets have been submitted. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 92517cebb4c7..6fb70f0064a6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -154,10 +154,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ /* SDMA request flag bits */ -#define SDMA_REQ_FOR_THREAD 1 -#define SDMA_REQ_SEND_DONE 2 -#define SDMA_REQ_HAS_ERROR 3 -#define SDMA_REQ_DONE_ERROR 4 +#define SDMA_REQ_HAS_ERROR 1 +#define SDMA_REQ_DONE_ERROR 2 #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) @@ -258,6 +256,7 @@ struct user_sdma_request { u16 tididx; /* progress index moving along the iovs array */ u8 iov_idx; + u8 done; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -628,6 +627,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqsubmitted = 0; req->flags = 0; req->tids = NULL; + req->done = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -809,7 +809,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, * request have been submitted to the SDMA engine. However, it * will not wait for send completions. */ - while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { + while (req->seqsubmitted != req->info.npkts) { ret = user_sdma_send_pkts(req, pcount); if (ret < 0) { if (ret != -EBUSY) { @@ -1118,7 +1118,7 @@ dosend: ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); req->seqsubmitted += count; if (req->seqsubmitted == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); + WRITE_ONCE(req->done, 1); /* * The txreq has already been submitted to the HW queue * so we can free the AHG entry now. Corruption will not @@ -1585,7 +1585,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && - (test_bit(SDMA_REQ_SEND_DONE, &req->flags) || + (READ_ONCE(req->done) || test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { user_sdma_free_request(req, false); pq_update(pq); -- cgit v1.2.3 From e9c48ebd0cb4d31bbaf60ddec2a2fa40227b8cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:50 -0700 Subject: IB/hfi1: Remove atomic SDMA_REQ_HAS_ERROR bit operation Atomic bit tests are used to single errors and the completion of request submissions. These operations don't need to be atomic and show to be expensive on the profile. Replace each atomic bit operation with a bool type and a READ_ONCE/WRITE_ONCE pairing. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6fb70f0064a6..fcadbb9978ca 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -153,10 +153,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -/* SDMA request flag bits */ -#define SDMA_REQ_HAS_ERROR 1 -#define SDMA_REQ_DONE_ERROR 2 - #define SDMA_PKT_Q_INACTIVE BIT(0) #define SDMA_PKT_Q_ACTIVE BIT(1) #define SDMA_PKT_Q_DEFERRED BIT(2) @@ -232,7 +228,6 @@ struct user_sdma_request { /* Writeable fields shared with interrupt */ u64 seqcomp ____cacheline_aligned_in_smp; u64 seqsubmitted; - unsigned long flags; /* status of the last txreq completed */ int status; @@ -257,6 +252,7 @@ struct user_sdma_request { /* progress index moving along the iovs array */ u8 iov_idx; u8 done; + u8 has_error; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; } ____cacheline_aligned_in_smp; @@ -625,9 +621,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->seqnum = 0; req->seqcomp = 0; req->seqsubmitted = 0; - req->flags = 0; req->tids = NULL; req->done = 0; + req->has_error = 0; INIT_LIST_HEAD(&req->txps); memcpy(&req->info, &info, sizeof(info)); @@ -814,7 +810,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, if (ret < 0) { if (ret != -EBUSY) { req->status = ret; - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); if (ACCESS_ONCE(req->seqcomp) == req->seqsubmitted - 1) goto free_req; @@ -916,10 +912,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) pq = req->pq; /* If tx completion has reported an error, we are done. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } /* * Check if we might have sent the entire request already @@ -942,10 +936,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) * with errors. If so, we are not going to process any * more packets from this request. */ - if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) { - set_bit(SDMA_REQ_DONE_ERROR, &req->flags); + if (READ_ONCE(req->has_error)) return -EFAULT; - } tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL); if (!tx) @@ -1566,7 +1558,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) if (status != SDMA_TXREQ_S_OK) { SDMA_DBG(req, "SDMA completion with error %d", status); - set_bit(SDMA_REQ_HAS_ERROR, &req->flags); + WRITE_ONCE(req->has_error, 1); } req->seqcomp = tx->seqnum; @@ -1586,7 +1578,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) req->status = status; if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) && (READ_ONCE(req->done) || - test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) { + READ_ONCE(req->has_error))) { user_sdma_free_request(req, false); pq_update(pq); set_comp_state(pq, cq, idx, ERROR, req->status); -- cgit v1.2.3 From b4e9e2f0fc87b876a4e2162733c1940e861785b1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 26 May 2017 05:35:57 -0700 Subject: IB/hfi1: Reclassify type of messages printed for platform config logic Reclassify messages printed out to /var/log/messages into warnings and errors to facilitate debugging in the future for issues related to the platform config logic. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 838fe84e285a..cbda9b189216 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -242,7 +242,7 @@ static int qual_power(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Port disabled due to system power restrictions\n", __func__); @@ -268,7 +268,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd) if (ppd->offline_disabled_reason == HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) { - dd_dev_info( + dd_dev_err( ppd->dd, "%s: Cable failed bitrate check, disabling port\n", __func__); @@ -709,15 +709,15 @@ static void apply_tunings( ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS, GENERAL_CONFIG, config_data); if (ret != HCMD_SUCCESS) - dd_dev_info(ppd->dd, - "%s: Failed set ext device config params\n", - __func__); + dd_dev_err(ppd->dd, + "%s: Failed set ext device config params\n", + __func__); } if (tx_preset_index == OPA_INVALID_INDEX) { if (ppd->port_type == PORT_TYPE_QSFP && limiting_active) - dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n", - __func__); + dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n", + __func__); return; } @@ -900,7 +900,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd, case 0xD: /* fallthrough */ case 0xF: default: - dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n", + dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n", __func__); break; } @@ -942,7 +942,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) case PORT_TYPE_DISCONNECTED: ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED); - dd_dev_info(dd, "%s: Port disconnected, disabling port\n", + dd_dev_warn(dd, "%s: Port disconnected, disabling port\n", __func__); goto bail; case PORT_TYPE_FIXED: @@ -1027,7 +1027,7 @@ void tune_serdes(struct hfi1_pportdata *ppd) } break; default: - dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__); + dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__); ppd->port_type = PORT_TYPE_UNKNOWN; tuning_method = OPA_UNKNOWN_TUNING; total_atten = 0; -- cgit v1.2.3 From 90dba23e1e30af72dbf4379842a5161e811b26b8 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 26 May 2017 05:36:04 -0700 Subject: IB/hfi1: Fix up sdma_init function comment sdma_init does not take a number of sdma engine parameters, rather it initializes all of the sdma engines. Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sdma.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index bfd0d5187e9b..d82ff57214c5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines) * @dd: hfi1_devdata * @port: port number (currently only zero) * - * sdma_init initializes the specified number of engines. - * - * The code initializes each sde, its csrs. Interrupts - * are not required to be enabled. + * Initializes each sde and its csrs. + * Interrupts are not required to be enabled. * * Returns: * 0 - success, -errno on failure -- cgit v1.2.3 From b2f8a04e77bad520d52b7f321ca776b33c947ad0 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:17:28 -0700 Subject: IB/rdmavt: Remove duplicated functions The free_qpn() function from the hfi1/qib driver which was the basis for rdmavt_free_qpn() function was accidentally left in the code. Remove it. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a3dd1e536860..a372afbbfbef 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -422,15 +422,6 @@ bail: return ret; } -static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - /** * rvt_clear_mr_refs - Drop help mr refs * @qp: rvt qp data structure @@ -646,6 +637,19 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, lockdep_assert_held(&qp->s_lock); } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ +static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for @@ -936,7 +940,7 @@ bail_ip: kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: - free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: if (!qp->ip) @@ -1325,19 +1329,6 @@ inval: return -EINVAL; } -/** rvt_free_qpn - Free a qpn from the bit map - * @qpt: QP table - * @qpn: queue pair number to free - */ -static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy -- cgit v1.2.3 From bc54f6714c3a5d1f7ac6e7e5a5f7c390b1a01285 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:14 -0700 Subject: IB/hfi1: Ensure dd->gi_mask can not be overflowed As the code stands today the array access in remap_intr() is OK. To future proof the code though we should explicitly check to ensure the index value is not outside of the valid range. This is not a straight forward calculation so err on the side of caution. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9118618d28e5..d6af715c35bf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12832,7 +12832,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) /* clear from the handled mask of the general interrupt */ m = isrc / 64; n = isrc % 64; - dd->gi_mask[m] &= ~((u64)1 << n); + if (likely(m < CCE_NUM_INT_CSRS)) { + dd->gi_mask[m] &= ~((u64)1 << n); + } else { + dd_dev_err(dd, "remap interrupt err\n"); + return; + } /* direct the chip source to the given MSI-X interrupt */ m = isrc / 8; -- cgit v1.2.3 From 67838e64fa63415fe4e0da7149bcb123ed9f5612 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:18:46 -0700 Subject: IB/hfi1: Fix spelling mistake in linkdown reason Spell receive correctly in OPA_LINKDOWN_REASON_RCV_ERROR Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index d6af715c35bf..99c29ddcca35 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6906,7 +6906,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd) static const char * const link_down_reason_strs[] = { [OPA_LINKDOWN_REASON_NONE] = "None", - [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0", + [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0", [OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length", [OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long", [OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short", -- cgit v1.2.3 From 6c31e5283cb06b81a13cc88da2f2ad3db594a935 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:21 -0700 Subject: IB/hfi1: Use QPN mask to avoid overflow Ensure we can't come up with an array size that is bigger than the array by applying the QPN mask before the divide in the free_qpn function. Reviewed-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index a372afbbfbef..2ce0928dddd6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -645,7 +645,7 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; - map = qpt->map + qpn / RVT_BITS_PER_PAGE; + map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE; if (map->page) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } -- cgit v1.2.3 From 52d86e72c515ebd4fb0d329470aa50698e28fb36 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:19:46 -0700 Subject: IB/hfi1: Remove subtraction of uninitialized value In process_receive_packet the packet header field is used to calculate the length of the packet. However this is not necessarily setup. In fact only if the ECN prescan is enabled will the packet header be valid at this point. The code works as is because we do not do anything with the packet length at this point in the packet processing. The length and header are setup correctly in hfi1_setup_ib_header which is called by the following sequence: process_receive_packet() -> rhf_receieve_function_map[]() --> process_receive_ib() ---> hfi1_setup_9B_packet() ----> hfi1_setup_ib_header() Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 2a1022e374a5..9e59430bc55c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -703,7 +703,6 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread) packet->etype = rhf_rcv_type(packet->rhf); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; /* total length */ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */ /* retrieve eager buffer details */ -- cgit v1.2.3 From f1685179820bb7f9d9c4f5d0ac9bfb269529a919 Mon Sep 17 00:00:00 2001 From: Neel Desai Date: Mon, 29 May 2017 17:20:27 -0700 Subject: IB/hfi1: Add error checking for buffer overrun in OPA aggregate Improve safety of code by checking the size of the data buffer and prevent buffer overrun Reviewed-by: Dennis Dalessandro Reviewed-by: Brian Welty Signed-off-by: Neel Desai Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 296 ++++++++++++++++++++++++--------------- 1 file changed, 185 insertions(+), 111 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 70831ad621b0..b180dff5f2e9 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,14 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +static int smp_length_check(u32 data_size, u32 request_len) +{ + if (unlikely(request_len < data_size)) + return -EINVAL; + + return 0; +} + static int reply(struct ib_mad_hdr *smp) { /* @@ -308,11 +316,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct opa_node_description *nd; - if (am) { + if (am || smp_length_check(sizeof(*nd), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -329,7 +337,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_node_info *ni; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); @@ -339,6 +347,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data, /* GUID 0 is illegal */ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 || + smp_length_check(sizeof(*ni), max_len) || get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -520,7 +529,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd) static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int i; struct hfi1_devdata *dd; @@ -536,7 +545,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, u32 buffer_units; u64 tmp = 0; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -745,7 +754,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_req = OPA_AM_NBLK(am); @@ -768,6 +777,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16); + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_req > n_blocks_avail || n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; " @@ -1071,7 +1085,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1092,7 +1106,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, int ret, i, invalid = 0, call_set_mtu = 0; int call_link_downgrade_policy = 0; - if (num_ports != 1) { + if (num_ports != 1 || + smp_length_check(sizeof(*pi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1343,7 +1358,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ret) return ret; - ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); /* restore re-reg bit per o14-12.2.1 */ pi->clientrereg_subnettimeout |= clientrereg; @@ -1360,7 +1376,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return ret; get_only: - return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, + max_len); } /** @@ -1421,7 +1438,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 n_blocks_sent = OPA_AM_NBLK(am); @@ -1431,6 +1448,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, int i; u16 n_blocks_avail; unsigned npkeys = hfi1_get_npkeys(dd); + u32 size = 0; if (n_blocks_sent == 0) { pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n", @@ -1441,6 +1459,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1; + size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE); + + if (smp_length_check(size, max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + if (start_block + n_blocks_sent > n_blocks_avail || n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) { pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n", @@ -1458,7 +1483,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len, + max_len); } #define ILLEGAL_VL 12 @@ -1519,14 +1545,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data) static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1542,14 +1568,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; + size_t size = ARRAY_SIZE(ibp->sl_to_sc); int i; u8 sc; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1564,19 +1591,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data, } } - return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *p = data; size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */ unsigned i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1592,13 +1620,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); + size_t size = ARRAY_SIZE(ibp->sc_to_sl); u8 *p = data; int i; - if (am) { + if (am || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1606,19 +1635,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data, for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++) ibp->sc_to_sl[i] = *p++; - return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); void *vp = (void *)data; size_t size = 4 * sizeof(u64); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1633,7 +1663,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NBLK(am); int async_update = OPA_AM_ASYNC(am); @@ -1641,8 +1671,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, void *vp = (void *)data; struct hfi1_pportdata *ppd; int lstate; + /* + * set_sc2vlt_tables writes the information contained in *data + * to four 64-bit registers SendSC2VLt[0-3]. We need to make + * sure *max_len is not greater than the total size of the four + * SendSC2VLt[0-3] registers. + */ + size_t size = 4 * sizeof(u64); - if (n_blocks != 1 || async_update) { + if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1662,27 +1699,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data, set_sc2vlt_tables(dd, vp); - return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; - int size; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp); + fm_get_table(ppd, FM_TBL_SC2VLNT, vp); if (resp_len) *resp_len += size; @@ -1692,15 +1730,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 n_blocks = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; void *vp = (void *)data; int lstate; + int size = sizeof(struct sc2vlnt); - if (n_blocks != 1) { + if (n_blocks != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1718,12 +1757,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data, fm_set_table(ppd, FM_TBL_SC2VLNT, vp); return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1732,7 +1771,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_pportdata *ppd; struct opa_port_state_info *psi = (struct opa_port_state_info *)data; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1765,7 +1804,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -1776,7 +1815,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct opa_port_state_info *psi = (struct opa_port_state_info *)data; int ret, invalid = 0; - if (nports != 1) { + if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1806,19 +1845,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (invalid) smp->status |= IB_SMP_INVALID_FIELD; - return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); u32 addr = OPA_AM_CI_ADDR(am); u32 len = OPA_AM_CI_LEN(am) + 1; int ret; - if (dd->pport->port_type != PORT_TYPE_QSFP) { + if (dd->pport->port_type != PORT_TYPE_QSFP || + smp_length_check(len, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1861,21 +1902,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - int size; + int size = sizeof(struct buffer_control); - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } ppd = dd->pport + (port - 1); - size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); + fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p); trace_bct_get(dd, p); if (resp_len) *resp_len += size; @@ -1884,14 +1926,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data, } static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, - struct ib_device *ibdev, u8 port, u32 *resp_len) + struct ib_device *ibdev, u8 port, u32 *resp_len, + u32 max_len) { u32 num_ports = OPA_AM_NPORT(am); struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd; struct buffer_control *p = (struct buffer_control *)data; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1902,41 +1945,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len, + max_len); } static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; - int size = 0; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } switch (section) { case OPA_VLARB_LOW_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); + fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p); break; case OPA_VLARB_HIGH_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); + fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p); break; case OPA_VLARB_PREEMPT_ELEMENTS: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p); break; case OPA_VLARB_PREEMPT_MATRIX: - size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); + fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p); break; default: pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n", be32_to_cpu(smp->attr_mod)); smp->status |= IB_SMP_INVALID_FIELD; + size = 0; break; } @@ -1948,14 +1993,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); u32 num_ports = OPA_AM_NPORT(am); u8 section = (am & 0x00ff0000) >> 16; u8 *p = data; + int size = 256; - if (num_ports != 1) { + if (num_ports != 1 || smp_length_check(size, max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -1983,7 +2029,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data, break; } - return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_pma_mad { @@ -3279,13 +3326,18 @@ struct opa_congestion_info_attr { static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_info_attr *p = (struct opa_congestion_info_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; p->congestion_log_length = OPA_CONG_LOG_ELEMS; @@ -3298,7 +3350,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { int i; struct opa_congestion_setting_attr *p = @@ -3308,6 +3360,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am, struct opa_congestion_setting_entry_shadow *entries; struct cc_state *cc_state; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + rcu_read_lock(); cc_state = get_cc_state(ppd); @@ -3382,7 +3439,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct opa_congestion_setting_attr *p = (struct opa_congestion_setting_attr *)data; @@ -3391,6 +3448,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, struct opa_congestion_setting_entry_shadow *entries; int i; + if (smp_length_check(sizeof(*p), max_len)) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + /* * Save details from packet into the ppd. Hold the cc_state_lock so * our information is consistent with anyone trying to apply the state. @@ -3412,12 +3474,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, apply_cc_state(ppd); return __subn_get_opa_cong_setting(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); } static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, - u8 port, u32 *resp_len) + u8 port, u32 *resp_len, u32 max_len) { struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -3425,7 +3487,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, s64 ts; int i; - if (am != 0) { + if (am || smp_length_check(sizeof(*cong_log), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3483,7 +3545,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *cc_table_attr = (struct ib_cc_table_attr *)data; @@ -3495,9 +3557,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; struct cc_state *cc_state; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3527,14 +3590,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, rcu_read_unlock(); if (resp_len) - *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); + *resp_len += size; return reply((struct ib_mad_hdr *)smp); } static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3545,9 +3608,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, int i, j; u32 sentry, eentry; u16 ccti_limit; + u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1); /* sanity check n_blocks, start_block */ - if (n_blocks == 0 || + if (n_blocks == 0 || smp_length_check(size, max_len) || start_block + n_blocks > ppd->cc_max_table_entries) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); @@ -3578,7 +3642,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, /* now apply the information */ apply_cc_state(ppd); - return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len, + max_len); } struct opa_led_info { @@ -3591,7 +3656,7 @@ struct opa_led_info { static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct hfi1_pportdata *ppd = dd->pport; @@ -3599,7 +3664,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, u32 nport = OPA_AM_NPORT(am); u32 is_beaconing_active; - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3621,14 +3686,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_led_info *p = (struct opa_led_info *)data; u32 nport = OPA_AM_NPORT(am); int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK); - if (nport != 1) { + if (nport != 1 || smp_length_check(sizeof(*p), max_len)) { smp->status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)smp); } @@ -3638,12 +3703,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data, else shutdown_led_override(dd->pport); - return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len); + return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len, + max_len); } static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3651,71 +3717,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_NODE_DESC: ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_NODE_INFO: ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PORT_INFO: ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_get_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_get_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CABLE_INFO: ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_INFO: ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_get_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_LOG: ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_get_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3733,7 +3799,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, u32 max_len) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -3741,51 +3807,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SL_TO_SC_MAP: ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_SL_MAP: ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLT_MAP: ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_SC_TO_VLNT_MAP: ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_VL_ARB_TABLE: ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING: ret = __subn_set_opa_cong_setting(smp, am, data, ibdev, - port, resp_len); + port, resp_len, max_len); break; case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE: ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_LED_INFO: ret = __subn_set_opa_led_info(smp, am, data, ibdev, port, - resp_len); + resp_len, max_len); break; case IB_SMP_ATTR_SM_INFO: if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) @@ -3841,7 +3907,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, memset(next_smp + sizeof(*agg), 0, agg_data_len); (void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3884,7 +3953,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL); + ibdev, port, NULL, (u32)agg_data_len); + if (smp->status & IB_SMP_INVALID_FIELD) + break; if (smp->status & ~IB_SMP_DIRECTION) { set_aggr_error(agg); return reply((struct ib_mad_hdr *)smp); @@ -3994,12 +4065,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); u8 *data; - u32 am; + u32 am, data_size; __be16 attr_id; int ret; *out_mad = *in_mad; data = opa_get_smp_data(smp); + data_size = (u32)opa_get_smp_data_size(smp); am = be32_to_cpu(smp->attr_mod); attr_id = smp->attr_id; @@ -4043,7 +4115,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: clear_opa_smp_data(smp); ret = subn_get_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_get_opa_aggregate(smp, ibdev, port, @@ -4055,7 +4128,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, switch (attr_id) { default: ret = subn_set_opa_sma(attr_id, smp, am, data, - ibdev, port, resp_len); + ibdev, port, resp_len, + data_size); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, -- cgit v1.2.3 From d54389836ac63cb517bf863b9f6c22626c6aec26 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 29 May 2017 17:20:53 -0700 Subject: IB/core: Allow QP state transition from reset to error Playing with IP-O-IB interface can trigger a warning message: "ib0: Failed to modify QP to ERROR state" to be logged. This happens when the QP is in IB_QPS_RESET state and the stack is trying to transition it to IB_QPS_ERR state in ipoib_ib_dev_stop(). According to the IB spec, Table 91 - "QP State Transition Properties" it looks like the transition from reset to error is valid: Transition: Any State to Error Required Attributes: None Optional Attributes: None allowed Actions: Queue processing is stopped. Work Requests pending or in process are completed in error, when possible. This patch allows the transition and quiets the message. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4792f5209ac2..b822bedeb4a5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -872,6 +872,7 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { -- cgit v1.2.3 From bec7c79cd8f764ba84c8ec6d8c402b8a7cd3a54f Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 29 May 2017 17:21:32 -0700 Subject: IB/hfi1: Modify handling of physical link state by Host Driver Ensure states returned to the Fabric Manager are consistent with the OPA specification by caching the physical state along with the logical state. Reviewed-by: Stuart Summers Reviewed-by: Ira Weiny Reviewed-by: Andrzej Kotlowski Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 102 +++++++++++++++++++++++++----------- drivers/infiniband/hw/hfi1/chip.h | 2 +- drivers/infiniband/hw/hfi1/driver.c | 8 +-- drivers/infiniband/hw/hfi1/hfi.h | 18 ++++++- drivers/infiniband/hw/hfi1/mad.c | 6 +-- drivers/infiniband/hw/hfi1/verbs.c | 2 +- 6 files changed, 97 insertions(+), 41 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 99c29ddcca35..3fae98d079e4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1066,6 +1066,8 @@ static int thermal_init(struct hfi1_devdata *dd); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10028,28 +10030,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) sdma_update_lmc(dd, mask, ppd->lid); } -static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs) -{ - unsigned long timeout; - u32 curr_state; - - timeout = jiffies + msecs_to_jiffies(msecs); - while (1) { - curr_state = read_physical_state(dd); - if (curr_state == state) - break; - if (time_after(jiffies, timeout)) { - dd_dev_err(dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, curr_state); - return -ETIMEDOUT; - } - usleep_range(1950, 2050); /* sleep 2ms-ish */ - } - - return 0; -} - static const char *state_completed_string(u32 completed) { static const char * const state_completed[] = { @@ -10283,7 +10263,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) if (do_wait) { /* it can take a while for the link to go down */ - ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000); + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); if (ret < 0) return ret; } @@ -10536,6 +10516,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) goto unexpected; } + /* + * Wait for Link_Up physical state. + * Physical and Logical states should already be + * be transitioned to LinkUp and LinkInit respectively. + */ + ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to LINK-UP\n", + __func__); + break; + } + ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000); if (ret) { dd_dev_err(dd, @@ -10649,6 +10642,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) */ if (ret) goto_offline(ppd, 0); + else + cache_physical_state(ppd); break; case HLS_DN_DISABLE: /* link is disabled */ @@ -10673,6 +10668,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; break; } + ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000); + if (ret) { + dd_dev_err(dd, + "%s: physical state did not change to DISABLED\n", + __func__); + break; + } dc_shutdown(dd); } ppd->host_link_state = HLS_DN_DISABLE; @@ -10690,6 +10692,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_DN_POLL) goto unexpected; ppd->host_link_state = HLS_VERIFY_CAP; + cache_physical_state(ppd); break; case HLS_GOING_UP: if (ppd->host_link_state != HLS_VERIFY_CAP) @@ -12663,21 +12666,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, return -ETIMEDOUT; } -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd) +/* + * Read the physical hardware link state and set the driver's cached value + * of it. + */ +void cache_physical_state(struct hfi1_pportdata *ppd) { - u32 pstate; + u32 read_pstate; u32 ib_pstate; - pstate = read_physical_state(ppd->dd); - ib_pstate = chip_to_opa_pstate(ppd->dd, pstate); - if (ppd->last_pstate != ib_pstate) { + read_pstate = read_physical_state(ppd->dd); + ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate); + /* check if OPA pstate changed */ + if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) { dd_dev_info(ppd->dd, "%s: physical state changed to %s (0x%x), phy 0x%x\n", __func__, opa_pstate_name(ib_pstate), ib_pstate, - pstate); - ppd->last_pstate = ib_pstate; + read_pstate); + } + ppd->pstate = read_pstate; +} + +/* + * wait_physical_linkstate - wait for an physical link state change to occur + * @ppd: port device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for physical link state change to occur. + * Returns 0 if state reached, otherwise -ETIMEDOUT. + */ +static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, + int msecs) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + cache_physical_state(ppd); + if (ppd->pstate == state) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link state 0x%x, current state is 0x%x\n", + state, ppd->pstate); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ } - return ib_pstate; + + return 0; } #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ @@ -14781,7 +14819,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->last_pstate = 0xff; /* invalid value */ + ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 0b4f418ba0ac..3dab3156ba4a 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); u32 get_logical_state(struct hfi1_pportdata *ppd); +void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); @@ -1354,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); -u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 9e59430bc55c..e64e9e28c936 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -906,10 +906,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, sc = hfi1_9B_get_sc5(hdr, packet->rhf); } if (sc != SC15_PACKET) { - int hwstate = read_logical_state(dd); + int hwstate = driver_lstate(rcd->ppd); - if (hwstate != LSTATE_ACTIVE) { - dd_dev_info(dd, "Unexpected link state %d\n", hwstate); + if (hwstate != IB_PORT_ACTIVE) { + dd_dev_info(dd, + "Unexpected link state %s\n", + opa_lstate_name(hwstate)); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 8f74cf6d6c9a..bca781c3b5ac 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -663,7 +663,7 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 last_pstate; /* info only */ + u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1330,6 +1330,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) return ppd->lstate; } +/* return the driver's idea of the physical OPA port state */ +static inline u32 driver_pstate(struct hfi1_pportdata *ppd) +{ + /* + * The driver does some processing from the time the physical + * link state is at LINKUP to the time the SM can be notified + * as such. Return IB_PORTPHYSSTATE_TRAINING until the software + * state is ready. + */ + if (ppd->pstate == PLS_LINKUP && + !(ppd->host_link_state & HLS_UP)) + return IB_PORTPHYSSTATE_TRAINING; + else + return chip_to_opa_pstate(ppd->dd, ppd->pstate); +} + void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index b180dff5f2e9..c8daf633212d 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -113,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) return; /* o14-3.2.1 */ - if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) return; /* o14-2 */ @@ -615,7 +615,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; pi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | state; + (driver_pstate(ppd) << 4) | state; pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc; @@ -1791,7 +1791,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, ppd->offline_disabled_reason; psi->port_states.portphysstate_portstate = - (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); + (driver_pstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2d7759f0c6b4..5b53faf47042 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1354,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->lmc = ppd->lmc; /* OPA logical states match IB logical states */ props->state = driver_lstate(ppd); - props->phys_state = hfi1_ibphys_portstate(ppd); + props->phys_state = driver_pstate(ppd); props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ -- cgit v1.2.3 From 13d84914db56c1afd1c9bf4f41e9bf91f061a7dd Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 29 May 2017 17:22:01 -0700 Subject: IB/hfi1,qib: Do not send QKey trap for UD qps According to IBTA spec a QKey violation should not result in a bad qkey trap being triggered for UD queue pairs. Also since it is a silent error we do not increment the q_key violation or the dropped packet counters. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 14 ++++-------- drivers/infiniband/hw/hfi1/ruc.c | 8 +++---- drivers/infiniband/hw/hfi1/ud.c | 37 ++++++++++-------------------- drivers/infiniband/hw/hfi1/verbs.h | 4 ++-- drivers/infiniband/hw/qib/qib_mad.c | 13 ++++------- drivers/infiniband/hw/qib/qib_ruc.c | 20 ++++++++-------- drivers/infiniband/hw/qib/qib_ud.c | 43 ++++++++++++----------------------- drivers/infiniband/hw/qib/qib_verbs.h | 4 ++-- 8 files changed, 54 insertions(+), 89 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index c8daf633212d..a081a98d728a 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -180,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2) { struct opa_mad_notice_attr data; u32 lid = ppd_from_ibp(ibp)->lid; @@ -191,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, u32 _lid2 = lid2; memset(&data, 0, sizeof(data)); - - if (trap_num == OPA_TRAP_BAD_P_KEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = OPA_TRAP_BAD_P_KEY; data.issuer_lid = cpu_to_be32(lid); data.ntc_257_258.lid1 = cpu_to_be32(_lid1); data.ntc_257_258.lid2 = cpu_to_be32(_lid2); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 476fe5da2992..9cf506a9a796 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -254,8 +254,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -290,8 +290,8 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) } if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, (u16)bth0, sl, + 0, qp->ibqp.qp_num, slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index c995aa58c36a..6bf7a1b08491 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, qp->s_pkey_index, slid))) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - slid, rdma_ah_get_dlid(ah_attr)); + hfi1_bad_pkey(ibp, pkey, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + slid, rdma_ah_get_dlid(ah_attr)); goto drop; } } @@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - lid, - rdma_ah_get_dlid(ah_attr)); - goto drop; - } + if (unlikely(qkey != qp->qkey)) + goto drop; /* silently drop per IBTA spec */ } /* @@ -722,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * for invalid pkeys is optional according to * IB spec (release 1.3, section 10.9.4) */ - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, - pkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + hfi1_bad_pkey(ibp, + pkey, sl, + src_qp, qp->ibqp.qp_num, + slid, dlid); return; } } else { @@ -734,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) { - hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl, - src_qp, qp->ibqp.qp_num, - slid, dlid); + if (unlikely(qkey != qp->qkey)) /* Silent drop */ return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen > 2048 || (sc5 == 0xF)))) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 17b38cd5f654..fdf1e1fb880c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) /* * This must be called with s_lock held. */ -void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); +void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, u16 lid1, u16 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index da295e0392ed..a4a7f2a76f24 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -134,24 +134,21 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) } /* - * Send a bad [PQ]_Key trap (ch. 14.3.8). + * Send a bad P_Key trap (ch. 14.3.8). */ -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) { struct ib_mad_notice_attr data; - if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) - ibp->rvp.pkey_violations++; - else - ibp->rvp.qkey_violations++; ibp->rvp.n_pkt_drops++; + ibp->rvp.pkey_violations++; /* Send violation trap */ data.generic_type = IB_NOTICE_TYPE_SECURITY; data.prod_type_msb = 0; data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = trap_num; + data.trap_num = IB_NOTICE_TRAP_BAD_PKEY; data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); data.toggle_count = 0; memset(&data.details, 0, sizeof(data.details)); diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 88d84cbf7e5a..28528459a052 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -256,11 +256,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_alt_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -295,11 +295,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, } if (!qib_pkey_ok((u16)bth0, qib_get_pkey(ibp, qp->s_pkey_index))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - (u16)bth0, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - 0, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + qib_bad_pkey(ibp, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); goto err; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 341a123ee95c..be4907453ac4 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -66,8 +66,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; - rcu_read_unlock(); - return; + goto drop; } sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? @@ -94,11 +93,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + qib_bad_pkey(ibp, pkey1, + rdma_ah_get_sl(ah_attr), + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(rdma_ah_get_dlid(ah_attr))); goto drop; } } @@ -113,18 +112,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) qkey = (int)swqe->ud_wr.remote_qkey < 0 ? sqp->qkey : swqe->ud_wr.remote_qkey; - if (unlikely(qkey != qp->qkey)) { - u16 lid; - - lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - rdma_ah_get_sl(ah_attr), - sqp->ibqp.qp_num, qp->ibqp.qp_num, - cpu_to_be16(lid), - cpu_to_be16(rdma_ah_get_dlid(ah_attr))); + if (unlikely(qkey != qp->qkey)) goto drop; - } } /* @@ -487,22 +476,18 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, pkey1 = be32_to_cpu(ohdr->bth[0]); pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, - pkey1, - (be16_to_cpu(hdr->lrh[0]) >> 4) & + qib_bad_pkey(ibp, + pkey1, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); return; } } - if (unlikely(qkey != qp->qkey)) { - qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, - (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, - src_qp, qp->ibqp.qp_num, - hdr->lrh[3], hdr->lrh[1]); + if (unlikely(qkey != qp->qkey)) return; - } + /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && (tlen != 256 || diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..33d5691a9b2d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -241,8 +241,8 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0); } -void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, - u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); +void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); -- cgit v1.2.3 From ddbf2efff4ad85135b9fd1cb53b10069a160bd58 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 9 Jun 2017 15:59:26 -0700 Subject: IB/hfi1: Fix DC 8051 host info flag array Fix info array of host message flags by adding entry for link width downgrade and reverse values for BC SMA and BC PWR_MSG messages Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3fae98d079e4..ebcb2c405c5f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = { */ static struct flag_table dc8051_info_host_msg_flags[] = { FLAG_ENTRY0("Host request done", 0x0001), - FLAG_ENTRY0("BC SMA message", 0x0002), - FLAG_ENTRY0("BC PWR_MGM message", 0x0004), + FLAG_ENTRY0("BC PWR_MGM message", 0x0002), + FLAG_ENTRY0("BC SMA message", 0x0004), FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008), FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010), FLAG_ENTRY0("External device config request", 0x0020), FLAG_ENTRY0("VerifyCap all frames received", 0x0040), FLAG_ENTRY0("LinkUp achieved", 0x0080), FLAG_ENTRY0("Link going down", 0x0100), + FLAG_ENTRY0("Link width downgraded", 0x0200), }; static u32 encoded_size(u32 size); -- cgit v1.2.3 From 702265fc0002497fd0ddaae4a5bec00a8a40da3e Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:33 -0700 Subject: IB/hfi1: Set proper logging levels on QSFP cable error events Change QSFP cable error events logging levels from info to error. Reviewed-by: Jakub Byczkowski Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 64 +++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ebcb2c405c5f..b8ee0e27dae6 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9376,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too high\n", + __func__); if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) || (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING)) - dd_dev_info(dd, "%s: QSFP cable temperature too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP cable temperature too low\n", + __func__); /* * The remaining alarms/warnings don't matter if the link is down. @@ -9392,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too high\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too high\n", + __func__); if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) || (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING)) - dd_dev_info(dd, "%s: QSFP supply voltage too low\n", - __func__); + dd_dev_err(dd, "%s: QSFP supply voltage too low\n", + __func__); /* Byte 2 is vendor specific */ if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n", + __func__); if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n", + __func__); if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) || (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n", + __func__); if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n", + __func__); if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) || (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING)) - dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n", - __func__); + dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n", + __func__); /* Bytes 9-10 and 11-12 are reserved */ /* Bytes 13-15 are vendor specific */ -- cgit v1.2.3 From 9c1a99c3882beb9e88ed41d914e75bab2d593926 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:40 -0700 Subject: IB/hfi1: Create common expected receive verbs/PSM code Declarations and code in common between verbs and PSM are now moved to exp_rcv.[ch]. Reviewed-by: Michael J. Ruhl Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Makefile | 2 +- drivers/infiniband/hw/hfi1/exp_rcv.c | 118 +++++++++++++++++++ drivers/infiniband/hw/hfi1/exp_rcv.h | 187 ++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/file_ops.c | 4 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 121 ------------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 23 +--- drivers/infiniband/hw/hfi1/user_sdma.c | 38 ------ 7 files changed, 309 insertions(+), 184 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.c create mode 100644 drivers/infiniband/hw/hfi1/exp_rcv.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 88085f65432e..66d538c033b0 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ - eprom.o file_ops.o firmware.o \ + eprom.o exp_rcv.o file_ops.o firmware.o \ init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c new file mode 100644 index 000000000000..08d13ed1b574 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -0,0 +1,118 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "exp_rcv.h" +#include "trace.h" + +/** + * exp_tid_group_init - initialize exp_tid_set + * @set - the set + */ +void hfi1_exp_tid_group_init(struct exp_tid_set *set) +{ + INIT_LIST_HEAD(&set->list); + set->count = 0; +} + +/** + * alloc_ctxt_rcv_groups - initialize expected receive groups + * @rcd - the context to add the groupings to + */ +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 tidbase; + struct tid_group *grp; + int i; + + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); + + tidbase = rcd->expected_base; + for (i = 0; i < rcd->expected_count / + dd->rcv_entries.group_size; i++) { + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto bail; + grp->size = dd->rcv_entries.group_size; + grp->base = tidbase; + tid_group_add_tail(grp, &rcd->tid_group_list); + tidbase += dd->rcv_entries.group_size; + } + + return 0; +bail: + hfi1_free_ctxt_rcv_groups(rcd); + return -ENOMEM; +} + +/** + * free_ctxt_rcv_groups - free expected receive groups + * @rcd - the context to free + * + * The routine dismantles the expect receive linked + * list and clears any tids associated with the receive + * context. + * + * This should only be called for kernel contexts and the + * a base user context. + */ +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) +{ + struct tid_group *grp, *gptr; + + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); + WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); + + list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { + tid_group_remove(grp, &rcd->tid_group_list); + kfree(grp); + } + + hfi1_clear_tids(rcd); +} diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h new file mode 100644 index 000000000000..c7d02bcddded --- /dev/null +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -0,0 +1,187 @@ +#ifndef _HFI1_EXP_RCV_H +#define _HFI1_EXP_RCV_H +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "hfi.h" + +#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) + +#define EXP_TID_TIDLEN_MASK 0x7FFULL +#define EXP_TID_TIDLEN_SHIFT 0 +#define EXP_TID_TIDCTRL_MASK 0x3ULL +#define EXP_TID_TIDCTRL_SHIFT 20 +#define EXP_TID_TIDIDX_MASK 0x3FFULL +#define EXP_TID_TIDIDX_SHIFT 22 +#define EXP_TID_GET(tid, field) \ + (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) + +#define EXP_TID_SET(field, value) \ + (((value) & EXP_TID_TID##field##_MASK) << \ + EXP_TID_TID##field##_SHIFT) +#define EXP_TID_CLEAR(tid, field) ({ \ + (tid) &= ~(EXP_TID_TID##field##_MASK << \ + EXP_TID_TID##field##_SHIFT); \ + }) +#define EXP_TID_RESET(tid, field, value) do { \ + EXP_TID_CLEAR(tid, field); \ + (tid) |= EXP_TID_SET(field, (value)); \ + } while (0) + +/* + * Define fields in the KDETH header so we can update the header + * template. + */ +#define KDETH_OFFSET_SHIFT 0 +#define KDETH_OFFSET_MASK 0x7fff +#define KDETH_OM_SHIFT 15 +#define KDETH_OM_MASK 0x1 +#define KDETH_TID_SHIFT 16 +#define KDETH_TID_MASK 0x3ff +#define KDETH_TIDCTRL_SHIFT 26 +#define KDETH_TIDCTRL_MASK 0x3 +#define KDETH_INTR_SHIFT 28 +#define KDETH_INTR_MASK 0x1 +#define KDETH_SH_SHIFT 29 +#define KDETH_SH_MASK 0x1 +#define KDETH_KVER_SHIFT 30 +#define KDETH_KVER_MASK 0x3 +#define KDETH_JKEY_SHIFT 0x0 +#define KDETH_JKEY_MASK 0xff +#define KDETH_HCRC_UPPER_SHIFT 16 +#define KDETH_HCRC_UPPER_MASK 0xff +#define KDETH_HCRC_LOWER_SHIFT 24 +#define KDETH_HCRC_LOWER_MASK 0xff + +#define KDETH_GET(val, field) \ + (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) +#define KDETH_SET(dw, field, val) do { \ + u32 dwval = le32_to_cpu(dw); \ + dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ + dwval |= (((val) & KDETH_##field##_MASK) << \ + KDETH_##field##_SHIFT); \ + dw = cpu_to_le32(dwval); \ + } while (0) + +#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); }) + +/* KDETH OM multipliers and switch over point */ +#define KDETH_OM_SMALL 4 +#define KDETH_OM_SMALL_SHIFT 2 +#define KDETH_OM_LARGE 64 +#define KDETH_OM_LARGE_SHIFT 6 +#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) + +struct tid_group { + struct list_head list; + u32 base; + u8 size; + u8 used; + u8 map; +}; + +/* + * Write an "empty" RcvArray entry. + * This function exists so the TID registaration code can use it + * to write to unused/unneeded entries and still take advantage + * of the WC performance improvements. The HFI will ignore this + * write to the RcvArray entry. + */ +static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) +{ + /* + * Doing the WC fill writes only makes sense if the device is + * present and the RcvArray has been mapped as WC memory. + */ + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + writeq(0, dd->rcvarray_wc + (index * 8)); +} + +static inline void tid_group_add_tail(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_add_tail(&grp->list, &set->list); + set->count++; +} + +static inline void tid_group_remove(struct tid_group *grp, + struct exp_tid_set *set) +{ + list_del_init(&grp->list); + set->count--; +} + +static inline void tid_group_move(struct tid_group *group, + struct exp_tid_set *s1, + struct exp_tid_set *s2) +{ + tid_group_remove(group, s1); + tid_group_add_tail(group, s2); +} + +static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) +{ + struct tid_group *grp = + list_first_entry(&set->list, struct tid_group, list); + list_del_init(&grp->list); + set->count--; + return grp; +} + +static inline u32 rcventry2tidinfo(u32 rcventry) +{ + u32 pair = rcventry & ~0x1; + + return EXP_TID_SET(IDX, pair >> 1) | + EXP_TID_SET(CTRL, 1 << (rcventry - pair)); +} + +int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); +void hfi1_exp_tid_group_init(struct exp_tid_set *set); + +#endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3158128d57e8..2dd8758f0644 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -804,7 +804,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) dd->rcd[uctxt->ctxt] = NULL; - hfi1_user_exp_rcv_grp_free(uctxt); + hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->rcvwait_to = 0; @@ -1260,7 +1260,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = hfi1_user_exp_rcv_grp_init(fd); + ret = hfi1_alloc_ctxt_rcv_groups(uctxt); if (ret) goto setup_failed; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index a8f0aa4722f6..6318e6ca1b18 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -51,14 +51,6 @@ #include "trace.h" #include "mmu_rb.h" -struct tid_group { - struct list_head list; - u32 base; - u8 size; - u8 used; - u8 map; -}; - struct tid_rb_node { struct mmu_rb_node mmu; unsigned long phys; @@ -75,8 +67,6 @@ struct tid_pageset { u16 count; }; -#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list)) - #define num_user_pages(vaddr, len) \ (1 + (((((unsigned long)(vaddr) + \ (unsigned long)(len) - 1) & PAGE_MASK) - \ @@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = { .invalidate = tid_rb_invalidate }; -static inline u32 rcventry2tidinfo(u32 rcventry) -{ - u32 pair = rcventry & ~0x1; - - return EXP_TID_SET(IDX, pair >> 1) | - EXP_TID_SET(CTRL, 1 << (rcventry - pair)); -} - -static inline void exp_tid_group_init(struct exp_tid_set *set) -{ - INIT_LIST_HEAD(&set->list); - set->count = 0; -} - -static inline void tid_group_remove(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_del_init(&grp->list); - set->count--; -} - -static inline void tid_group_add_tail(struct tid_group *grp, - struct exp_tid_set *set) -{ - list_add_tail(&grp->list, &set->list); - set->count++; -} - -static inline struct tid_group *tid_group_pop(struct exp_tid_set *set) -{ - struct tid_group *grp = - list_first_entry(&set->list, struct tid_group, list); - list_del_init(&grp->list); - set->count--; - return grp; -} - -static inline void tid_group_move(struct tid_group *group, - struct exp_tid_set *s1, - struct exp_tid_set *s2) -{ - tid_group_remove(group, s1); - tid_group_add_tail(group, s2); -} - -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd) -{ - struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_devdata *dd = fd->dd; - u32 tidbase; - u32 i; - struct tid_group *grp, *gptr; - - exp_tid_group_init(&uctxt->tid_group_list); - exp_tid_group_init(&uctxt->tid_used_list); - exp_tid_group_init(&uctxt->tid_full_list); - - tidbase = uctxt->expected_base; - for (i = 0; i < uctxt->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto grp_failed; - - grp->size = dd->rcv_entries.group_size; - grp->base = tidbase; - tid_group_add_tail(grp, &uctxt->tid_group_list); - tidbase += dd->rcv_entries.group_size; - } - - return 0; - -grp_failed: - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - - return -ENOMEM; -} - /* * Initialize context and file private data needed for Expected * receive caching. This needs to be done after the context has @@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) return ret; } -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt) -{ - struct tid_group *grp, *gptr; - - list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list, - list) { - list_del_init(&grp->list); - kfree(grp); - } - hfi1_clear_tids(uctxt); -} - void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; @@ -302,23 +198,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/* - * Write an "empty" RcvArray entry. - * This function exists so the TID registaration code can use it - * to write to unused/unneeded entries and still take advantage - * of the WC performance improvements. The HFI will ignore this - * write to the RcvArray entry. - */ -static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) -{ - /* - * Doing the WC fill writes only makes sense if the device is - * present and the RcvArray has been mapped as WC memory. - */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) - writeq(0, dd->rcvarray_wc + (index * 8)); -} - /* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 5250c897298d..1bdc61be53cb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -49,29 +49,8 @@ #include "hfi.h" -#define EXP_TID_TIDLEN_MASK 0x7FFULL -#define EXP_TID_TIDLEN_SHIFT 0 -#define EXP_TID_TIDCTRL_MASK 0x3ULL -#define EXP_TID_TIDCTRL_SHIFT 20 -#define EXP_TID_TIDIDX_MASK 0x3FFULL -#define EXP_TID_TIDIDX_SHIFT 22 -#define EXP_TID_GET(tid, field) \ - (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK) +#include "exp_rcv.h" -#define EXP_TID_SET(field, value) \ - (((value) & EXP_TID_TID##field##_MASK) << \ - EXP_TID_TID##field##_SHIFT) -#define EXP_TID_CLEAR(tid, field) ({ \ - (tid) &= ~(EXP_TID_TID##field##_MASK << \ - EXP_TID_TID##field##_SHIFT); \ - }) -#define EXP_TID_RESET(tid, field, value) do { \ - EXP_TID_CLEAR(tid, field); \ - (tid) |= EXP_TID_SET(field, (value)); \ - } while (0) - -void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt); -int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fcadbb9978ca..8f7cfdd9e9ec 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -94,27 +94,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 /* Number of BTH.PSN bits used for sequence number in expected rcvs */ #define BTH_SEQ_MASK 0x7ffull -/* - * Define fields in the KDETH header so we can update the header - * template. - */ -#define KDETH_OFFSET_SHIFT 0 -#define KDETH_OFFSET_MASK 0x7fff -#define KDETH_OM_SHIFT 15 -#define KDETH_OM_MASK 0x1 -#define KDETH_TID_SHIFT 16 -#define KDETH_TID_MASK 0x3ff -#define KDETH_TIDCTRL_SHIFT 26 -#define KDETH_TIDCTRL_MASK 0x3 -#define KDETH_INTR_SHIFT 28 -#define KDETH_INTR_MASK 0x1 -#define KDETH_SH_SHIFT 29 -#define KDETH_SH_MASK 0x1 -#define KDETH_HCRC_UPPER_SHIFT 16 -#define KDETH_HCRC_UPPER_MASK 0xff -#define KDETH_HCRC_LOWER_SHIFT 24 -#define KDETH_HCRC_LOWER_MASK 0xff - #define AHG_KDETH_INTR_SHIFT 12 #define AHG_KDETH_SH_SHIFT 13 #define AHG_KDETH_ARRAY_SIZE 9 @@ -122,16 +101,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define KDETH_GET(val, field) \ - (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK) -#define KDETH_SET(dw, field, val) do { \ - u32 dwval = le32_to_cpu(dw); \ - dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \ - dwval |= (((val) & KDETH_##field##_MASK) << \ - KDETH_##field##_SHIFT); \ - dw = cpu_to_le32(dwval); \ - } while (0) - #define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ do { \ if ((idx) < ARRAY_SIZE((arr))) \ @@ -142,13 +111,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 return -ERANGE; \ } while (0) -/* KDETH OM multipliers and switch over point */ -#define KDETH_OM_SMALL 4 -#define KDETH_OM_SMALL_SHIFT 2 -#define KDETH_OM_LARGE 64 -#define KDETH_OM_LARGE_SHIFT 6 -#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1)) - /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ -- cgit v1.2.3 From f5114440c5491d4737b061c3a77195088bbaca52 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 15:59:46 -0700 Subject: IB/hfi1: Remove reading platform configuration from EFI variable Currently, platform configuration can be read from EFI variable for discrete cards. It will happen when reading from EPROM fails. EFI variables should not be queried for platform configuration in any scenario. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbda9b189216..41307e474525 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; - unsigned long size = 0; u8 *temp_platform_config = NULL; u32 esize; @@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd) dd->platform_config.size = esize; return; } - /* fail, try EFI variable */ - - ret = read_hfi1_efi_var(dd, "configuration", &size, - (void **)&temp_platform_config); - if (!ret) { - dd->platform_config.data = temp_platform_config; - dd->platform_config.size = size; - return; - } } dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", -- cgit v1.2.3 From f523984fb85d16e098aac94642cc16803a4cc61f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:53 -0700 Subject: IB/hfi1: Use a template for tid reg/unreg This is the preferred way to add a duplicate trace call. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace_rx.h | 46 ++++++++++------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 05fc6d68ffe8..7af593827d37 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -138,7 +138,8 @@ TRACE_EVENT(hfi1_receive_interrupt, ) ); -TRACE_EVENT(hfi1_exp_tid_reg, +DECLARE_EVENT_CLASS( + hfi1_exp_tid_reg_unreg, TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, unsigned long va, unsigned long pa, dma_addr_t dma), @@ -172,38 +173,17 @@ TRACE_EVENT(hfi1_exp_tid_reg, ) ); -TRACE_EVENT(hfi1_exp_tid_unreg, - TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, - unsigned long va, unsigned long pa, dma_addr_t dma), - TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma), - TP_STRUCT__entry( - __field(unsigned int, ctxt) - __field(u16, subctxt) - __field(u32, rarr) - __field(u32, npages) - __field(unsigned long, va) - __field(unsigned long, pa) - __field(dma_addr_t, dma) - ), - TP_fast_assign( - __entry->ctxt = ctxt; - __entry->subctxt = subctxt; - __entry->rarr = rarr; - __entry->npages = npages; - __entry->va = va; - __entry->pa = pa; - __entry->dma = dma; - ), - TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx", - __entry->ctxt, - __entry->subctxt, - __entry->rarr, - __entry->npages, - __entry->pa, - __entry->va, - __entry->dma - ) - ); +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); + +DEFINE_EVENT( + hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg, + TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages, + unsigned long va, unsigned long pa, dma_addr_t dma), + TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, -- cgit v1.2.3 From 8cb1021b806bda3f5fda6f3699e1f98df14245df Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 9 Jun 2017 15:59:59 -0700 Subject: IB/hfi1: Add traces for TID operations This patch adds a trace for putting a TID and for writing the RcvArray CSR. The CSR access template can be easily extended for additional CSR readq/writeq calls. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 ++------------- drivers/infiniband/hw/hfi1/trace_misc.h | 20 +++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_rx.h | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b8ee0e27dae6..937350d9deab 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9745,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd) return 0; } -static const char * const pt_names[] = { - "expected", - "eager", - "invalid" -}; - -static const char *pt_name(u32 type) -{ - return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type]; -} - /* * index is the index into the receive array */ @@ -9777,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, type, index); goto done; } - - hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx", - pt_name(type), index, pa, (unsigned long)order); + trace_hfi1_put_tid(dd, index, type, pa, order); #define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */ reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; + trace_hfi1_write_rcvarray(base + (index * 8), reg); writeq(reg, base + (index * 8)); if (type == PT_EAGER) diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h index deac77ddaeab..8db2253523ff 100644 --- a/drivers/infiniband/hw/hfi1/trace_misc.h +++ b/drivers/infiniband/hw/hfi1/trace_misc.h @@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt, __entry->src) ); +DECLARE_EVENT_CLASS( + hfi1_csr_template, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value), + TP_STRUCT__entry( + __field(void __iomem *, addr) + __field(u64, value) + ), + TP_fast_assign( + __entry->addr = addr; + __entry->value = value; + ), + TP_printk("addr %p value %llx", __entry->addr, __entry->value) +); + +DEFINE_EVENT( + hfi1_csr_template, hfi1_write_rcvarray, + TP_PROTO(void __iomem *addr, u64 value), + TP_ARGS(addr, value)); + #ifdef CONFIG_FAULT_INJECTION TRACE_EVENT(hfi1_fault_opcode, TP_PROTO(struct rvt_qp *qp, u8 opcode), diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 7af593827d37..84929578cfe6 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -52,6 +52,13 @@ #include "hfi.h" +#define tidtype_name(type) { PT_##type, #type } +#define show_tidtype(type) \ +__print_symbolic(type, \ + tidtype_name(EXPECTED), \ + tidtype_name(EAGER), \ + tidtype_name(INVALID)) \ + #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx @@ -185,6 +192,34 @@ DEFINE_EVENT( unsigned long va, unsigned long pa, dma_addr_t dma), TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)); +TRACE_EVENT( + hfi1_put_tid, + TP_PROTO(struct hfi1_devdata *dd, + u32 index, u32 type, unsigned long pa, u16 order), + TP_ARGS(dd, index, type, pa, order), + TP_STRUCT__entry( + DD_DEV_ENTRY(dd) + __field(unsigned long, pa); + __field(u32, index); + __field(u32, type); + __field(u16, order); + ), + TP_fast_assign( + DD_DEV_ASSIGN(dd); + __entry->pa = pa; + __entry->index = index; + __entry->type = type; + __entry->order = order; + ), + TP_printk("[%s] type %s pa %lx index %u order %u", + __get_str(dev), + show_tidtype(__entry->type), + __entry->pa, + __entry->index, + __entry->order + ) +); + TRACE_EVENT(hfi1_exp_tid_inval, TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr, u32 npages, dma_addr_t dma), -- cgit v1.2.3 From 581d01aaaca1fbb9df83cf3337c77e85215dcc5b Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:06 -0700 Subject: IB/qib: Replace deprecated pci functions with new API pci_enable_msix_range() and pci_disable_msix() have been deprecated. Updating to the new pci_alloc_irq_vectors() interface. Reviewed-by: Sebastian Sanchez Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib.h | 8 +- drivers/infiniband/hw/qib/qib_iba6120.c | 6 +- drivers/infiniband/hw/qib/qib_iba7220.c | 7 +- drivers/infiniband/hw/qib/qib_iba7322.c | 48 +++++----- drivers/infiniband/hw/qib/qib_pcie.c | 149 +++++++++++++------------------- 5 files changed, 98 insertions(+), 120 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index a3e21a25cea5..f9e1c69603a5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1,7 +1,7 @@ #ifndef _QIB_KERNEL_H #define _QIB_KERNEL_H /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * @@ -443,7 +443,7 @@ struct qib_irq_notify; #endif struct qib_msix_entry { - struct msix_entry msix; + int irq; void *arg; #ifdef CONFIG_INFINIBAND_QIB_DCA int dca; @@ -1433,9 +1433,9 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent); int qib_reinit_intr(struct qib_devdata *); -void qib_enable_intx(struct pci_dev *); +void qib_enable_intx(struct qib_devdata *dd); void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index e423b71e6ea0..46045fc28fa0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -1838,7 +1838,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); /* clear the reset error, init error/hwerror mask */ @@ -3562,7 +3562,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, if (qib_mini_init) goto bail; - if (qib_pcie_params(dd, 8, NULL, NULL)) + if (qib_pcie_params(dd, 8, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); dd->cspec->irq = pdev->irq; /* save IRQ */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index c3679c48e61c..49cd6e3beb72 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2011 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. @@ -2148,7 +2149,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) bail: if (ret) { - if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + if (qib_pcie_params(dd, dd->lbus_width, NULL)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -3309,7 +3310,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSI interrupt not detected, trying INTx interrupts\n"); qib_7220_free_irq(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); /* * Some newer kernels require free_irq before disable_msi, * and irq can be changed during disable and INTx enable @@ -4619,7 +4620,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, minwidth = 8; /* x8 capable boards */ break; } - if (qib_pcie_params(dd, minwidth, NULL, NULL)) + if (qib_pcie_params(dd, minwidth, NULL)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bb2439fff8fa..2653064ce9e9 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -2841,10 +2841,10 @@ static void qib_7322_nomsix(struct qib_devdata *dd) reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); #endif irq_set_affinity_hint( - dd->cspec->msix_entries[i].msix.vector, NULL); + dd->cspec->msix_entries[i].irq, NULL); free_cpumask_var(dd->cspec->msix_entries[i].mask); - free_irq(dd->cspec->msix_entries[i].msix.vector, - dd->cspec->msix_entries[i].arg); + free_irq(dd->cspec->msix_entries[i].irq, + dd->cspec->msix_entries[i].arg); } qib_nomsix(dd); } @@ -3336,9 +3336,9 @@ static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) qib_devinfo(dd->pcidev, "Disabling notifier on HCA %d irq %d\n", dd->unit, - m->msix.vector); + m->irq); irq_set_affinity_notifier( - m->msix.vector, + m->irq, NULL); m->notifier = NULL; } @@ -3354,7 +3354,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) int ret; m->notifier = n; - n->notify.irq = m->msix.vector; + n->notify.irq = m->irq; n->notify.notify = qib_irq_notifier_notify; n->notify.release = qib_irq_notifier_release; n->arg = m->arg; @@ -3500,10 +3500,21 @@ try_intx: - 1, QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq( - dd->cspec->msix_entries[msixnum].msix.vector, - handler, 0, dd->cspec->msix_entries[msixnum].name, - arg); + + dd->cspec->msix_entries[msixnum].irq = pci_irq_vector( + dd->pcidev, msixnum); + if (dd->cspec->msix_entries[msixnum].irq < 0) { + qib_dev_err(dd, + "Couldn't get MSIx irq (vec=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].irq); + qib_7322_nomsix(dd); + goto try_intx; + } + ret = request_irq(dd->cspec->msix_entries[msixnum].irq, + handler, 0, + dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could @@ -3512,7 +3523,7 @@ try_intx: qib_dev_err(dd, "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, ret); qib_7322_nomsix(dd); goto try_intx; @@ -3548,7 +3559,7 @@ try_intx: dd->cspec->msix_entries[msixnum].mask); } irq_set_affinity_hint( - dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].irq, dd->cspec->msix_entries[msixnum].mask); } msixnum++; @@ -3744,7 +3755,6 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -3762,8 +3772,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) write_7322_initregs(dd); if (qib_pcie_params(dd, dd->lbus_width, - &dd->cspec->num_msix_entries, - dd->cspec->msix_entries)) + &dd->cspec->num_msix_entries)) qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; continuing anyway\n"); @@ -5195,7 +5204,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd) qib_devinfo(dd->pcidev, "MSIx interrupt not detected, trying INTx interrupts\n"); qib_7322_nomsix(dd); - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); qib_setup_7322_interrupt(dd, 0); return 1; } @@ -7327,10 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, if (!dd->cspec->msix_entries) tabsize = 0; - for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].msix.entry = i; - - if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) + if (qib_pcie_params(dd, 8, &tabsize)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; continuing anyway\n"); /* may be less than we wanted, if not enough available */ diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c379b8342a09..d90403e31a9d 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2010 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two @@ -187,112 +188,84 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct qib_msix_entry *qib_msix_entry) -{ - int ret; - int nvec = *msixcnt; - struct msix_entry *msix_entry; - int i; - - ret = pci_msix_vec_count(dd->pcidev); - if (ret < 0) - goto do_intx; - - nvec = min(nvec, ret); - - /* We can't pass qib_msix_entry array to qib_msix_setup - * so use a dummy msix_entry array and copy the allocated - * irq back to the qib_msix_entry array. */ - msix_entry = kcalloc(nvec, sizeof(*msix_entry), GFP_KERNEL); - if (!msix_entry) - goto do_intx; - - for (i = 0; i < nvec; i++) - msix_entry[i] = qib_msix_entry[i].msix; - - ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); - if (ret < 0) - goto free_msix_entry; - else - nvec = ret; - - for (i = 0; i < nvec; i++) - qib_msix_entry[i].msix = msix_entry[i]; - - kfree(msix_entry); - *msixcnt = nvec; - return; - -free_msix_entry: - kfree(msix_entry); - -do_intx: - qib_dev_err( - dd, - "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n", - nvec, ret); - *msixcnt = 0; - qib_enable_intx(dd->pcidev); -} - /** * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. */ -static int qib_msi_setup(struct qib_devdata *dd, int pos) +static void qib_msi_setup(struct qib_devdata *dd, int pos) { struct pci_dev *pdev = dd->pcidev; u16 control; - int ret; - ret = pci_enable_msi(pdev); - if (ret) - qib_dev_err(dd, - "pci_enable_msi failed: %d, interrupts may not work\n", - ret); - /* continue even if it fails, we may still be OK... */ - - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, - &dd->msi_lo); - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, - &dd->msi_hi); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi); pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control); + /* now save the data (vector) info */ - pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT) - ? 12 : 8), + pci_read_config_word(pdev, + pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), &dd->msi_data); - return ret; } -int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct qib_msix_entry *entry) +static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec) +{ + unsigned int flags = PCI_IRQ_LEGACY; + + /* Check our capabilities */ + if (dd->pcidev->msix_cap) { + flags |= PCI_IRQ_MSIX; + } else { + if (dd->pcidev->msi_cap) { + flags |= PCI_IRQ_MSI; + /* Get msi_lo and msi_hi */ + qib_msi_setup(dd, dd->pcidev->msi_cap); + } + } + + if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI))) + qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + + return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags); +} + +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent) { u16 linkstat, speed; - int pos = 0, ret = 1; + int nvec; + int maxvec; + int ret = 0; if (!pci_is_pcie(dd->pcidev)) { qib_dev_err(dd, "Can't find PCI Express capability!\n"); /* set up something... */ dd->lbus_width = 1; dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + ret = -1; goto bail; } - pos = dd->pcidev->msix_cap; - if (nent && *nent && pos) { - qib_msix_setup(dd, pos, nent, entry); - ret = 0; /* did it, either MSIx or INTx */ - } else { - pos = dd->pcidev->msi_cap; - if (pos) - ret = qib_msi_setup(dd, pos); - else - qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + maxvec = (nent && *nent) ? *nent : 1; + nvec = qib_allocate_irqs(dd, maxvec); + if (nvec < 0) { + ret = nvec; + goto bail; + } + + /* + * If nent exists, make sure to record how many vectors were allocated + */ + if (nent) { + *nent = nvec; + + /* + * If we requested (nent) MSIX, but msix_enabled is not set, + * pci_alloc_irq_vectors() enabled INTx. + */ + if (!dd->pcidev->msix_enabled) + qib_dev_err(dd, + "no msix vectors allocated, using INTx\n"); } - if (!pos) - qib_enable_intx(dd->pcidev); pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); /* @@ -379,7 +352,7 @@ int qib_reinit_intr(struct qib_devdata *dd) ret = 1; bail: if (!ret && (dd->flags & QIB_HAS_INTX)) { - qib_enable_intx(dd->pcidev); + qib_enable_intx(dd); ret = 1; } @@ -397,7 +370,7 @@ bail: void qib_nomsi(struct qib_devdata *dd) { dd->msi_lo = 0; - pci_disable_msi(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* @@ -405,23 +378,21 @@ void qib_nomsi(struct qib_devdata *dd) */ void qib_nomsix(struct qib_devdata *dd) { - pci_disable_msix(dd->pcidev); + pci_free_irq_vectors(dd->pcidev); } /* * Similar to pci_intx(pdev, 1), except that we make sure * msi(x) is off. */ -void qib_enable_intx(struct pci_dev *pdev) +void qib_enable_intx(struct qib_devdata *dd) { u16 cw, new; int pos; + struct pci_dev *pdev = dd->pcidev; - /* first, turn on INTx */ - pci_read_config_word(pdev, PCI_COMMAND, &cw); - new = cw & ~PCI_COMMAND_INTX_DISABLE; - if (new != cw) - pci_write_config_word(pdev, PCI_COMMAND, new); + if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0) + qib_dev_err(dd, "Failed to enable INTx\n"); pos = pdev->msi_cap; if (pos) { -- cgit v1.2.3 From fe4e74eeb24286c730672e776ac4c2c3caa19137 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:12 -0700 Subject: IB/hfi1: Initialize TID lists to avoid crash on cleanup The expected receive lists (tid_xxx_list) are not initialized until late in the receive context initialization. If an error happens before the initialization, a NULL pointer access will occur during cleanup. Initialized the lists sooner rather than later to avoid this Oops: IP: unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] RIP: 0010:unlock_exp_tids.isra.11+0x26/0xd0 [hfi1] Call Trace: hfi1_user_exp_rcv_free+0x79/0xb0 [hfi1] hfi1_file_close+0x87/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/exp_rcv.c | 4 ---- drivers/infiniband/hw/hfi1/init.c | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 08d13ed1b574..0af91675acc6 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -69,10 +69,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) struct tid_group *grp; int i; - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); - tidbase = rcd->expected_base; for (i = 0; i < rcd->expected_count / dd->rcv_entries.group_size; i++) { diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 4a11d4da4c92..a00308ccf016 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -67,6 +67,7 @@ #include "aspm.h" #include "affinity.h" #include "vnic.h" +#include "exp_rcv.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -221,6 +222,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, hfi1_cdbg(PROC, "setting up context %u\n", ctxt); INIT_LIST_HEAD(&rcd->qp_wait_list); + hfi1_exp_tid_group_init(&rcd->tid_group_list); + hfi1_exp_tid_group_init(&rcd->tid_used_list); + hfi1_exp_tid_group_init(&rcd->tid_full_list); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); -- cgit v1.2.3 From f683c80ca68e087b55c6f9ab6ca6beb88ebc6d69 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 9 Jun 2017 16:00:19 -0700 Subject: IB/hfi1: Resolve kernel panics by reference counting receive contexts Base receive contexts can be used by sub contexts. Because of this, resources for the context cannot be completely freed until all sub contexts are done using the base context. Introduce a reference count so that the base receive context can be freed only when all sub contexts are done with it. Use the provided function call for setting default send context integrity rather than the manual method. The cleanup path does not set all variables back to NULL after freeing resources. Since the clean up code can get called more than once, (e.g. during context close and on the error path), it is necessary to make sure that all the variables are NULLed. Possible crash are: BUG: unable to handle kernel paging request at 0000000001908900 IP: read_csr+0x24/0x30 [hfi1] RIP: 0010:read_csr+0x24/0x30 [hfi1] Call Trace: sc_disable+0x40/0x110 [hfi1] hfi1_file_close+0x16f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 or kernel BUG at mm/slub.c:3877! RIP: 0010:kfree+0x14f/0x170 Call Trace: hfi1_free_ctxtdata+0x19a/0x2b0 [hfi1] ? hfi1_user_exp_rcv_grp_free+0x73/0x80 [hfi1] hfi1_file_close+0x20f/0x360 [hfi1] __fput+0xe7/0x210 ____fput+0xe/0x10 Fixes: Commit 62239fc6e554 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 39 +++++++++++++++-------- drivers/infiniband/hw/hfi1/hfi.h | 11 +++---- drivers/infiniband/hw/hfi1/init.c | 58 ++++++++++++++++++++++++++++------ drivers/infiniband/hw/hfi1/vnic_main.c | 11 ++++--- 4 files changed, 85 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2dd8758f0644..bbf80b1dd9d9 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) *ev = 0; __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { mutex_unlock(&hfi1_mutex); goto done; @@ -794,16 +796,15 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); /* - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) + * If a send context is allocated, reset context integrity + * checks to default and disable the send context. */ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type)); - sc_disable(uctxt->sc); + if (uctxt->sc) { + set_pio_integrity(uctxt->sc); + sc_disable(uctxt->sc); + } spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); @@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_stats.sps_ctxts--; if (++dd->freectxts == dd->num_user_contexts) aspm_enable_all(dd); + + /* _rcd_put() should be done after releasing mutex */ + dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); /* dd reference */ done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) { - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - return -ENOMEM; - } + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) ret = init_user_ctxt(fd); - if (ret) + if (ret) { clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + hfi1_rcd_put(fd->uctxt); + } } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, fd->uctxt = uctxt; fd->subctxt = subctxt; + + hfi1_rcd_get(uctxt); __set_bit(fd->subctxt, uctxt->in_use_ctxts); return 1; @@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, aspm_disable_all(dd); fd->uctxt = uctxt; + /* Count the reference for the fd */ + hfi1_rcd_get(uctxt); + return 0; ctxdata_free: dd->rcd[ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); return ret; } @@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) return 0; setup_failed: + /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */ hfi1_free_ctxtdata(dd, uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bca781c3b5ac..1a33a5087734 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -213,11 +213,9 @@ struct hfi1_ctxtdata { /* dynamic receive available interrupt timeout */ u32 rcvavail_timeout; - /* - * number of opens (including slave sub-contexts) on this instance - * (ignoring forks, dup, etc. for now) - */ - int cnt; + /* Reference count the base context usage */ + struct kref kref; + /* Device context index */ unsigned ctxt; /* @@ -1291,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); - +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a00308ccf016..dfdb4126ca05 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -191,15 +191,45 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; - if (dd->rcd) { - for (i = 0; i < dd->num_rcv_contexts; ++i) - hfi1_free_ctxtdata(dd, dd->rcd[i]); - } + for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) + hfi1_rcd_put(dd->rcd[i]); + + /* All the contexts should be freed, free the array */ kfree(dd->rcd); dd->rcd = NULL; return ret; } +/* + * Helper routines for the receive context reference count (rcd and uctxt) + */ +static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) +{ + kref_init(&rcd->kref); +} + +static void hfi1_rcd_free(struct kref *kref) +{ + struct hfi1_ctxtdata *rcd = + container_of(kref, struct hfi1_ctxtdata, kref); + + hfi1_free_ctxtdata(rcd->dd, rcd); + kfree(rcd); +} + +int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) +{ + if (rcd) + return kref_put(&rcd->kref, hfi1_rcd_free); + + return 0; +} + +void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) +{ + kref_get(&rcd->kref); +} + /* * Common code for user and kernel context setup. */ @@ -332,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, if (!rcd->opstats) goto bail; } + + hfi1_rcd_init(rcd); } return rcd; bail: @@ -931,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd) * @rcd: the ctxtdata structure * * free up any allocated data for a context - * This should not touch anything that would affect a simultaneous - * re-allocation of context data, because it is called after hfi1_mutex - * is released (and can be called from reinit as well). * It should never change any chip state, or global driver state. */ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - unsigned e; + u32 e; if (!rcd) return; @@ -957,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) /* all the RcvArray entries should have been cleared by now */ kfree(rcd->egrbufs.rcvtids); + rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { if (rcd->egrbufs.buffers[e].dma) @@ -966,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); + rcd->egrbufs.alloced = 0; + rcd->egrbufs.buffers = NULL; sc_free(rcd->sc); + rcd->sc = NULL; + vfree(rcd->subctxt_uregbase); vfree(rcd->subctxt_rcvegrbuf); vfree(rcd->subctxt_rcvhdr_base); kfree(rcd->opstats); - kfree(rcd); + + rcd->subctxt_uregbase = NULL; + rcd->subctxt_rcvegrbuf = NULL; + rcd->subctxt_rcvhdr_base = NULL; + rcd->opstats = NULL; } /* @@ -1366,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_free_ctxtdata(dd, rcd); + hfi1_rcd_put(rcd); } } kfree(tmp); diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index b601c2929f8f..950c1b4df442 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_free_ctxtdata() also releases send_context - * structure if uctxt->sc is not null + * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * release send_context structure if uctxt->sc is not null */ dd->rcd[uctxt->ctxt] = NULL; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_free_ctxtdata(dd, uctxt); + hfi1_rcd_put(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) @@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); if (rc) break; + hfi1_rcd_get(dd->vnic.ctxt[i]); dd->vnic.ctxt[i]->vnic_q_idx = i; } @@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) */ while (i-- > dd->vnic.num_ctxt) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } goto alloc_fail; @@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) if (--dd->vnic.num_vports == 0) { for (i = 0; i < dd->vnic.num_ctxt; i++) { deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); + hfi1_rcd_put(dd->vnic.ctxt[i]); dd->vnic.ctxt[i] = NULL; } hfi1_deinit_vnic_rsm(dd); -- cgit v1.2.3 From bc5214ee29220251e5507882696ded5ca183b169 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Fri, 9 Jun 2017 16:00:26 -0700 Subject: IB/hfi1: Handle missing magic values in config file Driver does not check whether proper configuration file exist in EPROM, and treats empty partition as possible valid configuration, preventing fallback to default firmware. Change EPROM read function to treat missing magic number as read error. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/eprom.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 26da124c88e2..d46b17107901 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, { void *buffer; void *p; - u32 length; int ret; buffer = kmalloc(P1_SIZE, GFP_KERNEL); @@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, /* scan for image magic that may trail the actual data */ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); - if (p) - length = p - buffer; - else - length = P1_SIZE; + if (!p) { + kfree(buffer); + return -ENOENT; + } *data = buffer; - *size = length; + *size = p - buffer; return 0; } -- cgit v1.2.3 From 3363828758597ed5e0f20172fccaad514b947031 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Jun 2017 11:29:03 +0300 Subject: IB/rxe: Use "foo *bar" instead of "foo * bar" Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 1ac5b8551a4d..6447d736d5a4 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -97,7 +97,7 @@ int rxe_rcv(struct sk_buff *skb); void rxe_dev_put(struct rxe_dev *rxe); struct rxe_dev *net_to_rxe(struct net_device *ndev); -struct rxe_dev *get_rxe_by_name(const char* name); +struct rxe_dev *get_rxe_by_name(const char *name); void rxe_port_up(struct rxe_dev *rxe); void rxe_port_down(struct rxe_dev *rxe); -- cgit v1.2.3 From c498e82e3c3231504831cd391970e63c1597a01b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Jun 2017 11:29:04 +0300 Subject: IB/rxe: Prefer 'unsigned int' to bare use of 'unsigned' Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_loc.h | 2 +- drivers/infiniband/sw/rxe/rxe_pool.c | 2 +- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index d6299edf9a5b..6b65c0132289 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -250,7 +250,7 @@ void rxe_resp_queue_pkt(struct rxe_dev *rxe, void rxe_comp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp, struct sk_buff *skb); -static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp) +static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp) { return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type]; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 75d11ee635ec..c1b5f38f31a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -188,7 +188,7 @@ int rxe_pool_init( struct rxe_dev *rxe, struct rxe_pool *pool, enum rxe_elem_type type, - unsigned max_elem) + unsigned int max_elem) { int err = 0; size_t size = rxe_type_info[type].size; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 7ee465d1a1e1..db7161456f45 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -43,7 +43,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, static inline void retry_first_write_send(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned mask, int npsn) + unsigned int mask, int npsn) { int i; -- cgit v1.2.3 From c05d26647b06b49e286b11cbd14e1444f9acc1c5 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Jun 2017 11:29:05 +0300 Subject: IB/rxe: Use DEVICE_ATTR_RO macro to show parent field Use DEVICE_ATTR RO() macro and rename the show function accordingly. Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index af90a7d42b96..ceab12a91d0c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1210,8 +1210,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) return rxe_mcast_drop_grp_elem(rxe, qp, mgid); } -static ssize_t rxe_show_parent(struct device *device, - struct device_attribute *attr, char *buf) +static ssize_t parent_show(struct device *device, + struct device_attribute *attr, char *buf) { struct rxe_dev *rxe = container_of(device, struct rxe_dev, ib_dev.dev); @@ -1219,7 +1219,7 @@ static ssize_t rxe_show_parent(struct device *device, return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); } -static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL); +static DEVICE_ATTR_RO(parent); static struct device_attribute *rxe_dev_attributes[] = { &dev_attr_parent, -- cgit v1.2.3 From 61013828f6673fffc6ead0a62d52674ffc1b34dc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Jun 2017 11:29:06 +0300 Subject: IB/rxe: Use __func__ to print function's name Its better to use __func__ to print functions name instead of writing the name in the print statement. Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_task.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_verbs.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index d2a14a1bdc7f..ea3810b29273 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -78,7 +78,7 @@ void rxe_do_task(unsigned long data) default: spin_unlock_irqrestore(&task->state_lock, flags); - pr_warn("bad state = %d in rxe_do_task\n", task->state); + pr_warn("%s failed with bad state %d\n", __func__, task->state); return; } @@ -105,7 +105,7 @@ void rxe_do_task(unsigned long data) break; default: - pr_warn("bad state = %d in rxe_do_task\n", + pr_warn("%s failed with bad state %d\n", __func__, task->state); } spin_unlock_irqrestore(&task->state_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index ceab12a91d0c..aba3fa9ae599 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1336,15 +1336,15 @@ int rxe_register_device(struct rxe_dev *rxe) err = ib_register_device(dev, NULL); if (err) { - pr_warn("rxe_register_device failed, err = %d\n", err); + pr_warn("%s failed with error %d\n", __func__, err); goto err1; } for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) { err = device_create_file(&dev->dev, rxe_dev_attributes[i]); if (err) { - pr_warn("device_create_file failed, i = %d, err = %d\n", - i, err); + pr_warn("%s failed with error %d for attr number %d\n", + __func__, err, i); goto err2; } } -- cgit v1.2.3 From b4fbec9673db22dcd3cea7ce04148af39fe19e12 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 15 Jun 2017 11:29:07 +0300 Subject: IB/rxe: Constify static rxe_vm_ops Constify static rxe_vm_ops that is never modified. Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index bd812e00988e..d22431e3a908 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma) kref_put(&ip->ref, rxe_mmap_release); } -static struct vm_operations_struct rxe_vm_ops = { +static const struct vm_operations_struct rxe_vm_ops = { .open = rxe_vma_open, .close = rxe_vma_close, }; -- cgit v1.2.3 From 63a5f483af0ead9e936e8580c6a029b13819b4dc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 30 May 2017 09:47:34 +0300 Subject: IB/cma: Set default gid type to RoCEv2 RoCEv2 is the preferred RDMA protocol for Ethernet link layer because of its advantages over RoCEv1. For better user experience make it the default choice for RDMA_CM connections if device/port support it. Signed-off-by: Moni Shoua Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0eb393237ba2..476de9a18948 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,6 +72,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 +#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP static const char * const cma_events[] = { [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", @@ -4280,8 +4281,12 @@ static void cma_add_one(struct ib_device *device) for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { supported_gids = roce_gid_type_mask_support(device, i); WARN_ON(!supported_gids); - cma_dev->default_gid_type[i - rdma_start_port(device)] = - find_first_bit(&supported_gids, BITS_PER_LONG); + if (supported_gids & CMA_PREFERRED_ROCE_GID_TYPE) + cma_dev->default_gid_type[i - rdma_start_port(device)] = + CMA_PREFERRED_ROCE_GID_TYPE; + else + cma_dev->default_gid_type[i - rdma_start_port(device)] = + find_first_bit(&supported_gids, BITS_PER_LONG); cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0; } -- cgit v1.2.3 From 4619ed40d916d32c253cabaeffa32ac98e0876fa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 1 Jun 2017 09:24:34 +0300 Subject: RDMA/bnxt_re: Delete unsupported modify_port function There is no need to return always zero for function which is not supported. The IB stack treats uninitialized ib_device->functions as not implemented. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Reviewed-by: Yuval Shaia Acked-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 17 ----------------- drivers/infiniband/hw/bnxt_re/ib_verbs.h | 3 --- drivers/infiniband/hw/bnxt_re/main.c | 1 - 3 files changed, 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f0e01b3ac711..5dc6e7ce3ab9 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -313,23 +313,6 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, return 0; } -int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num, - int port_modify_mask, - struct ib_port_modify *port_modify) -{ - switch (port_modify_mask) { - case IB_PORT_SHUTDOWN: - break; - case IB_PORT_INIT_TYPE: - break; - case IB_PORT_RESET_QKEY_CNTR: - break; - default: - break; - } - return 0; -} - int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index a0bb7e33d7ca..1df11ed272ea 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -141,9 +141,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev, struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); -int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num, - int port_modify_mask, - struct ib_port_modify *port_modify); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index ceae2d92fb08..deec8002dd36 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -474,7 +474,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->modify_device = bnxt_re_modify_device; ibdev->query_port = bnxt_re_query_port; - ibdev->modify_port = bnxt_re_modify_port; ibdev->get_port_immutable = bnxt_re_get_port_immutable; ibdev->query_pkey = bnxt_re_query_pkey; ibdev->query_gid = bnxt_re_query_gid; -- cgit v1.2.3 From f201777a5382e0a4152342365e928c1de99815ad Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 7 Jun 2017 15:42:02 -0500 Subject: IB/qib: remove duplicate code Remove duplicate code. Addresses-Coverity-ID: 1226951 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_mad.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index da295e0392ed..c5eaa3c0ed24 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -874,8 +874,6 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ib_dispatch_event(&event); } - ret = subn_get_portinfo(smp, ibdev, port); - /* restore re-reg bit per o14-12.2.1 */ pip->clientrereg_resv_subnetto |= clientrereg; -- cgit v1.2.3 From 44b0b7455f7a6c4bff3cda804719d2ed4494b4da Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 14 Jun 2017 23:13:33 +0300 Subject: IB/usnic: Implement get_netdev hook usnic's get_netdev hook for struct ib_device is missing - add it. Signed-off-by: Yuval Shaia Reviewed-by: Christian Benvenuti Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 + drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 10 ++++++++++ drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 1 + 3 files changed, 12 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c0c1e8b027b1..80577b9d2796 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -409,6 +409,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) us_ibdev->ib_dev.query_port = usnic_ib_query_port; us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; + us_ibdev->ib_dev.get_netdev = usnic_get_netdev; us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 4996984885c2..f9dc1e80c3b7 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -424,6 +424,16 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } +struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(device); + + if (us_ibdev->netdev) + dev_hold(us_ibdev->netdev); + + return us_ibdev->netdev; +} + int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 172e43b6fa95..1fda94425116 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -48,6 +48,7 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, struct ib_qp_init_attr *qp_init_attr); int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); +struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num); int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, -- cgit v1.2.3 From d41861942fc55c14b6280d9568a0d0112037f065 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Wed, 14 Jun 2017 23:13:34 +0300 Subject: IB/core: Add generic function to extract IB speed from netdev Logic of retrieving netdev speed from net_device and translating it to IB speed is implemented in rxe, in usnic and in bnxt drivers. Define new function which merges all. Signed-off-by: Yuval Shaia Reviewed-by: Christian Benvenuti Reviewed-by: Selvin Xavier Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/core/roce_gid_mgmt.c | 2 + drivers/infiniband/core/verbs.c | 55 ++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 49 ++----------------------- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 31 +++------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 53 +++------------------------ include/rdma/ib_verbs.h | 1 + 6 files changed, 73 insertions(+), 118 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 94a9eefb3cfc..90e3889b7fbe 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -44,6 +44,8 @@ static struct workqueue_struct *gid_cache_wq; +static struct workqueue_struct *gid_cache_wq; + enum gid_op_type { GID_DEL = 0, GID_ADD diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fb98ed67d5bc..40de69bf07cd 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1302,6 +1302,61 @@ int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) +{ + int rc; + u32 netdev_speed; + struct net_device *netdev; + struct ethtool_link_ksettings lksettings; + + if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + if (!dev->get_netdev) + return -EOPNOTSUPP; + + netdev = dev->get_netdev(dev, port_num); + if (!netdev) + return -ENODEV; + + rtnl_lock(); + rc = __ethtool_get_link_ksettings(netdev, &lksettings); + rtnl_unlock(); + + dev_put(netdev); + + if (!rc) { + netdev_speed = lksettings.base.speed; + } else { + netdev_speed = SPEED_1000; + pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name, + netdev_speed); + } + + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } + + return 0; +} +EXPORT_SYMBOL(ib_get_eth_speed); + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 5dc6e7ce3ab9..b10e1a6dce84 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -223,50 +223,6 @@ int bnxt_re_modify_device(struct ib_device *ibdev, return 0; } -static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width) -{ - struct ethtool_link_ksettings lksettings; - u32 espeed; - - if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) { - memset(&lksettings, 0, sizeof(lksettings)); - rtnl_lock(); - netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings); - rtnl_unlock(); - espeed = lksettings.base.speed; - } else { - espeed = SPEED_UNKNOWN; - } - switch (espeed) { - case SPEED_1000: - *speed = IB_SPEED_SDR; - *width = IB_WIDTH_1X; - break; - case SPEED_10000: - *speed = IB_SPEED_QDR; - *width = IB_WIDTH_1X; - break; - case SPEED_20000: - *speed = IB_SPEED_DDR; - *width = IB_WIDTH_4X; - break; - case SPEED_25000: - *speed = IB_SPEED_EDR; - *width = IB_WIDTH_1X; - break; - case SPEED_40000: - *speed = IB_SPEED_QDR; - *width = IB_WIDTH_4X; - break; - case SPEED_50000: - break; - default: - *speed = IB_SPEED_SDR; - *width = IB_WIDTH_1X; - break; - } -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) @@ -308,8 +264,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, * IB stack to avoid race in the NETDEV_UNREG path */ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - __to_ib_speed_width(rdev->netdev, &port_attr->active_speed, - &port_attr->active_width); + if (!ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width)) + return -EINVAL; return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index f9dc1e80c3b7..e5f57dd49980 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -226,27 +226,6 @@ static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) spin_unlock(&vf->lock); } -static void eth_speed_to_ib_speed(int speed, u8 *active_speed, - u8 *active_width) -{ - if (speed <= 10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (speed <= 20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (speed <= 30000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_QDR; - } else if (speed <= 40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_EDR; - } -} - static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) { if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN || @@ -326,12 +305,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); - struct ethtool_link_ksettings cmd; usnic_dbg("\n"); mutex_lock(&us_ibdev->usdev_lock); - __ethtool_get_link_ksettings(us_ibdev->netdev, &cmd); + if (!ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width)) { + mutex_unlock(&us_ibdev->usdev_lock); + return -EINVAL; + } + /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -355,8 +338,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; - eth_speed_to_ib_speed(cmd.base.speed, &props->active_speed, - &props->active_width); props->max_mtu = IB_MTU_4096; props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu); /* Userspace will adjust for hdrs */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index af90a7d42b96..e6c10e43a6d7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -51,40 +51,16 @@ static int rxe_query_device(struct ib_device *dev, return 0; } -static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed, - u8 *active_width) -{ - if (speed <= 1000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_SDR; - } else if (speed <= 10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (speed <= 20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (speed <= 30000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_QDR; - } else if (speed <= 40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_EDR; - } -} - static int rxe_query_port(struct ib_device *dev, u8 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; - u32 speed; + int rc = -EINVAL; if (unlikely(port_num != 1)) { pr_warn("invalid port_number %d\n", port_num); - goto err1; + goto out; } port = &rxe->port; @@ -93,29 +69,12 @@ static int rxe_query_port(struct ib_device *dev, *attr = port->attr; mutex_lock(&rxe->usdev_lock); - if (rxe->ndev->ethtool_ops->get_link_ksettings) { - struct ethtool_link_ksettings ks; - - rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks); - speed = ks.base.speed; - } else if (rxe->ndev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd; - - rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); - speed = cmd.speed; - } else { - pr_warn("%s speed is unknown, defaulting to 1000\n", - rxe->ndev->name); - speed = 1000; - } - rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, - &attr->active_width); + rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, + &attr->active_width); mutex_unlock(&rxe->usdev_lock); - return 0; - -err1: - return -EINVAL; +out: + return rc; } static int rxe_query_gid(struct ib_device *device, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..68d947dac9a2 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3555,6 +3555,7 @@ void ib_drain_qp(struct ib_qp *qp); int ib_resolve_eth_dmac(struct ib_device *device, struct rdma_ah_attr *ah_attr); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { -- cgit v1.2.3 From e1267b01240ab031a9c9dd84c1ffeb23670b590f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 26 Jun 2017 08:58:22 +0300 Subject: RDMA: Remove useless MODULE_VERSION All modules in drivers/infiniband defined and used MODULE_VERSION, which was pointless because the kernel version describes their state more accurate then those arbitrary numbers. Signed-off-by: Leon Romanovsky Acked-by: Sagi Grimbrg Reviewed-by: Sagi Grimberg Acked-by: Dennis Dalessandro Reviewed-by: Dennis Dalessandro Acked-by: Selvin Xavier Acked-by: Ram Amrani Reviewed-by: Johannes Thumshirn Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 1 - drivers/infiniband/hw/cxgb3/iwch.c | 1 - drivers/infiniband/hw/cxgb4/device.c | 1 - drivers/infiniband/hw/hfi1/driver.c | 1 - drivers/infiniband/hw/i40iw/i40iw_main.c | 1 - drivers/infiniband/hw/mlx4/main.c | 1 - drivers/infiniband/hw/mlx5/main.c | 1 - drivers/infiniband/hw/mthca/mthca_main.c | 1 - drivers/infiniband/hw/nes/nes.c | 1 - drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 - drivers/infiniband/hw/qedr/main.c | 1 - drivers/infiniband/hw/qedr/qedr.h | 1 - drivers/infiniband/hw/qib/qib_driver.c | 1 - drivers/infiniband/hw/usnic/usnic_ib_main.c | 1 - drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 1 - drivers/infiniband/sw/rxe/rxe.c | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 - drivers/infiniband/ulp/iser/iscsi_iser.c | 1 - drivers/infiniband/ulp/isert/ib_isert.c | 1 - drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 1 - drivers/infiniband/ulp/srp/ib_srp.c | 1 - 21 files changed, 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index deec8002dd36..fa123e9d6b59 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -70,7 +70,6 @@ static char version[] = MODULE_AUTHOR("Eddie Wai "); MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(ROCE_DRV_MODULE_VERSION); /* globals */ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 47b2ce2ef203..591de319c178 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -45,7 +45,6 @@ MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static void open_rnic_dev(struct t3cdev *); static void close_rnic_dev(struct t3cdev *); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae0b79aeea2e..fc886f81b885 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -44,7 +44,6 @@ MODULE_AUTHOR("Steve Wise"); MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static int allow_db_fc_on_t5; module_param(allow_db_fc_on_t5, int, 0644); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a50870e455a3..79a8b05855ac 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -96,7 +96,6 @@ MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("Intel Omni-Path Architecture driver"); -MODULE_VERSION(HFI1_DRIVER_VERSION); /* * MAX_PKT_RCV is the max # if packets processed per receive interrupt. diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index ae8463ff59a7..cc742c3132c6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -77,7 +77,6 @@ MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2"); MODULE_AUTHOR("Intel Corporation, "); MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); static struct i40e_client i40iw_client; static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw"; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d1b43cbbfea7..a422df781caf 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -70,7 +70,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int mlx4_ib_sm_guid_assign = 0; module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..faafea0b7c2f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -65,7 +65,6 @@ MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRIVER_VERSION); static char mlx5_version[] = DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index c309e5c96383..1b10d21c8026 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -49,7 +49,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a30aa6527f7e..26262919f5d1 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -63,7 +63,6 @@ MODULE_AUTHOR("NetEffect"); MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int interrupt_mod_interval = 0; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 57c9a2ad0260..757c65816295 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -58,7 +58,6 @@ #include "ocrdma_stats.h" #include -MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b5851fd67d4f..0ae30f5c8cbc 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -47,7 +47,6 @@ MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver"); MODULE_AUTHOR("QLogic Corporation"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(QEDR_MODULE_VERSION); #define QEDR_WQ_MULTIPLIER_DFT (3) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index ab7784bfdac6..392e76e26840 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -41,7 +41,6 @@ #include #include "qedr_hsi_rdma.h" -#define QEDR_MODULE_VERSION "8.10.10.0" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" #define DP_NAME(dev) ((dev)->ibdev.name) diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 2b5982f743ef..719906a9fd51 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -66,7 +66,6 @@ MODULE_PARM_DESC(compat_ddr_negotiate, MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel "); MODULE_DESCRIPTION("Intel IB driver"); -MODULE_VERSION(QIB_DRIVER_VERSION); /* * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 80577b9d2796..e69c8e476a2b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -721,7 +721,6 @@ static void __exit usnic_ib_destroy(void) MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); MODULE_AUTHOR("Upinder Malhi "); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR); module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 34ebc7615411..e76565280afa 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1119,5 +1119,4 @@ module_exit(pvrdma_cleanup); MODULE_AUTHOR("VMware, Inc"); MODULE_DESCRIPTION("VMware Paravirtual RDMA driver"); -MODULE_VERSION(DRV_VERSION); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index c21c913f911a..8c3d30b3092d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -38,7 +38,6 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION("0.2"); /* free resources for all ports on a device */ static void rxe_cleanup_ports(struct rxe_dev *rxe) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4ce315c92b48..7dcdbbacbf46 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -60,7 +60,6 @@ const char ipoib_driver_version[] = DRV_VERSION; MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 37b33d708c2d..19624e023ebd 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -77,7 +77,6 @@ MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); -MODULE_VERSION(DRV_VER); static struct scsi_host_template iscsi_iser_sht; static struct iscsi_transport iscsi_iser_transport; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 0e662656ef42..ceabdb85df8b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2710,7 +2710,6 @@ static void __exit isert_exit(void) } MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure"); -MODULE_VERSION("1.0"); MODULE_AUTHOR("nab@Linux-iSCSI.org"); MODULE_LICENSE("GPL"); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index cf768dd78d1b..6beddef26018 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -1053,4 +1053,3 @@ module_exit(opa_vnic_deinit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel Corporation"); MODULE_DESCRIPTION("Intel OPA Virtual Network driver"); -MODULE_VERSION(DRV_VERSION); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 2354c742caa1..fa5ccdb3bb2a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -62,7 +62,6 @@ MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_VERSION(DRV_VERSION); MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) -- cgit v1.2.3 From 5fac5b1b297fe1702f4c65d3b16aefb7d52967ab Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 29 Jun 2017 12:28:10 -0700 Subject: RDMA/bnxt_re: Add vlan tag for untagged RoCE traffic when PFC is configured Current implementation does not program vlan header insertion in RoCE packet if no vlan is configured. Firmware does not add prority when there is no vlan tag in the packet. Modify the code to insert vlan header when PFC is enabled on the interface. Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 53 ++++++++++++++++++--- drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++ drivers/infiniband/hw/bnxt_re/qplib_res.h | 2 + drivers/infiniband/hw/bnxt_re/qplib_sp.c | 77 ++++++++++++++++++++++++------- drivers/infiniband/hw/bnxt_re/qplib_sp.h | 2 + drivers/infiniband/hw/bnxt_re/roce_hsi.h | 4 +- 6 files changed, 124 insertions(+), 24 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index fa123e9d6b59..22527e35e13d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -833,6 +833,42 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev) mutex_unlock(&rdev->qp_lock); } +static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; + struct bnxt_qplib_gid gid; + u16 gid_idx, index; + int rc = 0; + + if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) + return 0; + + if (!sgid_tbl) { + dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated"); + return -EINVAL; + } + + for (index = 0; index < sgid_tbl->active; index++) { + gid_idx = sgid_tbl->hw_id[index]; + + if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, + sizeof(bnxt_qplib_gid_zero))) + continue; + /* need to modify the VLAN enable setting of non VLAN GID only + * as setting is done for VLAN GID while adding GID + */ + if (sgid_tbl->vlan[index]) + continue; + + memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); + + rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, + rdev->qplib_res.netdev->dev_addr); + } + + return rc; +} + static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) { u32 prio_map = 0, tmp_map = 0; @@ -852,8 +888,6 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) tmp_map = dcb_ieee_getapp_mask(netdev, &app); prio_map |= tmp_map; - if (!prio_map) - prio_map = -EFAULT; return prio_map; } @@ -879,10 +913,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) int rc; /* Get priority for roce */ - rc = bnxt_re_get_priority_mask(rdev); - if (rc < 0) - return rc; - prio_map = (u8)rc; + prio_map = bnxt_re_get_priority_mask(rdev); if (prio_map == rdev->cur_prio_map) return 0; @@ -904,6 +935,16 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return rc; } + /* Actual priorities are not programmed as they are already + * done by L2 driver; just enable or disable priority vlan tagging + */ + if ((prio_map == 0 && rdev->qplib_res.prio) || + (prio_map != 0 && !rdev->qplib_res.prio)) { + rdev->qplib_res.prio = prio_map ? true : false; + + bnxt_re_update_gid(rdev); + } + return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 62447b3badec..4e101704e801 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -468,9 +468,11 @@ static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res, kfree(sgid_tbl->tbl); kfree(sgid_tbl->hw_id); kfree(sgid_tbl->ctx); + kfree(sgid_tbl->vlan); sgid_tbl->tbl = NULL; sgid_tbl->hw_id = NULL; sgid_tbl->ctx = NULL; + sgid_tbl->vlan = NULL; sgid_tbl->max = 0; sgid_tbl->active = 0; } @@ -491,8 +493,15 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res, if (!sgid_tbl->ctx) goto out_free2; + sgid_tbl->vlan = kcalloc(max, sizeof(u8), GFP_KERNEL); + if (!sgid_tbl->vlan) + goto out_free3; + sgid_tbl->max = max; return 0; +out_free3: + kfree(sgid_tbl->ctx); + sgid_tbl->ctx = NULL; out_free2: kfree(sgid_tbl->hw_id); sgid_tbl->hw_id = NULL; @@ -514,6 +523,7 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res, } memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max); memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max); + memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max); sgid_tbl->active = 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e4855509719..e87207526d2c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -116,6 +116,7 @@ struct bnxt_qplib_sgid_tbl { u16 max; u16 active; void *ctx; + u8 *vlan; }; struct bnxt_qplib_pkey_tbl { @@ -188,6 +189,7 @@ struct bnxt_qplib_res { struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_pkey_tbl pkey_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; + bool prio; }; #define to_bnxt_qplib(ptr, type, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ef91ab786dd4..e277e54a05eb 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -213,6 +213,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, } memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, sizeof(bnxt_qplib_gid_zero)); + sgid_tbl->vlan[index] = 0; sgid_tbl->active--; dev_dbg(&res->pdev->dev, "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", @@ -265,28 +266,32 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct cmdq_add_gid req; struct creq_add_gid_resp resp; u16 cmd_flags = 0; - u32 temp32[4]; - u16 temp16[3]; int rc; RCFW_CMD_PREP(req, ADD_GID, cmd_flags); - memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid)); - req.gid[0] = cpu_to_be32(temp32[3]); - req.gid[1] = cpu_to_be32(temp32[2]); - req.gid[2] = cpu_to_be32(temp32[1]); - req.gid[3] = cpu_to_be32(temp32[0]); - if (vlan_id != 0xFFFF) - req.vlan = cpu_to_le16((vlan_id & - CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) | - CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | - CMDQ_ADD_GID_VLAN_VLAN_EN); + req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]); + req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]); + req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]); + req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]); + /* + * driver should ensure that all RoCE traffic is always VLAN + * tagged if RoCE traffic is running on non-zero VLAN ID or + * RoCE traffic is running on non-zero Priority. + */ + if ((vlan_id != 0xFFFF) || res->prio) { + if (vlan_id != 0xFFFF) + req.vlan = cpu_to_le16 + (vlan_id & CMDQ_ADD_GID_VLAN_VLAN_ID_MASK); + req.vlan |= cpu_to_le16 + (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | + CMDQ_ADD_GID_VLAN_VLAN_EN); + } /* MAC in network format */ - memcpy(temp16, smac, 6); - req.src_mac[0] = cpu_to_be16(temp16[0]); - req.src_mac[1] = cpu_to_be16(temp16[1]); - req.src_mac[2] = cpu_to_be16(temp16[2]); + req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]); + req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); + req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, 0); @@ -297,6 +302,9 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, /* Add GID to the sgid_tbl */ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid)); sgid_tbl->active++; + if (vlan_id != 0xFFFF) + sgid_tbl->vlan[free_idx] = 1; + dev_dbg(&res->pdev->dev, "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); @@ -306,6 +314,43 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, return 0; } +int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + struct bnxt_qplib_gid *gid, u16 gid_idx, + u8 *smac) +{ + struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl, + struct bnxt_qplib_res, + sgid_tbl); + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_modify_gid_resp resp; + struct cmdq_modify_gid req; + int rc; + u16 cmd_flags = 0; + + RCFW_CMD_PREP(req, MODIFY_GID, cmd_flags); + + req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]); + req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]); + req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]); + req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]); + if (res->prio) { + req.vlan |= cpu_to_le16 + (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 | + CMDQ_ADD_GID_VLAN_VLAN_EN); + } + + /* MAC in network format */ + req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]); + req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]); + req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]); + + req.gid_index = cpu_to_le16(gid_idx); + + rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, + (void *)&resp, NULL, 0); + return rc; +} + /* pkeys */ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 2ce7e2a32cf0..11322582f5e4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -135,6 +135,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id, bool update, u32 *index); +int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, + struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac); int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res, struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index, u16 *pkey); diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index fc23477ac52f..eeb55b2db57e 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -1473,8 +1473,8 @@ struct cmdq_modify_gid { u8 resp_size; u8 reserved8; __le64 resp_addr; - __le32 gid[4]; - __le16 src_mac[3]; + __be32 gid[4]; + __be16 src_mac[3]; __le16 vlan; #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0 -- cgit v1.2.3 From f218d67ef00431728ab7317e829006d00ecd5ca4 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 29 Jun 2017 12:28:15 -0700 Subject: RDMA/bnxt_re: Allow posting when QPs are in error This patch allows driver to post send and receive requests on QPs which are in error state. Instead of flushing the QP in the context of polling error CQEs, the QPs will be added to a flush list maintained per CQ. QP state is moved to error. QP is added to flush list if the user moves it to error state using modify_qp also. After polling the HW CQ in poll_cq routine, this flush list is traversed and driver completes work requests on each QP in the flush list, till the budget expires. The QP is moved out of flush list during QP destroy or during modify_QP to RESET. When ULPs post Work Requests while QP is in error state, driver will store the ULP data and then increment the QP producer s/w index, without ringing doorbell. It then schedules a worker to invoke the CQ handler since the interrupts wont be generated from the HW for this request. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 22 ++ drivers/infiniband/hw/bnxt_re/main.c | 3 +- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 465 +++++++++++++++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_fp.h | 25 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 26 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 10 +- 6 files changed, 462 insertions(+), 89 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b10e1a6dce84..d78fedc654d0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -786,6 +786,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) struct bnxt_re_dev *rdev = qp->rdev; int rc; + bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP"); @@ -800,6 +801,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) return rc; } + bnxt_qplib_del_flush_qp(&qp->qplib_qp); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &rdev->qp1_sqp->qplib_qp); if (rc) { @@ -1344,6 +1346,21 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, } qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state); + + if (!qp->sumem && + qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + dev_dbg(rdev_to_dev(rdev), + "Move QP = %p to flush list\n", + qp); + bnxt_qplib_add_flush_qp(&qp->qplib_qp); + } + if (!qp->sumem && + qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { + dev_dbg(rdev_to_dev(rdev), + "Move QP = %p out of flush list\n", + qp); + bnxt_qplib_del_flush_qp(&qp->qplib_qp); + } } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { qp->qplib_qp.modify_flags |= @@ -2354,6 +2371,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, } cq->qplib_cq.max_wqe = entries; cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id; + cq->qplib_cq.nq = &rdev->nq; rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2861,6 +2879,10 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) sq->send_phantom = false; } } + if (ncqe < budget) + ncqe += bnxt_qplib_process_flush_list(&cq->qplib_cq, + cqe + ncqe, + budget - ncqe); if (!ncqe) break; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 22527e35e13d..2f71123e0a9f 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1037,7 +1037,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ - rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw); + rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + BNXT_RE_MAX_QPC_COUNT); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 9af1514e5944..31e15f31bba5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -51,6 +51,168 @@ #include "qplib_fp.h" static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq); +static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp); + +static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp) +{ + qp->sq.condition = false; + qp->sq.send_phantom = false; + qp->sq.single = false; +} + +/* Flush list */ +static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + struct bnxt_qplib_cq *scq, *rcq; + + scq = qp->scq; + rcq = qp->rcq; + + if (!qp->sq.flushed) { + dev_dbg(&scq->hwq.pdev->dev, + "QPLIB: FP: Adding to SQ Flush list = %p", + qp); + bnxt_qplib_cancel_phantom_processing(qp); + list_add_tail(&qp->sq_flush, &scq->sqf_head); + qp->sq.flushed = true; + } + if (!qp->srq) { + if (!qp->rq.flushed) { + dev_dbg(&rcq->hwq.pdev->dev, + "QPLIB: FP: Adding to RQ Flush list = %p", + qp); + list_add_tail(&qp->rq_flush, &rcq->rqf_head); + qp->rq.flushed = true; + } + } +} + +void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __acquires(&qp->scq->hwq.lock) __acquires(&qp->rcq->hwq.lock) +{ + spin_lock_irqsave(&qp->scq->hwq.lock, *flags); + if (qp->scq == qp->rcq) + __acquire(&qp->rcq->hwq.lock); + else + spin_lock(&qp->rcq->hwq.lock); +} + +void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags) + __releases(&qp->scq->hwq.lock) __releases(&qp->rcq->hwq.lock) +{ + if (qp->scq == qp->rcq) + __release(&qp->rcq->hwq.lock); + else + spin_unlock(&qp->rcq->hwq.lock); + spin_unlock_irqrestore(&qp->scq->hwq.lock, *flags); +} + +static struct bnxt_qplib_cq *bnxt_qplib_find_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq *cq) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + if (qp->scq == qp->rcq) + buddy_cq = NULL; + else if (qp->scq == cq) + buddy_cq = qp->rcq; + else + buddy_cq = qp->scq; + return buddy_cq; +} + +static void bnxt_qplib_lock_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq *cq) + __acquires(&buddy_cq->hwq.lock) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq); + if (!buddy_cq) + __acquire(&cq->hwq.lock); + else + spin_lock(&buddy_cq->hwq.lock); +} + +static void bnxt_qplib_unlock_buddy_cq(struct bnxt_qplib_qp *qp, + struct bnxt_qplib_cq *cq) + __releases(&buddy_cq->hwq.lock) +{ + struct bnxt_qplib_cq *buddy_cq = NULL; + + buddy_cq = bnxt_qplib_find_buddy_cq(qp, cq); + if (!buddy_cq) + __release(&cq->hwq.lock); + else + spin_unlock(&buddy_cq->hwq.lock); +} + +void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_locks(qp, &flags); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_release_cq_locks(qp, &flags); +} + +static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +{ + struct bnxt_qplib_cq *scq, *rcq; + + scq = qp->scq; + rcq = qp->rcq; + + if (qp->sq.flushed) { + qp->sq.flushed = false; + list_del(&qp->sq_flush); + } + if (!qp->srq) { + if (qp->rq.flushed) { + qp->rq.flushed = false; + list_del(&qp->rq_flush); + } + } +} + +void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp) +{ + unsigned long flags; + + bnxt_qplib_acquire_cq_locks(qp, &flags); + __clean_cq(qp->scq, (u64)(unsigned long)qp); + qp->sq.hwq.prod = 0; + qp->sq.hwq.cons = 0; + __clean_cq(qp->rcq, (u64)(unsigned long)qp); + qp->rq.hwq.prod = 0; + qp->rq.hwq.cons = 0; + + __bnxt_qplib_del_flush_qp(qp); + bnxt_qplib_release_cq_locks(qp, &flags); +} + +static void bnxt_qpn_cqn_sched_task(struct work_struct *work) +{ + struct bnxt_qplib_nq_work *nq_work = + container_of(work, struct bnxt_qplib_nq_work, work); + + struct bnxt_qplib_cq *cq = nq_work->cq; + struct bnxt_qplib_nq *nq = nq_work->nq; + + if (cq && nq) { + spin_lock_bh(&cq->compl_lock); + if (atomic_read(&cq->arm_state) && nq->cqn_handler) { + dev_dbg(&nq->pdev->dev, + "%s:Trigger cq = %p event nq = %p\n", + __func__, cq, nq); + nq->cqn_handler(nq, cq); + } + spin_unlock_bh(&cq->compl_lock); + } + kfree(nq_work); +} static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) @@ -119,6 +281,7 @@ static void bnxt_qplib_service_nq(unsigned long data) struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; struct bnxt_qplib_hwq *hwq = &nq->hwq; struct nq_base *nqe, **nq_ptr; + struct bnxt_qplib_cq *cq; int num_cqne_processed = 0; u32 sw_cons, raw_cons; u16 type; @@ -143,15 +306,17 @@ static void bnxt_qplib_service_nq(unsigned long data) q_handle = le32_to_cpu(nqcne->cq_handle_low); q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high) << 32; - bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *) - ((unsigned long)q_handle)); - if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *) - ((unsigned long)q_handle))) + cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle; + bnxt_qplib_arm_cq_enable(cq); + spin_lock_bh(&cq->compl_lock); + atomic_set(&cq->arm_state, 0); + if (!nq->cqn_handler(nq, (cq))) num_cqne_processed++; else dev_warn(&nq->pdev->dev, "QPLIB: cqn - type 0x%x not handled", type); + spin_unlock_bh(&cq->compl_lock); break; } case NQ_BASE_TYPE_DBQ_EVENT: @@ -190,6 +355,10 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) { + if (nq->cqn_wq) { + destroy_workqueue(nq->cqn_wq); + nq->cqn_wq = NULL; + } /* Make sure the HW is stopped! */ synchronize_irq(nq->vector); tasklet_disable(&nq->worker); @@ -216,7 +385,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, void *, u8 event)) { resource_size_t nq_base; - int rc; + int rc = -1; nq->pdev = pdev; nq->vector = msix_vector; @@ -227,6 +396,11 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq); + /* Have a task to schedule CQ notifiers in post send case */ + nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq"); + if (!nq->cqn_wq) + goto fail; + nq->requested = false; rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq); if (rc) { @@ -401,8 +575,8 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; @@ -615,8 +789,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->id = le32_to_cpu(resp.xid); qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET; - sq->flush_in_progress = false; - rq->flush_in_progress = false; + INIT_LIST_HEAD(&qp->sq_flush); + INIT_LIST_HEAD(&qp->rq_flush); + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = (void *)qp; return 0; @@ -963,13 +1139,19 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, u16 cmd_flags = 0; int rc; + rcfw->qp_tbl[qp->id].qp_id = BNXT_QPLIB_QP_ID_INVALID; + rcfw->qp_tbl[qp->id].qp_handle = NULL; + RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags); req.qp_cid = cpu_to_le32(qp->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL, 0); - if (rc) + if (rc) { + rcfw->qp_tbl[qp->id].qp_id = qp->id; + rcfw->qp_tbl[qp->id].qp_handle = qp; return rc; + } /* Must walk the associated CQs to nullified the QP ptr */ spin_lock_irqsave(&qp->scq->hwq.lock, flags); @@ -1074,14 +1256,21 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, struct bnxt_qplib_swq *swq; struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr; struct sq_sge *hw_sge; + struct bnxt_qplib_nq_work *nq_work = NULL; + bool sch_handler = false; u32 sw_prod; u8 wqe_size16; int i, rc = 0, data_len = 0, pkt_num = 0; __le32 temp32; if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) { - rc = -EINVAL; - goto done; + if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + sch_handler = true; + dev_dbg(&sq->hwq.pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", + __func__); + goto queue_err; + } } if (bnxt_qplib_queue_full(sq)) { @@ -1301,12 +1490,35 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp, ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) & SQ_PSN_SEARCH_NEXT_PSN_MASK)); } - +queue_err: + if (sch_handler) { + /* Store the ULP info in the software structures */ + sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq); + swq = &sq->swq[sw_prod]; + swq->wr_id = wqe->wr_id; + swq->type = wqe->type; + swq->flags = wqe->flags; + if (qp->sig_type) + swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP; + swq->start_psn = sq->psn & BTH_PSN_MASK; + } sq->hwq.prod++; - qp->wqe_cnt++; done: + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->scq; + nq_work->nq = qp->scq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->scq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&sq->hwq.pdev->dev, + "QPLIB: FP: Failed to allocate SQ nq_work!"); + rc = -ENOMEM; + } + } return rc; } @@ -1334,15 +1546,17 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *rq = &qp->rq; struct rq_wqe *rqe, **rqe_ptr; struct sq_sge *hw_sge; + struct bnxt_qplib_nq_work *nq_work = NULL; + bool sch_handler = false; u32 sw_prod; int i, rc = 0; if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { - dev_err(&rq->hwq.pdev->dev, - "QPLIB: FP: QP (0x%x) is in the 0x%x state", - qp->id, qp->state); - rc = -EINVAL; - goto done; + sch_handler = true; + dev_dbg(&rq->hwq.pdev->dev, + "%s Error QP. Scheduling for poll_cq\n", + __func__); + goto queue_err; } if (bnxt_qplib_queue_full(rq)) { dev_err(&rq->hwq.pdev->dev, @@ -1378,7 +1592,27 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp, /* Supply the rqe->wr_id index to the wr_id_tbl for now */ rqe->wr_id[0] = cpu_to_le32(sw_prod); +queue_err: + if (sch_handler) { + /* Store the ULP info in the software structures */ + sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); + rq->swq[sw_prod].wr_id = wqe->wr_id; + } + rq->hwq.prod++; + if (sch_handler) { + nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC); + if (nq_work) { + nq_work->cq = qp->rcq; + nq_work->nq = qp->rcq->nq; + INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task); + queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); + } else { + dev_err(&rq->hwq.pdev->dev, + "QPLIB: FP: Failed to allocate RQ nq_work!"); + rc = -ENOMEM; + } + } done: return rc; } @@ -1471,6 +1705,9 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem; cq->period = BNXT_QPLIB_QUEUE_START_PERIOD; init_waitqueue_head(&cq->waitq); + INIT_LIST_HEAD(&cq->sqf_head); + INIT_LIST_HEAD(&cq->rqf_head); + spin_lock_init(&cq->compl_lock); bnxt_qplib_arm_cq_enable(cq); return 0; @@ -1513,9 +1750,13 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, while (*budget) { sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq); if (sw_cons == sw_prod) { - sq->flush_in_progress = false; break; } + /* Skip the FENCE WQE completions */ + if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) { + bnxt_qplib_cancel_phantom_processing(qp); + goto skip_compl; + } memset(cqe, 0, sizeof(*cqe)); cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR; cqe->opcode = CQ_BASE_CQE_TYPE_REQ; @@ -1525,6 +1766,7 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, cqe->type = sq->swq[sw_cons].type; cqe++; (*budget)--; +skip_compl: sq->hwq.cons++; } *pcqe = cqe; @@ -1536,11 +1778,24 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp, } static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, - int opcode, struct bnxt_qplib_cqe **pcqe, int *budget) + struct bnxt_qplib_cqe **pcqe, int *budget) { struct bnxt_qplib_cqe *cqe; u32 sw_prod, sw_cons; int rc = 0; + int opcode = 0; + + switch (qp->type) { + case CMDQ_CREATE_QP1_TYPE_GSI: + opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; + break; + case CMDQ_CREATE_QP_TYPE_RC: + opcode = CQ_BASE_CQE_TYPE_RES_RC; + break; + case CMDQ_CREATE_QP_TYPE_UD: + opcode = CQ_BASE_CQE_TYPE_RES_UD; + break; + } /* Flush the rest of the RQ */ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq); @@ -1567,6 +1822,21 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp, return rc; } +void bnxt_qplib_mark_qp_error(void *qp_handle) +{ + struct bnxt_qplib_qp *qp = qp_handle; + + if (!qp) + return; + + /* Must block new posting of SQ and RQ */ + qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; + bnxt_qplib_cancel_phantom_processing(qp); + + /* Add qp to flush list of the CQ */ + __bnxt_qplib_add_flush_qp(qp); +} + /* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive) * CQE is track from sw_cq_cons to max_element but valid only if VALID=1 */ @@ -1694,10 +1964,12 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, cqe_sq_cons, sq->hwq.max_elements); return -EINVAL; } - /* If we were in the middle of flushing the SQ, continue */ - if (sq->flush_in_progress) - goto flush; + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } /* Require to walk the sq's swq to fabricate CQEs for all previously * signaled SWQEs due to CQE aggregation from the current sq cons * to the cqe_sq_cons @@ -1733,11 +2005,9 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, sw_sq_cons, cqe->wr_id, cqe->status); cqe++; (*budget)--; - sq->flush_in_progress = true; - /* Must block new posting of SQ and RQ */ - qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; - sq->condition = false; - sq->single = false; + bnxt_qplib_lock_buddy_cq(qp, cq); + bnxt_qplib_mark_qp_error(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } else { if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { /* Before we complete, do WA 9060 */ @@ -1768,15 +2038,6 @@ out: * the WC for this CQE */ sq->single = false; - if (!sq->flush_in_progress) - goto done; -flush: - /* Require to walk the sq's swq to fabricate CQEs for all - * previously posted SWQEs due to the error CQE received - */ - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; done: return rc; } @@ -1798,6 +2059,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } + cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->length = le32_to_cpu(hwcqe->length); @@ -1817,8 +2084,6 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1827,12 +2092,13 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } + +done: return rc; } @@ -1853,6 +2119,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->length = le32_to_cpu(hwcqe->length); @@ -1876,8 +2147,6 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1886,12 +2155,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } +done: return rc; } @@ -1932,6 +2201,11 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, "QPLIB: process_cq Raw/QP1 qp is NULL"); return -EINVAL; } + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto done; + } cqe = *pcqe; cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK; cqe->flags = le16_to_cpu(hwcqe->flags); @@ -1960,8 +2234,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, wr_id_idx, rq->hwq.max_elements); return -EINVAL; } - if (rq->flush_in_progress) - goto flush_rq; cqe->wr_id = rq->swq[wr_id_idx].wr_id; cqe++; @@ -1970,13 +2242,13 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq, *pcqe = cqe; if (hwcqe->status != CQ_RES_RC_STATUS_OK) { - rq->flush_in_progress = true; -flush_rq: - rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe, - budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); } + +done: return rc; } @@ -1990,7 +2262,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe; u32 sw_cons = 0, cqe_cons; int rc = 0; - u8 opcode = 0; /* Check the Status */ if (hwcqe->status != CQ_TERMINAL_STATUS_OK) @@ -2005,6 +2276,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, "QPLIB: FP: CQ Process terminal qp is NULL"); return -EINVAL; } + /* Must block new posting of SQ and RQ */ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; @@ -2023,9 +2295,12 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, cqe_cons, sq->hwq.max_elements); goto do_rq; } - /* If we were in the middle of flushing, continue */ - if (sq->flush_in_progress) - goto flush_sq; + + if (qp->sq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + goto sq_done; + } /* Terminal CQE can also include aggregated successful CQEs prior. * So we must complete all CQEs from the current sq's cons to the @@ -2055,11 +2330,6 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, rc = -EAGAIN; goto sq_done; } - sq->flush_in_progress = true; -flush_sq: - rc = __flush_sq(sq, qp, pcqe, budget); - if (!rc) - sq->flush_in_progress = false; sq_done: if (rc) return rc; @@ -2075,26 +2345,23 @@ do_rq: cqe_cons, rq->hwq.max_elements); goto done; } + + if (qp->rq.flushed) { + dev_dbg(&cq->hwq.pdev->dev, + "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); + rc = 0; + goto done; + } + /* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR * from the current rq->cons to the rq->prod regardless what the * rq->cons the terminal CQE indicates */ - rq->flush_in_progress = true; - switch (qp->type) { - case CMDQ_CREATE_QP1_TYPE_GSI: - opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1; - break; - case CMDQ_CREATE_QP_TYPE_RC: - opcode = CQ_BASE_CQE_TYPE_RES_RC; - break; - case CMDQ_CREATE_QP_TYPE_UD: - opcode = CQ_BASE_CQE_TYPE_RES_UD; - break; - } - rc = __flush_rq(rq, qp, opcode, pcqe, budget); - if (!rc) - rq->flush_in_progress = false; + /* Add qp to flush list of the CQ */ + bnxt_qplib_lock_buddy_cq(qp, cq); + __bnxt_qplib_add_flush_qp(qp); + bnxt_qplib_unlock_buddy_cq(qp, cq); done: return rc; } @@ -2115,6 +2382,33 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq, return 0; } +int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, + struct bnxt_qplib_cqe *cqe, + int num_cqes) +{ + struct bnxt_qplib_qp *qp = NULL; + u32 budget = num_cqes; + unsigned long flags; + + spin_lock_irqsave(&cq->hwq.lock, flags); + list_for_each_entry(qp, &cq->sqf_head, sq_flush) { + dev_dbg(&cq->hwq.pdev->dev, + "QPLIB: FP: Flushing SQ QP= %p", + qp); + __flush_sq(&qp->sq, qp, &cqe, &budget); + } + + list_for_each_entry(qp, &cq->rqf_head, rq_flush) { + dev_dbg(&cq->hwq.pdev->dev, + "QPLIB: FP: Flushing RQ QP= %p", + qp); + __flush_rq(&qp->rq, qp, &cqe, &budget); + } + spin_unlock_irqrestore(&cq->hwq.lock, flags); + + return num_cqes - budget; +} + int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe, int num_cqes, struct bnxt_qplib_qp **lib_qp) { @@ -2205,6 +2499,7 @@ void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type) spin_lock_irqsave(&cq->hwq.lock, flags); if (arm_type) bnxt_qplib_arm_cq(cq, arm_type); - + /* Using cq->arm_state variable to track whether to issue cq handler */ + atomic_set(&cq->arm_state, 1); spin_unlock_irqrestore(&cq->hwq.lock, flags); } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 19176e06c98a..23a26d5cbe18 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -220,19 +220,20 @@ struct bnxt_qplib_q { u16 q_full_delta; u16 max_sge; u32 psn; - bool flush_in_progress; bool condition; bool single; bool send_phantom; u32 phantom_wqe_cnt; u32 phantom_cqe_cnt; u32 next_cq_cons; + bool flushed; }; struct bnxt_qplib_qp { struct bnxt_qplib_pd *pd; struct bnxt_qplib_dpi *dpi; u64 qp_handle; +#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF u32 id; u8 type; u8 sig_type; @@ -296,6 +297,8 @@ struct bnxt_qplib_qp { dma_addr_t sq_hdr_buf_map; void *rq_hdr_buf; dma_addr_t rq_hdr_buf_map; + struct list_head sq_flush; + struct list_head rq_flush; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) @@ -351,6 +354,7 @@ struct bnxt_qplib_cq { u16 period; struct bnxt_qplib_hwq hwq; u32 cnq_hw_ring_id; + struct bnxt_qplib_nq *nq; bool resize_in_progress; struct scatterlist *sghead; u32 nmap; @@ -360,6 +364,9 @@ struct bnxt_qplib_cq { unsigned long flags; #define CQ_FLAGS_RESIZE_IN_PROG 1 wait_queue_head_t waitq; + struct list_head sqf_head, rqf_head; + atomic_t arm_state; + spinlock_t compl_lock; /* synch CQ handlers */ }; #define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq) @@ -417,6 +424,13 @@ struct bnxt_qplib_nq { (struct bnxt_qplib_nq *nq, void *srq, u8 event); + struct workqueue_struct *cqn_wq; +}; + +struct bnxt_qplib_nq_work { + struct work_struct work; + struct bnxt_qplib_nq *nq; + struct bnxt_qplib_cq *cq; }; void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq); @@ -453,4 +467,13 @@ bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq); void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type); void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq); +void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp); +void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags); +void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp, + unsigned long *flags); +int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq, + struct bnxt_qplib_cqe *cqe, + int num_cqes); #endif /* __BNXT_QPLIB_FP_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 16e42754dbec..391bb7006e8f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -44,6 +44,9 @@ #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_rcfw.h" +#include "qplib_sp.h" +#include "qplib_fp.h" + static void bnxt_qplib_service_creq(unsigned long data); /* Hardware communication channel */ @@ -279,16 +282,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, struct creq_qp_event *qp_event) { struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + struct creq_qp_error_notification *err_event; struct bnxt_qplib_crsq *crsqe; unsigned long flags; + struct bnxt_qplib_qp *qp; u16 cbit, blocked = 0; u16 cookie; __le16 mcookie; + u32 qp_id; switch (qp_event->event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: + err_event = (struct creq_qp_error_notification *)qp_event; + qp_id = le32_to_cpu(err_event->xid); + qp = rcfw->qp_tbl[qp_id].qp_handle; dev_dbg(&rcfw->pdev->dev, "QPLIB: Received QP error notification"); + dev_dbg(&rcfw->pdev->dev, + "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", + qp_id, err_event->req_err_state_reason, + err_event->res_err_state_reason); + bnxt_qplib_acquire_cq_locks(qp, &flags); + bnxt_qplib_mark_qp_error(qp); + bnxt_qplib_release_cq_locks(qp, &flags); break; default: /* Command Response */ @@ -507,6 +523,7 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { + kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq); bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq); @@ -514,7 +531,8 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) } int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw) + struct bnxt_qplib_rcfw *rcfw, + int qp_tbl_sz) { rcfw->pdev = pdev; rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT; @@ -541,6 +559,12 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, if (!rcfw->crsqe_tbl) goto fail; + rcfw->qp_tbl_size = qp_tbl_sz; + rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node), + GFP_KERNEL); + if (!rcfw->qp_tbl) + goto fail; + return 0; fail: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 09ce121770cd..0ed312f17c8d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -148,6 +148,11 @@ struct bnxt_qplib_rcfw_sbuf { u32 size; }; +struct bnxt_qplib_qp_node { + u32 qp_id; /* QP id */ + void *qp_handle; /* ptr to qplib_qp */ +}; + /* RCFW Communication Channels */ struct bnxt_qplib_rcfw { struct pci_dev *pdev; @@ -181,11 +186,13 @@ struct bnxt_qplib_rcfw { /* Actual Cmd and Resp Queues */ struct bnxt_qplib_hwq cmdq; struct bnxt_qplib_crsq *crsqe_tbl; + int qp_tbl_size; + struct bnxt_qplib_qp_node *qp_tbl; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw); + struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, @@ -207,4 +214,5 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); +void bnxt_qplib_mark_qp_error(void *qp_handle); #endif /* __BNXT_QPLIB_RCFW_H__ */ -- cgit v1.2.3 From c85023e153e3824661d07307138fdeff41f6d86a Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 30 May 2017 09:42:54 +0300 Subject: IB/mlx5: Add raw ethernet local loopback support Currently, unicast/multicast loopback raw ethernet (non-RDMA) packets are sent back to the vport. A unicast loopback packet is the packet with destination MAC address the same as the source MAC address. For multicast, the destination MAC address is in the vport's multicast filter list. Moreover, the local loopback is not needed if there is one or none user space context. After this patch, the raw ethernet unicast and multicast local loopback are disabled by default. When there is more than one user space context, the local loopback is enabled. Note that when local loopback is disabled, raw ethernet packets are not looped back to the vport and are forwarded to the next routing level (eswitch, or multihost switch, or out to the wire depending on the configuration). Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 51 ++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 +++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 19 ++++++++++ 3 files changed, 73 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..85bf17a616d5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -58,6 +58,7 @@ #include #include "mlx5_ib.h" #include "cmd.h" +#include #define DRIVER_NAME "mlx5_ib" #define DRIVER_VERSION "5.0-0" @@ -1187,6 +1188,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con return 0; } +static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) +{ + int err; + + err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); + if (err) + return err; + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return err; + + mutex_lock(&dev->lb_mutex); + dev->user_td++; + + if (dev->user_td == 2) + err = mlx5_nic_vport_update_local_lb(dev->mdev, true); + + mutex_unlock(&dev->lb_mutex); + return err; +} + +static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) +{ + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || + !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) + return; + + mutex_lock(&dev->lb_mutex); + dev->user_td--; + + if (dev->user_td < 2) + mlx5_nic_vport_update_local_lb(dev->mdev, false); + + mutex_unlock(&dev->lb_mutex); +} + static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { @@ -1295,8 +1335,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, mutex_init(&context->upd_xlt_page_mutex); if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { - err = mlx5_core_alloc_transport_domain(dev->mdev, - &context->tdn); + err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); if (err) goto out_page; } @@ -1362,7 +1401,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, out_td: if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); out_page: free_page(context->upd_xlt_page); @@ -1390,7 +1429,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) bfregi = &context->bfregi; if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) - mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); + mlx5_ib_dealloc_transport_domain(dev, context->tdn); free_page(context->upd_xlt_page); deallocate_uars(dev, context); @@ -3797,6 +3836,10 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_umrc; } + if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(mdev, disable_local_lb)) + mutex_init(&dev->lb_mutex); + dev->ib_active = true; return dev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bdcf25410c99..d2b27647f939 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -652,9 +652,13 @@ struct mlx5_ib_dev { struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; - struct mlx5_sq_bfreg bfreg; - struct mlx5_sq_bfreg fp_bfreg; - u8 umr_fence; + struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg fp_bfreg; + + /* protect the user_td */ + struct mutex lb_mutex; + u32 user_td; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c065132b956d..3cec683fd70f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_RFS_ACCEL #include #endif @@ -579,6 +580,18 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } +static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) +{ + int ret = 0; + + /* Disable local_lb by default */ + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + MLX5_CAP_GEN(dev, disable_local_lb)) + ret = mlx5_nic_vport_update_local_lb(dev, false); + + return ret; +} + int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; @@ -1155,6 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fs; } + err = mlx5_core_set_hca_defaults(dev); + if (err) { + dev_err(&pdev->dev, "Failed to set hca defaults\n"); + goto err_fs; + } + #ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_attach(dev->priv.eswitch); #endif -- cgit v1.2.3 From fd65f1b8eefdd20bd70cc920a7306e1e1b113d81 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 30 May 2017 09:56:05 +0300 Subject: IB/mlx5: Change logic for dispatching IB events for port state The old logic ignored link state. This led to missing IB events like when link goes down on the switch while admin state is up or to redundant events like when admin state goes up while link is down. To fix that, probe the port state on NETDEV events and compare to last known state to decide if IB events needs to be dispatched. FIxes: 5ec8c83e3ad3 ("IB/mlx5: Port events in RoCE now rely on netdev events") Signed-off-by: Moni Shoua Reviewed-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 33 +++++++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 85bf17a616d5..bfd9117da10c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -98,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) return mlx5_port_type_cap_to_rdma_ll(port_type_cap); } +static int get_port_state(struct ib_device *ibdev, + u8 port_num, + enum ib_port_state *state) +{ + struct ib_port_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + ret = mlx5_ib_query_port(ibdev, port_num, &attr); + if (!ret) + *state = attr.state; + return ret; +} + static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -115,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this, write_unlock(&ibdev->roce.netdev_lock); break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); @@ -128,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; + enum ib_port_state port_state; + if (get_port_state(&ibdev->ib_dev, 1, &port_state)) + return NOTIFY_DONE; + + if (ibdev->roce.last_port_state == port_state) + return NOTIFY_DONE; + + ibdev->roce.last_port_state = port_state; ibev.device = &ibdev->ib_dev; - ibev.event = (event == NETDEV_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + if (port_state == IB_PORT_DOWN) + ibev.event = IB_EVENT_PORT_ERR; + else if (port_state == IB_PORT_ACTIVE) + ibev.event = IB_EVENT_PORT_ACTIVE; + else + return NOTIFY_DONE; + ibev.element.port_num = 1; ib_dispatch_event(&ibev); } @@ -3793,6 +3821,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) err = mlx5_enable_eth(dev); if (err) goto err_free_port; + dev->roce.last_port_state = IB_PORT_DOWN; } err = create_dev_resources(&dev->devr); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d2b27647f939..5fb4547bbcb8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -616,6 +616,7 @@ struct mlx5_roce { struct net_device *netdev; struct notifier_block nb; atomic_t next_port; + enum ib_port_state last_port_state; }; struct mlx5_ib_dev { -- cgit v1.2.3 From 4a2da0b8c0782816f3ae6846ae7942fcbb0f8172 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 May 2017 10:05:15 +0300 Subject: IB/mlx5: Add debug control parameters for congestion control This patch adds debug control parameters for congestion control which can be read or written through debugfs. They are for reaction point and notification point nodes. These control parameters are as below: +------------------------------+-----------------------------------------+ | Name | Description | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate | When set target rate is updated to | | | current rate | |------------------------------+-----------------------------------------| |rp_clamp_tgt_rate_ati | When set update target rate based on | | | timer as well | |------------------------------+-----------------------------------------| |rp_time_reset | time between rate increase if no | | | CNP is received unit in usec | |------------------------------+-----------------------------------------| |rp_byte_reset | Number of bytes between rate inease if | | | no CNP is received | |------------------------------+-----------------------------------------| |rp_threshold | Threshold for reaction point rate | | | control | |------------------------------+-----------------------------------------| |rp_ai_rate | Rate for target rate, unit in Mbps | |------------------------------+-----------------------------------------| |rp_hai_rate | Rate for hyper increase state | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_min_dec_fac | Minimum factor by which the current | | | transmit rate can be changed when | | | processing a CNP, unit is percerntage | |------------------------------+-----------------------------------------| |rp_min_rate | Minimum value for rate limit, | | | unit in Mbps | |------------------------------+-----------------------------------------| |rp_rate_to_set_on_first_cnp | Rate that is set when first CNP is | | | received, unit is Mbps | |------------------------------+-----------------------------------------| |rp_dce_tcp_g | Used to calculate alpha | |------------------------------+-----------------------------------------| |rp_dce_tcp_rtt | Time between updates of alpha value, | | | unit is usec | |------------------------------+-----------------------------------------| |rp_rate_reduce_monitor_period | Minimum time between consecutive rate | | | reductions | |------------------------------+-----------------------------------------| |rp_initial_alpha_value | Initial value of alpha | |------------------------------+-----------------------------------------| |rp_gd | When CNP is received, flow rate is | | | reduced based on gd, rp_gd is given as | | | log2(rp_gd) | |------------------------------+-----------------------------------------| |np_cnp_dscp | dscp code point for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio_mode | 802.1p priority for generated cnp | |------------------------------+-----------------------------------------| |np_cnp_prio | cnp priority mode | +------------------------------+-----------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/Makefile | 2 +- drivers/infiniband/hw/mlx5/cmd.c | 20 ++ drivers/infiniband/hw/mlx5/cmd.h | 4 + drivers/infiniband/hw/mlx5/cong.c | 421 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/main.c | 9 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 37 +++ include/linux/mlx5/mlx5_ifc.h | 3 +- 7 files changed, 493 insertions(+), 3 deletions(-) create mode 100644 drivers/infiniband/hw/mlx5/cong.c (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 90ad2adc752f..bc6299697dda 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 18d5e1db93ed..470995fa38d2 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, MLX5_SET(query_cong_statistics_in, in, clear, reset); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } + +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; + + MLX5_SET(query_cong_params_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_PARAMS); + MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} + +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, + void *in, int in_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; + + return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index fa09228193a6..af4c24596274 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -39,4 +39,8 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, bool reset, void *out, int out_size); +int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, + void *out, int out_size); +int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, + void *in, int in_size); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c new file mode 100644 index 000000000000..2d32b519bb61 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "mlx5_ib.h" +#include "cmd.h" + +enum mlx5_ib_cong_node_type { + MLX5_IB_RROCE_ECN_RP = 1, + MLX5_IB_RROCE_ECN_NP = 2, +}; + +static const char * const mlx5_ib_dbg_cc_name[] = { + "rp_clamp_tgt_rate", + "rp_clamp_tgt_rate_ati", + "rp_time_reset", + "rp_byte_reset", + "rp_threshold", + "rp_ai_rate", + "rp_hai_rate", + "rp_min_dec_fac", + "rp_min_rate", + "rp_rate_to_set_on_first_cnp", + "rp_dce_tcp_g", + "rp_dce_tcp_rtt", + "rp_rate_reduce_monitor_period", + "rp_initial_alpha_value", + "rp_gd", + "np_cnp_dscp", + "np_cnp_prio_mode", + "np_cnp_prio", +}; + +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1) +#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2) +#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3) +#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4) +#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5) +#define MLX5_IB_RP_AI_RATE_ATTR BIT(7) +#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8) +#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9) +#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10) +#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11) +#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12) +#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13) +#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14) +#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15) +#define MLX5_IB_RP_GD_ATTR BIT(16) + +#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3) +#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4) + +static enum mlx5_ib_cong_node_type +mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset) +{ + if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE && + param_offset <= MLX5_IB_DBG_CC_RP_GD) + return MLX5_IB_RROCE_ECN_RP; + else + return MLX5_IB_RROCE_ECN_NP; +} + +static u32 mlx5_get_cc_param_val(void *field, int offset) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate); + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc); + case MLX5_IB_DBG_CC_RP_TIME_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset); + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset); + case MLX5_IB_DBG_CC_RP_THRESHOLD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_threshold); + case MLX5_IB_DBG_CC_RP_AI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate); + case MLX5_IB_DBG_CC_RP_HAI_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate); + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac); + case MLX5_IB_DBG_CC_RP_MIN_RATE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate); + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp); + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g); + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt); + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period); + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value); + case MLX5_IB_DBG_CC_RP_GD: + return MLX5_GET(cong_control_r_roce_ecn_rp, field, + rpg_gd); + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_dscp); + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_prio_mode); + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + return MLX5_GET(cong_control_r_roce_ecn_np, field, + cnp_802p_prio); + default: + return 0; + } +} + +static void mlx5_ib_set_cc_param_mask_val(void *field, int offset, + u32 var, u32 *attr_mask) +{ + switch (offset) { + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate, var); + break; + case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI: + *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + clamp_tgt_rate_after_time_inc, var); + break; + case MLX5_IB_DBG_CC_RP_TIME_RESET: + *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_time_reset, var); + break; + case MLX5_IB_DBG_CC_RP_BYTE_RESET: + *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_byte_reset, var); + break; + case MLX5_IB_DBG_CC_RP_THRESHOLD: + *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_threshold, var); + break; + case MLX5_IB_DBG_CC_RP_AI_RATE: + *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_ai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_HAI_RATE: + *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_hai_rate, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC: + *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_dec_fac, var); + break; + case MLX5_IB_DBG_CC_RP_MIN_RATE: + *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_min_rate, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP: + *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_to_set_on_first_cnp, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_G: + *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_g, var); + break; + case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT: + *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + dce_tcp_rtt, var); + break; + case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD: + *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rate_reduce_monitor_period, var); + break; + case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE: + *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + initial_alpha_value, var); + break; + case MLX5_IB_DBG_CC_RP_GD: + *attr_mask |= MLX5_IB_RP_GD_ATTR; + MLX5_SET(cong_control_r_roce_ecn_rp, field, + rpg_gd, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_DSCP: + *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var); + break; + case MLX5_IB_DBG_CC_NP_CNP_PRIO: + *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR; + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0); + MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var); + break; + } +} + +static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); + void *out; + void *field; + int err; + enum mlx5_ib_cong_node_type node; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + node = mlx5_ib_param_to_node(offset); + + err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); + if (err) + goto free; + + field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters); + *var = mlx5_get_cc_param_val(field, offset); + +free: + kvfree(out); + return err; +} + +static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); + void *in; + void *field; + enum mlx5_ib_cong_node_type node; + u32 attr_mask = 0; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_cong_params_in, in, opcode, + MLX5_CMD_OP_MODIFY_CONG_PARAMS); + + node = mlx5_ib_param_to_node(offset); + MLX5_SET(modify_cong_params_in, in, cong_protocol, node); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters); + mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask); + + field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select); + MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, + attr_mask); + + err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); + kvfree(in); + return err; +} + +static ssize_t set_param(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + char lbuf[11] = { }; + u32 var; + int ret; + + if (count > sizeof(lbuf)) + return -EINVAL; + + if (copy_from_user(lbuf, buf, count)) + return -EFAULT; + + lbuf[sizeof(lbuf) - 1] = '\0'; + + if (kstrtou32(lbuf, 0, &var)) + return -EINVAL; + + ret = mlx5_ib_set_cc_params(param->dev, offset, var); + return ret ? ret : count; +} + +static ssize_t get_param(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_ib_dbg_param *param = filp->private_data; + int offset = param->offset; + u32 var = 0; + int ret; + char lbuf[11]; + + if (*pos) + return 0; + + ret = mlx5_ib_get_cc_params(param->dev, offset, &var); + if (ret) + return ret; + + ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var); + if (ret < 0) + return ret; + + if (copy_to_user(buf, lbuf, ret)) + return -EFAULT; + + *pos += ret; + return ret; +} + +static const struct file_operations dbg_cc_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = set_param, + .read = get_param, +}; + +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root || + !dev->dbg_cc_params || + !dev->dbg_cc_params->root) + return; + + debugfs_remove_recursive(dev->dbg_cc_params->root); + kfree(dev->dbg_cc_params); + dev->dbg_cc_params = NULL; +} + +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_cc_params *dbg_cc_params; + int i; + + if (!mlx5_debugfs_root) + goto out; + + if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) || + !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) + goto out; + + dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL); + if (!dbg_cc_params) + goto out; + + dev->dbg_cc_params = dbg_cc_params; + + dbg_cc_params->root = debugfs_create_dir("cc_params", + dev->mdev->priv.dbg_root); + if (!dbg_cc_params->root) + goto err; + + for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + dbg_cc_params->params[i].offset = i; + dbg_cc_params->params[i].dev = dev; + dbg_cc_params->params[i].dentry = + debugfs_create_file(mlx5_ib_dbg_cc_name[i], + 0600, dbg_cc_params->root, + &dbg_cc_params->params[i], + &dbg_cc_fops); + if (!dbg_cc_params->params[i].dentry) + goto err; + } +out: return 0; + +err: + mlx5_ib_warn(dev, "cong debugfs failure\n"); + mlx5_ib_cleanup_cong_debugfs(dev); + /* + * We don't want to fail driver if debugfs failed to initialize, + * so we are not forwarding error to the user. + */ + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bfd9117da10c..a903728627f1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3838,9 +3838,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) goto err_odp; } + err = mlx5_ib_init_cong_debugfs(dev); + if (err) + goto err_cnt; + dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); if (!dev->mdev->priv.uar) - goto err_cnt; + goto err_cong; err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); if (err) @@ -3889,6 +3893,8 @@ err_uar_page: mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); err_cnt: + mlx5_ib_cleanup_cong_debugfs(dev); +err_cong: if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -3923,6 +3929,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); mlx5_put_uars_page(dev->mdev, mdev->priv.uar); + mlx5_ib_cleanup_cong_debugfs(dev); if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); destroy_umrc_res(dev); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5fb4547bbcb8..f0682f383b52 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -619,6 +619,39 @@ struct mlx5_roce { enum ib_port_state last_port_state; }; +struct mlx5_ib_dbg_param { + int offset; + struct mlx5_ib_dev *dev; + struct dentry *dentry; +}; + +enum mlx5_ib_dbg_cc_types { + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE, + MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI, + MLX5_IB_DBG_CC_RP_TIME_RESET, + MLX5_IB_DBG_CC_RP_BYTE_RESET, + MLX5_IB_DBG_CC_RP_THRESHOLD, + MLX5_IB_DBG_CC_RP_AI_RATE, + MLX5_IB_DBG_CC_RP_HAI_RATE, + MLX5_IB_DBG_CC_RP_MIN_DEC_FAC, + MLX5_IB_DBG_CC_RP_MIN_RATE, + MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP, + MLX5_IB_DBG_CC_RP_DCE_TCP_G, + MLX5_IB_DBG_CC_RP_DCE_TCP_RTT, + MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD, + MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE, + MLX5_IB_DBG_CC_RP_GD, + MLX5_IB_DBG_CC_NP_CNP_DSCP, + MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE, + MLX5_IB_DBG_CC_NP_CNP_PRIO, + MLX5_IB_DBG_CC_MAX, +}; + +struct mlx5_ib_dbg_cc_params { + struct dentry *root; + struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -655,6 +688,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ struct mutex lb_mutex; @@ -909,6 +943,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); +void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev); +int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev); + /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 57c75e8b3c19..3e697f672de5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1188,7 +1188,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_at_c0[0x12]; u8 cnp_dscp[0x6]; - u8 reserved_at_d8[0x5]; + u8 reserved_at_d8[0x4]; + u8 cnp_prio_mode[0x1]; u8 cnp_802p_prio[0x3]; u8 reserved_at_e0[0x720]; -- cgit v1.2.3 From 7ecf6d8ff154e6f7471ee537a400d43a5f3b1c57 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 30 May 2017 10:18:24 +0300 Subject: IB/mlx5: Restore IB guid/policy for virtual functions When a user sets port_guid, node_guid or policy of an IB virtual function, save this information in "struct mlx5_vf_context". This information will be restored later when pci_resume is called. To make sure this works, one can use aer-inject to generate PCI errors on mlx5 devices and verify if relevant fields are restored after PCI resume. Signed-off-by: Bodong Wang Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/ib_virt.c | 9 ++++++ drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 42 +++++++++++++++++++++++++ include/linux/mlx5/driver.h | 17 +++++----- 3 files changed, 61 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index c1b9de800fe5..649a3364f838 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, } in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].policy = in->policy; out: kfree(in); @@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].node_guid = guid; kfree(in); return err; } @@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_hca_vport_context *in; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; int err; in = kzalloc(sizeof(*in), GFP_KERNEL); @@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); + if (!err) + vfs_ctx[vf].port_guid = guid; kfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index bcdf7779c48d..090b29e05a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" @@ -44,6 +45,38 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return !!sriov->num_vfs; } +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct mlx5_hca_vport_context *in; + int err = 0; + + /* Restore sriov guid and policy settings */ + if (sriov->vfs_ctx[vf].node_guid || + sriov->vfs_ctx[vf].port_guid || + sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) { + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->node_guid = sriov->vfs_ctx[vf].node_guid; + in->port_guid = sriov->vfs_ctx[vf].port_guid; + in->policy = sriov->vfs_ctx[vf].policy; + in->field_select = + !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID | + !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | + !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; + + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + if (err) + mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); + + kfree(in); + } + + return err; +} + static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -74,6 +107,15 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } sriov->vfs_ctx[vf].enabled = 1; sriov->enabled_vfs++; + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { + err = sriov_restore_guids(dev, vf); + if (err) { + mlx5_core_warn(dev, + "failed to restore VF %d settings, err %d\n", + vf, err); + continue; + } + } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index df6ce59a1f95..54221be5f69e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -162,6 +162,13 @@ enum dbg_rsc_type { MLX5_DBG_RSC_CQ, }; +enum port_state_policy { + MLX5_POLICY_DOWN = 0, + MLX5_POLICY_UP = 1, + MLX5_POLICY_FOLLOW = 2, + MLX5_POLICY_INVALID = 0xffffffff +}; + struct mlx5_field_desc { struct dentry *dent; int i; @@ -525,6 +532,9 @@ struct mlx5_mkey_table { struct mlx5_vf_context { int enabled; + u64 port_guid; + u64 node_guid; + enum port_state_policy policy; }; struct mlx5_core_sriov { @@ -842,13 +852,6 @@ struct mlx5_pas { u8 log_sz; }; -enum port_state_policy { - MLX5_POLICY_DOWN = 0, - MLX5_POLICY_UP = 1, - MLX5_POLICY_FOLLOW = 2, - MLX5_POLICY_INVALID = 0xffffffff -}; - enum phy_port_state { MLX5_AAA_111 }; -- cgit v1.2.3 From 03404e8ae652e02a5e3388224836cef53d7a0988 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:13 +0300 Subject: IB/mlx5: Add support to dropless RQ RQs that were configured for "delay drop" will prevent packet drops when their WQEs are depleted. Marking an RQ to be drop-less is done by setting delay_drop_en in RQ context using CREATE_RQ command. Since this feature is globally activated/deactivated by using the SET_DELAY_DROP command on all the marked RQs, we activated/deactivated it according to the number of RQs with 'delay_drop' enabled. When timeout is expired, then the feature is deactivated. Therefore the driver handles the delay drop timeout event and reactivate it. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 60 +++++++++++++++++++++++++++++++++--- drivers/infiniband/hw/mlx5/mlx5_ib.h | 19 ++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 37 ++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 4 files changed, 113 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a903728627f1..ad4b12decc23 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -697,6 +697,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_TSO; } + if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && + MLX5_CAP_GEN(dev->mdev, general_notification_event)) + props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ @@ -2752,6 +2756,24 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); } +static void delay_drop_handler(struct work_struct *work) +{ + int err; + struct mlx5_ib_delay_drop *delay_drop = + container_of(work, struct mlx5_ib_delay_drop, + delay_drop_work); + + mutex_lock(&delay_drop->lock); + err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, + delay_drop->timeout); + if (err) { + mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", + delay_drop->timeout); + delay_drop->activate = false; + } + mutex_unlock(&delay_drop->lock); +} + static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param) { @@ -2804,8 +2826,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, ibev.event = IB_EVENT_CLIENT_REREGISTER; port = (u8)param; break; + case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + goto out; default: - return; + goto out; } ibev.device = &ibdev->ib_dev; @@ -2813,7 +2838,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (port < 1 || port > ibdev->num_ports) { mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); - return; + goto out; } if (ibdev->ib_active) @@ -2821,6 +2846,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (fatal) ibdev->ib_active = false; + +out: + return; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -3623,6 +3651,26 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void cancel_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); +} + +static void init_delay_drop(struct mlx5_ib_dev *dev) +{ + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3862,11 +3910,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (err) goto err_dev; + init_delay_drop(dev); + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { err = device_create_file(&dev->ib_dev.dev, mlx5_class_attributes[i]); if (err) - goto err_umrc; + goto err_delay_drop; } if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && @@ -3877,7 +3927,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) return dev; -err_umrc: +err_delay_drop: + cancel_delay_drop(dev); destroy_umrc_res(dev); err_dev: @@ -3924,6 +3975,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + cancel_delay_drop(dev); mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f0682f383b52..097f12dc65a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,6 +247,10 @@ struct mlx5_ib_wq { void *qend; }; +enum mlx5_ib_wq_flags { + MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1, +}; + struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; @@ -264,6 +268,7 @@ struct mlx5_ib_rwq { u32 wqe_count; u32 wqe_shift; int wq_sig; + u32 create_flags; /* Use enum mlx5_ib_wq_flags */ }; enum { @@ -652,6 +657,19 @@ struct mlx5_ib_dbg_cc_params { struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX]; }; +enum { + MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, +}; + +struct mlx5_ib_delay_drop { + struct mlx5_ib_dev *dev; + struct work_struct delay_drop_work; + /* serialize setting of delay drop */ + struct mutex lock; + u32 timeout; + bool activate; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -688,6 +706,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + struct mlx5_ib_delay_drop delay_drop; struct mlx5_ib_dbg_cc_params *dbg_cc_params; /* protect the user_td */ diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0889ff367c86..939553d5c25f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4597,6 +4597,24 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) } } +static int set_delay_drop(struct mlx5_ib_dev *dev) +{ + int err = 0; + + mutex_lock(&dev->delay_drop.lock); + if (dev->delay_drop.activate) + goto out; + + err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout); + if (err) + goto out; + + dev->delay_drop.activate = true; +out: + mutex_unlock(&dev->delay_drop.lock); + return err; +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4651,9 +4669,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(rqc, rqc, scatter_fcs, 1); } + if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + if (!(dev->ib_dev.attrs.raw_packet_caps & + IB_RAW_PACKET_CAP_DELAY_DROP)) { + mlx5_ib_dbg(dev, "Delay drop is not supported\n"); + err = -EOPNOTSUPP; + goto out; + } + MLX5_SET(rqc, rqc, delay_drop_en, 1); + } rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); + if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) { + err = set_delay_drop(dev); + if (err) { + mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n", + err); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); + } else { + rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP; + } + } out: kvfree(in); return err; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4bc57647fad8..f350688a12fa 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2519,7 +2519,7 @@ enum { struct mlx5_ifc_rqc_bits { u8 rlky[0x1]; - u8 reserved_at_1[0x1]; + u8 delay_drop_en[0x1]; u8 scatter_fcs[0x1]; u8 vsd[0x1]; u8 mem_rq_type[0x4]; -- cgit v1.2.3 From fe248c3a5837848717ed566fb4aefe66f43a5e53 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 May 2017 10:29:14 +0300 Subject: IB/mlx5: Add delay drop configuration and statistics Add debugfs interface for monitor the number of delay drop timeout events and the number of existing dropless RQs in the system. In addition add debugfs interface for configuring the global timeout value which is used in the SET_DELAY_DROP command. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 103 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 10 ++++ drivers/infiniband/hw/mlx5/qp.c | 13 ++++- 3 files changed, 123 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ad4b12decc23..7455f95bf679 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -2763,6 +2764,8 @@ static void delay_drop_handler(struct work_struct *work) container_of(work, struct mlx5_ib_delay_drop, delay_drop_work); + atomic_inc(&delay_drop->events_cnt); + mutex_lock(&delay_drop->lock); err = mlx5_core_set_delay_drop(delay_drop->dev->mdev, delay_drop->timeout); @@ -3651,12 +3654,107 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!dev->delay_drop.dbg) + return; + debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs); + kfree(dev->delay_drop.dbg); + dev->delay_drop.dbg = NULL; +} + static void cancel_delay_drop(struct mlx5_ib_dev *dev) { if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) return; cancel_work_sync(&dev->delay_drop.delay_drop_work); + delay_drop_debugfs_cleanup(dev); +} + +static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + char lbuf[20]; + int len; + + len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout); + return simple_read_from_buffer(buf, count, pos, lbuf, len); +} + +static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_ib_delay_drop *delay_drop = filp->private_data; + u32 timeout; + u32 var; + + if (kstrtouint_from_user(buf, count, 0, &var)) + return -EFAULT; + + timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS * + 1000); + if (timeout != var) + mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n", + timeout); + + delay_drop->timeout = timeout; + + return count; +} + +static const struct file_operations fops_delay_drop_timeout = { + .owner = THIS_MODULE, + .open = simple_open, + .write = delay_drop_timeout_write, + .read = delay_drop_timeout_read, +}; + +static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dbg_delay_drop *dbg; + + if (!mlx5_debugfs_root) + return 0; + + dbg = kzalloc(sizeof(*dbg), GFP_KERNEL); + if (!dbg) + return -ENOMEM; + + dbg->dir_debugfs = + debugfs_create_dir("delay_drop", + dev->mdev->priv.dbg_root); + if (!dbg->dir_debugfs) + return -ENOMEM; + + dbg->events_cnt_debugfs = + debugfs_create_atomic_t("num_timeout_events", 0400, + dbg->dir_debugfs, + &dev->delay_drop.events_cnt); + if (!dbg->events_cnt_debugfs) + goto out_debugfs; + + dbg->rqs_cnt_debugfs = + debugfs_create_atomic_t("num_rqs", 0400, + dbg->dir_debugfs, + &dev->delay_drop.rqs_cnt); + if (!dbg->rqs_cnt_debugfs) + goto out_debugfs; + + dbg->timeout_debugfs = + debugfs_create_file("timeout", 0600, + dbg->dir_debugfs, + &dev->delay_drop, + &fops_delay_drop_timeout); + if (!dbg->timeout_debugfs) + goto out_debugfs; + + return 0; + +out_debugfs: + delay_drop_debugfs_cleanup(dev); + return -ENOMEM; } static void init_delay_drop(struct mlx5_ib_dev *dev) @@ -3669,6 +3767,11 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) dev->delay_drop.activate = false; dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (delay_drop_debugfs_init(dev)) + mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } static void *mlx5_ib_add(struct mlx5_core_dev *mdev) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 097f12dc65a6..0316147a39a8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -661,6 +661,13 @@ enum { MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100, }; +struct mlx5_ib_dbg_delay_drop { + struct dentry *dir_debugfs; + struct dentry *rqs_cnt_debugfs; + struct dentry *events_cnt_debugfs; + struct dentry *timeout_debugfs; +}; + struct mlx5_ib_delay_drop { struct mlx5_ib_dev *dev; struct work_struct delay_drop_work; @@ -668,6 +675,9 @@ struct mlx5_ib_delay_drop { struct mutex lock; u32 timeout; bool activate; + atomic_t events_cnt; + atomic_t rqs_cnt; + struct mlx5_ib_dbg_delay_drop *dbg; }; struct mlx5_ib_dev { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 939553d5c25f..c5d8ec839e99 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -675,10 +675,14 @@ err_umem: return err; } -static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq) +static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_rwq *rwq) { struct mlx5_ib_ucontext *context; + if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP) + atomic_dec(&dev->delay_drop.rqs_cnt); + context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &rwq->db); if (rwq->umem) @@ -4612,6 +4616,9 @@ static int set_delay_drop(struct mlx5_ib_dev *dev) dev->delay_drop.activate = true; out: mutex_unlock(&dev->delay_drop.lock); + + if (!err) + atomic_inc(&dev->delay_drop.rqs_cnt); return err; } @@ -4824,7 +4831,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, err_copy: mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: - destroy_user_rq(pd, rwq); + destroy_user_rq(dev, pd, rwq); err: kfree(rwq); return ERR_PTR(err); @@ -4836,7 +4843,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) struct mlx5_ib_rwq *rwq = to_mrwq(wq); mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); - destroy_user_rq(wq->pd, rwq); + destroy_user_rq(dev, wq->pd, rwq); kfree(rwq); return 0; -- cgit v1.2.3 From 5236333592244557a19694a51337df6ac018f0a7 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Jun 2017 11:14:02 +0300 Subject: IB/core: Fix the validations of a multicast LID in attach or detach operations RoCE Annex (A16.9.10/11) declares that during attach (detach) QP to a multicast group, if the QP is associated with a RoCE port, the multicast group MLID is unused and is ignored. During attach or detach multicast, when the QP is associated with a port, it is enough to check the port's link layer and validate the LID only if it is Infiniband. Otherwise, avoid validating the multicast LID. Fixes: 8561eae60ff9 ("IB/core: For multicast functions, verify that LIDs are multicast LIDs") Signed-off-by: Noa Osherovich Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 44 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fb98ed67d5bc..802bdc397a57 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1569,6 +1569,44 @@ EXPORT_SYMBOL(ib_dealloc_fmr); /* Multicast groups */ +static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) +{ + struct ib_qp_init_attr init_attr = {}; + struct ib_qp_attr attr = {}; + int num_eth_ports = 0; + int port; + + /* If QP state >= init, it is assigned to a port and we can check this + * port only. + */ + if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { + if (attr.qp_state >= IB_QPS_INIT) { + if (qp->device->get_link_layer(qp->device, attr.port_num) != + IB_LINK_LAYER_INFINIBAND) + return true; + goto lid_check; + } + } + + /* Can't get a quick answer, iterate over all ports */ + for (port = 0; port < qp->device->phys_port_cnt; port++) + if (qp->device->get_link_layer(qp->device, port) != + IB_LINK_LAYER_INFINIBAND) + num_eth_ports++; + + /* If we have at lease one Ethernet port, RoCE annex declares that + * multicast LID should be ignored. We can't tell at this step if the + * QP belongs to an IB or Ethernet port. + */ + if (num_eth_ports) + return true; + + /* If all the ports are IB, we can check according to IB spec. */ +lid_check: + return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || + lid == be16_to_cpu(IB_LID_PERMISSIVE)); +} + int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; @@ -1576,8 +1614,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->attach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || - lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || - lid == be16_to_cpu(IB_LID_PERMISSIVE)) + !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->attach_mcast(qp, gid, lid); @@ -1594,8 +1631,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->detach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || - lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || - lid == be16_to_cpu(IB_LID_PERMISSIVE)) + !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->detach_mcast(qp, gid, lid); -- cgit v1.2.3 From be1d325a335840a86c133a56c6a911c368bac0fd Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Jun 2017 11:14:03 +0300 Subject: IB/core: Set RoCEv2 MGID according to spec RoCEv2 Annex states that for RoCEv2 over IPv4, the corresponding IPv4 address is encoded into the GID according to the following rule: GID= :ffff: Remove the 0xff0e prefix for RoCEv2 packets with IPv4 and leave it zeroed and change rdma_is_multicast_addr() to consider the new logic. Signed-off-by: Noa Osherovich Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 13 +++++++------ drivers/infiniband/core/verbs.c | 10 ++++++---- include/rdma/ib_addr.h | 8 +++++++- 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0eb393237ba2..a8c2f0ccd225 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3998,7 +3998,8 @@ static void iboe_mcast_work_handler(struct work_struct *work) kfree(mw); } -static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, + enum ib_gid_type gid_type) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; @@ -4008,8 +4009,8 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { - mgid->raw[0] = 0xff; - mgid->raw[1] = 0x0e; + mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0; + mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0; mgid->raw[2] = 0; mgid->raw[3] = 0; mgid->raw[4] = 0; @@ -4050,7 +4051,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, goto out1; } - cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); + gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - + rdma_start_port(id_priv->cma_dev->device)]; + cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type); mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) @@ -4066,8 +4069,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 802bdc397a57..30fdc3ae1bbd 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1613,8 +1613,9 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->attach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || - !is_valid_mcast_lid(qp, lid)) + + if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || + qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->attach_mcast(qp, gid, lid); @@ -1630,8 +1631,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) if (!qp->device->detach_mcast) return -ENOSYS; - if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD || - !is_valid_mcast_lid(qp, lid)) + + if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || + qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) return -EINVAL; ret = qp->device->detach_mcast(qp, gid, lid); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index b73a14edc85e..7aca12188ef3 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -304,7 +304,13 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) static inline int rdma_is_multicast_addr(struct in6_addr *addr) { - return addr->s6_addr[0] == 0xff; + u32 ipv4_addr; + + if (addr->s6_addr[0] == 0xff) + return 1; + + memcpy(&ipv4_addr, addr->s6_addr + 12, 4); + return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr)); } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) -- cgit v1.2.3 From 9636a56fa864464896bf7d1272c701f2b9a57737 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Jun 2017 11:14:04 +0300 Subject: IB/core: Add support for RoCEv2 multicast When creating address handle from multicast GID, set MAC according to the appropriate formula instead of searching for it in the GID table: - For IPv4 multicast GID use ip_eth_mc_map(). - For IPv6 multicast GID use ipv6_eth_mc_map(). Signed-off-by: Noa Osherovich Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 30fdc3ae1bbd..e8006677b01c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1244,6 +1244,18 @@ int ib_resolve_eth_dmac(struct ib_device *device, if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) { rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, ah_attr->roce.dmac); + return 0; + } + if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { + if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { + __be32 addr = 0; + + memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4); + ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac); + } else { + ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw, + (char *)ah_attr->roce.dmac); + } } else { union ib_gid sgid; struct ib_gid_attr sgid_attr; -- cgit v1.2.3 From 2dee0e545894c23b1a2cc2019ac87dffb42e5984 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:07 +0300 Subject: IB/uverbs: Enable QP creation with a given source QP number Enable QP creation with a given source QP number, the created QP will use the source QPN as its wire QP number. To create such a QP, root privileges (i.e. CAP_NET_RAW) are required from the user application. This comes as a pre-patch for downstream patches in this series to allow user space applications to accelerate traffic which is typically handled by IPoIB ULP. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 17 ++++++++++++++--- include/uapi/rdma/ib_user_verbs.h | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..60535c754db3 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1383,8 +1383,9 @@ static int create_qp(struct ib_uverbs_file *file, attr.rwq_ind_tbl = ind_tbl; } - if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + - sizeof(cmd->reserved1)) && cmd->reserved1) { + if (cmd_sz > sizeof(*cmd) && + !ib_is_udata_cleared(ucore, sizeof(*cmd), + cmd_sz - sizeof(*cmd))) { ret = -EOPNOTSUPP; goto err_put; } @@ -1482,11 +1483,21 @@ static int create_qp(struct ib_uverbs_file *file, IB_QP_CREATE_MANAGED_SEND | IB_QP_CREATE_MANAGED_RECV | IB_QP_CREATE_SCATTER_FCS | - IB_QP_CREATE_CVLAN_STRIPPING)) { + IB_QP_CREATE_CVLAN_STRIPPING | + IB_QP_CREATE_SOURCE_QPN)) { ret = -EINVAL; goto err_put; } + if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) { + if (!capable(CAP_NET_RAW)) { + ret = -EPERM; + goto err_put; + } + + attr.source_qpn = cmd->source_qpn; + } + buf = (void *)cmd + sizeof(*cmd); if (cmd_sz > sizeof(*cmd)) if (!(buf[0] == 0 && !memcmp(buf, buf + 1, diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 270c350bedc6..63656d2e8705 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -578,7 +578,7 @@ struct ib_uverbs_ex_create_qp { __u32 comp_mask; __u32 create_flags; __u32 rwq_ind_tbl_handle; - __u32 reserved1; + __u32 source_qpn; }; struct ib_uverbs_open_qp { -- cgit v1.2.3 From c2e53b2ce1ba351918ede492c0cb207f42e1228f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:08 +0300 Subject: IB/mlx5: Add support for QP with a given source QPN Allow user space applications to accelerate send and receive traffic which is typically handled by IPoIB ULP by creating a UD QP with a given source QPN of the IPoIB UD QP. UD QP with a given source QPN should basically be similar to RAW QP from point of view of its created resources. However, - Its TIS should point to the source QPN. - Modify can be done only on its state as the transport attributes are managed by its source QP. This patch manages below: - Creating/destroying/modifying UD QP with a given source QPN. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 + drivers/infiniband/hw/mlx5/qp.c | 72 ++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 0316147a39a8..7ac991070020 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -383,6 +383,7 @@ struct mlx5_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; u32 rate_limit; + u32 underlay_qpn; }; struct mlx5_ib_cq_buf { @@ -404,6 +405,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9, + MLX5_IB_QP_UNDERLAY = 1 << 10, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c5d8ec839e99..5c7ce9bd466e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mlx5_ib.h" /* not supported currently */ @@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev, return -EINVAL; } - if (attr->qp_type == IB_QPT_RAW_PACKET) { + if (attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift; qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6; } else { @@ -1025,12 +1027,16 @@ static int is_connected(enum ib_qp_type qp_type) } static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, struct mlx5_ib_sq *sq, u32 tdn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (qp->flags & MLX5_IB_QP_UNDERLAY) + MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); + return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } @@ -1233,7 +1239,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, u32 tdn = mucontext->tdn; if (qp->sq.wqe_cnt) { - err = create_raw_packet_qp_tis(dev, sq, tdn); + err = create_raw_packet_qp_tis(dev, qp, sq, tdn); if (err) return err; @@ -1506,10 +1512,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 *in; int err; - base = init_attr->qp_type == IB_QPT_RAW_PACKET ? - &qp->raw_packet_qp.rq.base : - &qp->trans_qp.base; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -1591,10 +1593,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { + if (init_attr->qp_type != IB_QPT_UD || + (MLX5_CAP_GEN(dev->mdev, port_type) != + MLX5_CAP_PORT_TYPE_IB) || + !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) { + mlx5_ib_dbg(dev, "Source QP option isn't supported\n"); + return -EOPNOTSUPP; + } + + qp->flags |= MLX5_IB_QP_UNDERLAY; + qp->underlay_qpn = init_attr->source_qpn; + } } else { qp->wq_sig = !!wq_signature; } + base = (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? + &qp->raw_packet_qp.rq.base : + &qp->trans_qp.base; + qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, (pd && pd->uobject) ? &ucmd : NULL); @@ -1745,7 +1765,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_LSO; } - if (init_attr->qp_type == IB_QPT_RAW_PACKET) { + if (init_attr->qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, pd); @@ -1897,7 +1918,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; - struct mlx5_ib_qp_base *base = &qp->trans_qp.base; + struct mlx5_ib_qp_base *base; unsigned long flags; int err; @@ -1906,12 +1927,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) return; } - base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ? + base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) ? &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; if (qp->state != IB_QPS_RESET) { - if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) { err = mlx5_core_qp_modify(dev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL, &base->mqp); @@ -1950,7 +1973,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) mlx5_ib_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { destroy_raw_packet_qp(dev, qp); } else { err = mlx5_core_destroy_qp(dev->mdev, &base->mqp); @@ -2706,7 +2730,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; - } else if (ibqp->qp_type == IB_QPT_UD || + } else if ((ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_UNDERLAY)) || ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { @@ -2803,6 +2828,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; + + /* Underlay port should be used - index 0 function per port */ + if (qp->flags & MLX5_IB_QP_UNDERLAY) + port_num = 0; + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->cnts.set_id) << 24); @@ -2828,7 +2858,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { struct mlx5_modify_raw_qp_param raw_qp_param = {}; raw_qp_param.operation = op; @@ -2917,7 +2948,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port); } - if (qp_type != MLX5_IB_QPT_REG_UMR && + if (qp->flags & MLX5_IB_QP_UNDERLAY) { + if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { + mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", + attr_mask); + goto out; + } + } else if (qp_type != MLX5_IB_QPT_REG_UMR && !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", cur_state, new_state, ibqp->qp_type, attr_mask); @@ -4481,9 +4518,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); + /* Not all of output fields are applicable, make sure to zero them */ + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + memset(qp_attr, 0, sizeof(*qp_attr)); + mutex_lock(&qp->mutex); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || + qp->flags & MLX5_IB_QP_UNDERLAY) { err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state); if (err) goto out; -- cgit v1.2.3 From 81e308804ba6b389ead57b54a259154a36a560a8 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:09 +0300 Subject: IB/mlx5: Add multicast flow steering support for underlay QP In order to add multicast flow steering support, there is need to block the attaching of mcg flow for underlay QP, recognize multicast IB_FLOW_SPEC_IPV4 based on its IP and enable creating/destroying flow for IB layer. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 43 ++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7455f95bf679..bf74b2ee2f9c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2102,21 +2102,32 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, */ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) { - struct ib_flow_spec_eth *eth_spec; + union ib_flow_spec *flow_spec; if (ib_attr->type != IB_FLOW_ATTR_NORMAL || - ib_attr->size < sizeof(struct ib_flow_attr) + - sizeof(struct ib_flow_spec_eth) || ib_attr->num_of_specs < 1) return false; - eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); - if (eth_spec->type != IB_FLOW_SPEC_ETH || - eth_spec->size != sizeof(*eth_spec)) + flow_spec = (union ib_flow_spec *)(ib_attr + 1); + if (flow_spec->type == IB_FLOW_SPEC_IPV4) { + struct ib_flow_spec_ipv4 *ipv4_spec; + + ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec; + if (ipv4_is_multicast(ipv4_spec->val.dst_ip)) + return true; + return false; + } + + if (flow_spec->type == IB_FLOW_SPEC_ETH) { + struct ib_flow_spec_eth *eth_spec; + + eth_spec = (struct ib_flow_spec_eth *)flow_spec; + return is_multicast_ether_addr(eth_spec->mask.dst_mac) && + is_multicast_ether_addr(eth_spec->val.dst_mac); + } - return is_multicast_ether_addr(eth_spec->mask.dst_mac) && - is_multicast_ether_addr(eth_spec->val.dst_mac); + return false; } static bool is_valid_ethertype(struct mlx5_core_dev *mdev, @@ -2594,8 +2605,14 @@ unlock: static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *mqp = to_mqp(ibqp); int err; + if (mqp->flags & MLX5_IB_QP_UNDERLAY) { + mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); + return -EOPNOTSUPP; + } + err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); if (err) mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", @@ -3941,18 +3958,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); } + dev->ib_dev.create_flow = mlx5_ib_create_flow; + dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | -- cgit v1.2.3 From 1d54f89094e2d2067948550ec52aab93e15b4b0c Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 8 Jun 2017 16:15:11 +0300 Subject: IB/mlx5: Report RX checksum capabilities for IPoIB Report RX checksum capabilities when 'ipoib_enhanced_offloads' capabilities are set and support checksum. Signed-off-by: Yishai Hadas Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bf74b2ee2f9c..2757d445b042 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -702,6 +702,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_GEN(dev->mdev, general_notification_event)) props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && + MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap)) + props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { /* Legacy bit to support old userspace libraries */ -- cgit v1.2.3 From 4c25b7a39005c9243a492b577c3e940eeac36a25 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 12 Jun 2017 10:36:15 +0300 Subject: IB/mlx5: Fix cached MR allocation flow When we have a miss in one order of the mkey cache, we try to get an mkey from a higher order. We still need to check that the higher order can be used with UMR before using it. Otherwise, we will get an mkey with 0 entries and the post send operation that is used to fill it will complete with the following error: mlx5_0:dump_cqe:275:(pid 0): dump error cqe 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0f007806 25000025 49ce59d2 Fixes: 49780d42dfc9 ("IB/mlx5: Expose MR cache for mlx5_ib") Cc: # v4.10+ Signed-off-by: Majd Dibbiny Reviewed-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2c40a2e989d2..a0eb2f96179a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,6 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); +static int max_umr_order(struct mlx5_ib_dev *dev); static int use_umr(struct mlx5_ib_dev *dev, int order); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); @@ -491,16 +492,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent; + int last_umr_cache_entry; int c; int i; c = order2idx(dev, order); - if (c < 0 || c > MAX_UMR_CACHE_ENTRY) { + last_umr_cache_entry = order2idx(dev, max_umr_order(dev)); + if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } - for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) { + for (i = c; i <= last_umr_cache_entry; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); @@ -816,11 +819,16 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int use_umr(struct mlx5_ib_dev *dev, int order) +static int max_umr_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return order <= MAX_UMR_CACHE_ENTRY + 2; - return order <= MLX5_MAX_UMR_SHIFT; + return MAX_UMR_CACHE_ENTRY + 2; + return MLX5_MAX_UMR_SHIFT; +} + +static int use_umr(struct mlx5_ib_dev *dev, int order) +{ + return order <= max_umr_order(dev); } static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, -- cgit v1.2.3 From 3fffc82ad6c78fcc9d5d4eca089f00db14ab0358 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 10:36:16 +0300 Subject: IB/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes already declared define. Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index fff4ec13f620..66d19b611fe4 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3 From 58dcb60a226ee48cc66c96c27b751f06ec2bc5a9 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 19 Jun 2017 07:19:37 +0300 Subject: IB/mlx5: Expose extended error counters This patch adds below requester and responder side error counters, which will be exposed by hardware counters interface and are supported as part of query Q counters command extension. +---------------------------+-------------------------------------+ | Name | Description | |---------------------------+-------------------------------------| |resp_local_length_error | Number of times responder detected | | | local length errors | |---------------------------+-------------------------------------| |resp_cqe_error | Number of CQEs completed with error | | | at responder | |---------------------------+-------------------------------------| |req_cqe_error | Number of CQEs completed with error | | | at requester | |---------------------------+-------------------------------------| |req_remote_invalid_request | Number of times requester detected | | | remote invalid request error | |---------------------------+-------------------------------------| |req_remote_access_error | Number of times requester detected | | | remote access error | |---------------------------+-------------------------------------| |resp_remote_access_error | Number of times responder detected | | | remote access error | |---------------------------+-------------------------------------| |resp_cqe_flush_error | Number of CQEs completed with | | | flushed with error at responder | |---------------------------+-------------------------------------| |req_cqe_flush_error | Number of CQEs completed with | | | flushed with error at requester | +---------------------------+-------------------------------------+ Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 ++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 44 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2757d445b042..9dd9759459fb 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3432,6 +3432,17 @@ static const struct mlx5_ib_counter cong_cnts[] = { INIT_CONG_COUNTER(np_cnp_sent), }; +static const struct mlx5_ib_counter extended_err_cnts[] = { + INIT_Q_COUNTER(resp_local_length_error), + INIT_Q_COUNTER(resp_cqe_error), + INIT_Q_COUNTER(req_cqe_error), + INIT_Q_COUNTER(req_remote_invalid_request), + INIT_Q_COUNTER(req_remote_access_errors), + INIT_Q_COUNTER(resp_remote_access_errors), + INIT_Q_COUNTER(resp_cqe_flush_error), + INIT_Q_COUNTER(req_cqe_flush_error), +}; + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { unsigned int i; @@ -3456,6 +3467,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) num_counters += ARRAY_SIZE(retrans_q_cnts); + + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) + num_counters += ARRAY_SIZE(extended_err_cnts); + cnts->num_q_counters = num_counters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { @@ -3505,6 +3520,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, } } + if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { + for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + names[j] = extended_err_cnts[i].name; + offsets[j] = extended_err_cnts[i].offset; + } + } + if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) { names[j] = cong_cnts[i].name; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f350688a12fa..5bae70eb25af 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -858,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pcam_reg[0x1]; u8 local_ca_ack_delay[0x5]; u8 port_module_event[0x1]; - u8 reserved_at_1b1[0x1]; + u8 enhanced_error_q_counters[0x1]; u8 ports_check[0x1]; u8 reserved_at_1b3[0x1]; u8 disable_link_up[0x1]; @@ -3953,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0x4e0]; + u8 reserved_at_320[0xa0]; + + u8 resp_local_length_error[0x20]; + + u8 req_local_length_error[0x20]; + + u8 resp_local_qp_error[0x20]; + + u8 local_operation_error[0x20]; + + u8 resp_local_protection[0x20]; + + u8 req_local_protection[0x20]; + + u8 resp_cqe_error[0x20]; + + u8 req_cqe_error[0x20]; + + u8 req_mw_binding[0x20]; + + u8 req_bad_response[0x20]; + + u8 req_remote_invalid_request[0x20]; + + u8 resp_remote_invalid_request[0x20]; + + u8 req_remote_access_errors[0x20]; + + u8 resp_remote_access_errors[0x20]; + + u8 req_remote_operation_errors[0x20]; + + u8 req_transport_retries_exceeded[0x20]; + + u8 cq_overflow[0x20]; + + u8 resp_cqe_flush_error[0x20]; + + u8 req_cqe_flush_error[0x20]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_query_q_counter_in_bits { -- cgit v1.2.3 From ea30b966f7dd6bcfb20c98a7f99608c7bb10bfac Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 21 Jun 2017 09:26:28 +0300 Subject: IB/mlx4: Add inline-receive support When inline-receive is enabled, the HCA may write received data into the receive WQE. Inline-receive is enabled by setting its matching bit in the QP context and each single-packet message with payload not exceeding the receive WQE size will be delivered to the WQE. The completion report will indicate that the payload was placed to the WQE. It includes: 1) Return maximum supported size of inline-receive by the hardware in query_device vendor's data part. 2) Enable the feature when requested by the vendor data input. Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 +++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 +++ drivers/infiniband/hw/mlx4/qp.c | 32 +++++++++++++++++++++++++------- include/uapi/rdma/mlx4-abi.h | 3 ++- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index d1b43cbbfea7..e8c290edb1e1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -563,6 +563,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, } } + if (uhw->outlen >= resp.response_length + + sizeof(resp.max_inl_recv_sz)) { + resp.response_length += sizeof(resp.max_inl_recv_sz); + resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9db82e67e959..a6337f3161cf 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -318,6 +318,7 @@ struct mlx4_ib_qp { u8 sq_no_prefetch; u8 state; int mlx_type; + u32 inl_recv_sz; struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; @@ -623,6 +624,8 @@ struct mlx4_uverbs_ex_query_device_resp { __u32 comp_mask; __u32 response_length; __u64 hca_core_clock_offset; + __u32 max_inl_recv_sz; + __u32 reserved; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 75c0e6c5dd56..d1caa39a3943 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -377,7 +377,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp, + u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || @@ -385,18 +386,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; if (!has_rq) { - if (cap->max_recv_wr) + if (cap->max_recv_wr || inl_recv_sz) return -EINVAL; qp->rq.wqe_cnt = qp->rq.max_gs = 0; } else { + u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg * + sizeof(struct mlx4_wqe_data_seg); + u32 wqe_size; + /* HW requires >= 1 RQ entry with >= 1 gather entry */ - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge || + inl_recv_sz > max_inl_recv_sz)) return -EINVAL; qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr)); qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); - qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg); + qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz)); } /* leave userspace return values as they were, so as not to break ABI */ @@ -719,9 +726,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); - if (err) - goto err; if (pd->uobject) { struct mlx4_ib_create_qp ucmd; @@ -731,6 +735,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; } + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); + if (err) + goto err; + + qp->inl_recv_sz = ucmd.inl_recv_sz; qp->sq_no_prefetch = ucmd.sq_no_prefetch; err = set_user_sq_size(dev, qp, &ucmd); @@ -760,6 +770,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; } } else { + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, 0); + if (err) + goto err; + qp->sq_no_prefetch = 0; if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) @@ -1651,6 +1666,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (qp->inl_recv_sz) + context->param3 |= cpu_to_be32(1 << 25); + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; else if (ibqp->qp_type == IB_QPT_RAW_PACKET) diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index af431752655c..bf3bdba2f326 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -101,7 +101,8 @@ struct mlx4_ib_create_qp { __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; - __u8 reserved[5]; + __u32 inl_recv_sz; + __u8 reserved; }; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From f3301870161ca293cd14b20a802c5646da02407f Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 21 Jun 2017 09:29:36 +0300 Subject: (IB, net)/mlx4: Add resource utilization support Adding visibility of resource usage of QPs, CQs and counters used by virtual functions. This feature will be used to give the PF administrator more data while debugging VF status. Usage info was added to ALLOC_RES command, to notify the PF if the resource which is being reserved or allocated for the VF will be used by kernel driver or by user verbs. Updated reservation and allocation functions of QP, CQ and counter with additional usage parameter. Signed-off-by: Moshe Shemesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 2 ++ drivers/infiniband/hw/mlx4/main.c | 6 ++++-- drivers/infiniband/hw/mlx4/qp.c | 13 +++++++++---- drivers/net/ethernet/mellanox/mlx4/cq.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 3 ++- drivers/net/ethernet/mellanox/mlx4/main.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx4/qp.c | 5 +++-- include/linux/mlx4/device.h | 12 ++++++++++-- 11 files changed, 45 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ff931c580557..95382faa7ad1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_mtt; uar = &to_mucontext(context)->uar; + cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) @@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_db; uar = &dev->priv_uar; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } if (dev->eq_table) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e8c290edb1e1..0944e224c0df 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2779,7 +2779,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) allocated = 0; if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { - err = mlx4_counter_alloc(ibdev->dev, &counter_index); + err = mlx4_counter_alloc(ibdev->dev, &counter_index, + MLX4_RES_USAGE_DRIVER); /* if failed to allocate a new counter, use default */ if (err) counter_index = @@ -2834,7 +2835,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, - &ibdev->steer_qpn_base, 0); + &ibdev->steer_qpn_base, 0, + MLX4_RES_USAGE_DRIVER); if (err) goto err_counter; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d1caa39a3943..247b9132e9de 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -769,6 +769,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; } + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp, 0); @@ -841,6 +842,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_wrid; } + qp->mqp.usage = MLX4_RES_USAGE_DRIVER; } if (sqpn) { @@ -860,13 +862,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, (init_attr->cap.max_send_wr ? MLX4_RESERVE_ETH_BF_QP : 0) | (init_attr->cap.max_recv_wr ? - MLX4_RESERVE_A0_QP : 0)); + MLX4_RESERVE_A0_QP : 0), + qp->mqp.usage); else if (qp->flags & MLX4_IB_QP_NETIF) err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); else err = mlx4_qp_reserve_range(dev->dev, 1, 1, - &qpn, 0); + &qpn, 0, qp->mqp.usage); if (err) goto err_proxy; } @@ -1218,7 +1221,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { - int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0); + int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, + 1, 1, &sqpn, 0, + MLX4_RES_USAGE_DRIVER); if (res) return ERR_PTR(res); @@ -1581,7 +1586,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK)) return 0; - err = mlx4_counter_alloc(dev->dev, &tmp_idx); + err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index c56a511b918e..72eb50cd5ecd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -241,13 +241,14 @@ err_out: return err; } -static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) +static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage) { + u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -303,7 +304,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->vector = vector; - err = mlx4_cq_alloc_icm(dev, &cq->cqn); + err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 85fe17e4dcfb..f849eec21824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -140,6 +140,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, (cq->type == RX && priv->hwtstamp_config.rx_filter)) timestamp_en = 1; + cq->mcq.usage = MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, cq->vector, 0, timestamp_en); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 3a291fc1780a..e3e6d9fa69fd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -651,7 +651,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return 0; } - err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 436f7689a032..ad1ffd5857cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1081,7 +1081,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, - MLX4_RESERVE_A0_QP); + MLX4_RESERVE_A0_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; @@ -1127,7 +1128,8 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0; err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn, flags); + &rss_map->base_qpn, flags, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 73faa3d77921..a81db2582555 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -105,7 +105,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, (unsigned long long) ring->sp_wqres.buf.direct.map); err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, - MLX4_RESERVE_ETH_BF_QP); + MLX4_RESERVE_ETH_BF_QP, + MLX4_RES_USAGE_DRIVER); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); goto err_hwq_res; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..fb2591d0e735 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2475,7 +2475,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev) priv->def_counter[port] = -1; for (port = 0; port < dev->caps.num_ports; port++) { - err = mlx4_counter_alloc(dev, &idx); + err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER); if (!err || err == -ENOSPC) { priv->def_counter[port] = idx; @@ -2517,13 +2517,14 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) return 0; } -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx) +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage) { + u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30); u64 out_param; int err; if (mlx4_is_mfunc(dev)) { - err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER, + err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 26747212526b..5e5b4475b85e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -245,8 +245,9 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, } int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags) + int *base, u8 flags, u8 usage) { + u32 in_modifier = RES_QP | (((u32)usage & 3) << 30); u64 in_param = 0; u64 out_param; int err; @@ -258,7 +259,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, - RES_QP, RES_OP_RESERVE, + in_modifier, RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..3607da001ad3 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -428,6 +428,12 @@ enum mlx4_steer_type { MLX4_NUM_STEERS }; +enum mlx4_resource_usage { + MLX4_RES_USAGE_NONE, + MLX4_RES_USAGE_DRIVER, + MLX4_RES_USAGE_USER_VERBS, +}; + enum { MLX4_NUM_FEXCH = 64 * 1024, }; @@ -748,6 +754,7 @@ struct mlx4_cq { } tasklet_ctx; int reset_notify_added; struct list_head reset_notify; + u8 usage; }; struct mlx4_qp { @@ -757,6 +764,7 @@ struct mlx4_qp { atomic_t refcount; struct completion free; + u8 usage; }; struct mlx4_srq { @@ -1120,7 +1128,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base, u8 flags); + int *base, u8 flags, u8 usage); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); @@ -1417,7 +1425,7 @@ int mlx4_get_phys_port_id(struct mlx4_dev *dev); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); -int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx); +int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage); void mlx4_counter_free(struct mlx4_dev *dev, u32 idx); int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port); -- cgit v1.2.3 From 400b1ebcfe31279895f1baa8ecaa390d9a4a0eef Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:24 +0300 Subject: IB/mlx4: Add support for WQ related verbs Support create/modify/destroy WQ related verbs. The base IB object to enable RSS functionality is a WQ (i.e. ib_wq). This patch implements the related WQ verbs as of create, modify and destroy. In downstream patches the WQ will be used as part of an indirection table (i.e. ib_rwq_ind_table) to enable RSS QP creation. Notes: ConnectX-3 hardware requires consecutive WQNs list as receive descriptor queues for the RSS QP. Hence, the driver manages consecutive ranges lists per context which the user must respect. Destroying the WQ does not return its WQN back to its range for reusing. However, destroying all WQs from the same range releases the range and in turn releases its WQNs for reusing. Since the WQ object is not a natural object in the hardware, the driver implements the WQ by the hardware QP. As such, the WQ inherits its port from its RSS QP parent upon its RST->INIT transition and by that time its state is applied to the hardware. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 24 ++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 25 +- drivers/infiniband/hw/mlx4/qp.c | 505 +++++++++++++++++++++++++++++++---- include/uapi/rdma/mlx4-abi.h | 14 + 4 files changed, 511 insertions(+), 57 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0944e224c0df..7c6f929ebd3e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -81,6 +81,8 @@ static const char mlx4_ib_version[] = DRV_VERSION "\n"; static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); +static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device, + u8 port_num); static struct workqueue_struct *wq; @@ -552,6 +554,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0xFFFFFFFFFFFFULL; props->max_ah = INT_MAX; + if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) + props->max_wq_type_rq = props->max_qp; + if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); @@ -1076,6 +1083,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); + INIT_LIST_HEAD(&context->wqn_ranges_list); + mutex_init(&context->wqn_ranges_mutex); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); else @@ -2720,6 +2730,20 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; + if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && + ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == + IB_LINK_LAYER_ETHERNET) || + (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == + IB_LINK_LAYER_ETHERNET))) { + ibdev->ib_dev.create_wq = mlx4_ib_create_wq; + ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; + ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + } + if (!mlx4_is_slave(ibdev->dev)) { ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index a6337f3161cf..ba4e78c064d2 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -88,6 +88,8 @@ struct mlx4_ib_ucontext { struct list_head db_page_list; struct mutex db_page_mutex; struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT]; + struct list_head wqn_ranges_list; + struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ }; struct mlx4_ib_pd { @@ -289,8 +291,19 @@ struct mlx4_roce_smac_vlan_info { int update_vid; }; +struct mlx4_wqn_range { + int base_wqn; + int size; + int refcount; + bool dirty; + struct list_head list; +}; + struct mlx4_ib_qp { - struct ib_qp ibqp; + union { + struct ib_qp ibqp; + struct ib_wq ibwq; + }; struct mlx4_qp mqp; struct mlx4_buf buf; @@ -329,6 +342,9 @@ struct mlx4_ib_qp { struct list_head cq_recv_list; struct list_head cq_send_list; struct counter_index *counter_index; + struct mlx4_wqn_range *wqn_range; + /* Number of RSS QP parents that uses this WQ */ + u32 rss_usecnt; }; struct mlx4_ib_srq { @@ -893,4 +909,11 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq); +int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 247b9132e9de..65e4ec549368 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -116,6 +116,11 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), }; +enum mlx4_ib_source_type { + MLX4_IB_QP_SRC = 0, + MLX4_IB_RWQ_SRC = 1, +}; + static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); @@ -330,6 +335,12 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } +static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type) +{ + pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n", + type, qp->qpn); +} + static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* @@ -639,7 +650,91 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +/* + * This function allocates a WQN from a range which is consecutive and aligned + * to its size. In case the range is full, then it creates a new range and + * allocates WQN from it. The new range will be used for following allocations. + */ +static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, int range_size, int *wqn) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + int err = 0; + + mutex_lock(&context->wqn_ranges_mutex); + + range = list_first_entry_or_null(&context->wqn_ranges_list, + struct mlx4_wqn_range, list); + + if (!range || (range->refcount == range->size) || range->dirty) { + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + err = -ENOMEM; + goto out; + } + + err = mlx4_qp_reserve_range(dev->dev, range_size, + range_size, &range->base_wqn, 0, + qp->mqp.usage); + if (err) { + kfree(range); + goto out; + } + + range->size = range_size; + list_add(&range->list, &context->wqn_ranges_list); + } else if (range_size != 1) { + /* + * Requesting a new range (>1) when last range is still open, is + * not valid. + */ + err = -EINVAL; + goto out; + } + + qp->wqn_range = range; + + *wqn = range->base_wqn + range->refcount; + + range->refcount++; + +out: + mutex_unlock(&context->wqn_ranges_mutex); + + return err; +} + +static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context, + struct mlx4_ib_qp *qp, bool dirty_release) +{ + struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device); + struct mlx4_wqn_range *range; + + mutex_lock(&context->wqn_ranges_mutex); + + range = qp->wqn_range; + + range->refcount--; + if (!range->refcount) { + mlx4_qp_release_range(dev->dev, range->base_wqn, + range->size); + list_del(&range->list); + kfree(range); + } else if (dirty_release) { + /* + * A range which one of its WQNs is destroyed, won't be able to be + * reused for further WQN allocations. + * The next created WQ will allocate a new range. + */ + range->dirty = 1; + } + + mutex_unlock(&context->wqn_ranges_mutex); +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, + enum mlx4_ib_source_type src, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) @@ -652,6 +747,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; unsigned long flags; + int range_size = 0; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -728,27 +824,69 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (pd->uobject) { - struct mlx4_ib_create_qp ucmd; + union { + struct mlx4_ib_create_qp qp; + struct mlx4_ib_create_wq wq; + } ucmd; + size_t copy_len; + + copy_len = (src == MLX4_IB_QP_SRC) ? + sizeof(struct mlx4_ib_create_qp) : + min(sizeof(struct mlx4_ib_create_wq), udata->inlen); - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + if (ib_copy_from_udata(&ucmd, udata, copy_len)) { err = -EFAULT; goto err; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, - qp_has_rq(init_attr), qp, ucmd.inl_recv_sz); - if (err) - goto err; + if (src == MLX4_IB_RWQ_SRC) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || + ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || + ucmd.wq.reserved[2]) { + pr_debug("user command isn't supported\n"); + err = -EOPNOTSUPP; + goto err; + } - qp->inl_recv_sz = ucmd.inl_recv_sz; - qp->sq_no_prefetch = ucmd.sq_no_prefetch; + if (ucmd.wq.log_range_size > + ilog2(dev->dev->caps.max_rss_tbl_sz)) { + pr_debug("WQN range size must be equal or smaller than %d\n", + dev->dev->caps.max_rss_tbl_sz); + err = -EOPNOTSUPP; + goto err; + } + range_size = 1 << ucmd.wq.log_range_size; + } else { + qp->inl_recv_sz = ucmd.qp.inl_recv_sz; + } - err = set_user_sq_size(dev, qp, &ucmd); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); + if (src == MLX4_IB_QP_SRC) { + qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch; + + err = set_user_sq_size(dev, qp, + (struct mlx4_ib_create_qp *) + &ucmd); + if (err) + goto err; + } else { + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + /* Allocated buffer expects to have at least that SQ + * size. + */ + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + } + + qp->umem = ib_umem_get(pd->uobject->context, + (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : + ucmd.wq.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -765,7 +903,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), - ucmd.db_addr, &qp->db); + (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr : + ucmd.wq.db_addr, &qp->db); if (err) goto err_mtt; } @@ -853,6 +992,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_wrid; } } + } else if (src == MLX4_IB_RWQ_SRC) { + err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp, + range_size, &qpn); + if (err) + goto err_wrid; } else { /* Raw packet QPNs may not have bits 6,7 set in their qp_num; * otherwise, the WQE BlueFlame setup flow wrongly causes @@ -891,7 +1035,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); - qp->mqp.event = mlx4_ib_qp_event; + qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event : + mlx4_ib_wq_event; + if (!*caller_qp) *caller_qp = qp; @@ -918,6 +1064,9 @@ err_qpn: if (!sqpn) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext(pd->uobject->context), + qp, 0); else mlx4_qp_release_range(dev->dev, qpn, 1); } @@ -1016,7 +1165,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) return to_mpd(qp->ibqp.pd); } -static void get_cqs(struct mlx4_ib_qp *qp, +static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) { switch (qp->ibqp.qp_type) { @@ -1029,14 +1178,16 @@ static void get_cqs(struct mlx4_ib_qp *qp, *recv_cq = *send_cq; break; default: - *send_cq = to_mcq(qp->ibqp.send_cq); - *recv_cq = to_mcq(qp->ibqp.recv_cq); + *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) : + to_mcq(qp->ibwq.cq); + *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) : + *recv_cq; break; } } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - int is_user) + enum mlx4_ib_source_type src, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1069,7 +1220,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } } - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src, &send_cq, &recv_cq); spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -1095,6 +1246,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); + else if (src == MLX4_IB_RWQ_SRC) + mlx4_ib_release_wqn(to_mucontext( + qp->ibwq.uobject->context), qp, 1); else mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); } @@ -1102,9 +1256,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (qp->rq.wqe_cnt) - mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), - &qp->db); + if (qp->rq.wqe_cnt) { + struct mlx4_ib_ucontext *mcontext = !src ? + to_mucontext(qp->ibqp.uobject->context) : + to_mucontext(qp->ibwq.uobject->context); + mlx4_ib_db_unmap_user(mcontext, &qp->db); + } ib_umem_release(qp->umem); } else { kvfree(qp->sq.wrid); @@ -1200,8 +1357,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_UD: { - err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1231,8 +1388,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, &qp); + err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, + init_attr, udata, sqpn, &qp); if (err) return ERR_PTR(err); @@ -1303,7 +1460,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) mlx4_ib_free_qp_counter(dev, mqp); pd = get_pd(mqp); - destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1626,12 +1783,15 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } -static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, +static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { - struct mlx4_ib_dev *dev = to_mdev(ibqp->device); - struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct ib_uobject *ibuobject; + struct ib_srq *ibsrq; + enum ib_qp_type qp_type; + struct mlx4_ib_dev *dev; + struct mlx4_ib_qp *qp; struct mlx4_ib_pd *pd; struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; @@ -1641,6 +1801,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int err = -EINVAL; int counter_index; + if (src_type == MLX4_IB_RWQ_SRC) { + struct ib_wq *ibwq; + + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); + } else { + struct ib_qp *ibqp; + + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); + } + /* APM is not supported under RoCE */ if (attr_mask & IB_QP_ALT_PATH && rdma_port_get_link_layer(&dev->ib_dev, qp->port) == @@ -1674,11 +1856,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (qp->inl_recv_sz) context->param3 |= cpu_to_be32(1 << 25); - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; - else if (ibqp->qp_type == IB_QPT_RAW_PACKET) + else if (qp_type == IB_QPT_RAW_PACKET) context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; - else if (ibqp->qp_type == IB_QPT_UD) { + else if (qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); @@ -1708,14 +1890,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; context->xrcd = cpu_to_be32((u32) qp->xrcdn); - if (ibqp->qp_type == IB_QPT_RAW_PACKET) + if (qp_type == IB_QPT_RAW_PACKET) context->param3 |= cpu_to_be32(1 << 30); } - if (qp->ibqp.uobject) + if (ibuobject) context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, - to_mucontext(ibqp->uobject->context)->uar.index)); + to_mucontext(ibuobject->context) + ->uar.index)); else context->usr_page = cpu_to_be32( mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index)); @@ -1759,7 +1942,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, steer_qp = 1; } - if (ibqp->qp_type == IB_QPT_GSI) { + if (qp_type == IB_QPT_GSI) { enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ? IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE; u8 qpc_roce_mode = gid_type_to_qpc(gid_type); @@ -1776,7 +1959,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : + u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; @@ -1791,7 +1974,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, int index = rdma_ah_read_grh(&attr->ah_attr)->sgid_index; - status = ib_get_cached_gid(ibqp->device, port_num, + status = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &gid_attr); if (!status && !memcmp(&gid, &zgid, sizeof(gid))) status = -ENOENT; @@ -1848,15 +2031,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - pd = get_pd(qp); - get_cqs(qp, &send_cq, &recv_cq); + get_cqs(qp, src_type, &send_cq, &recv_cq); context->pd = cpu_to_be32(pd->pdn); context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); /* Set "fast registration enabled" for all kernel QPs */ - if (!qp->ibqp.uobject) + if (!ibuobject) context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { @@ -1891,7 +2073,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; } - if (ibqp->srq) + if (ibsrq) context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { @@ -1922,17 +2104,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_Q_KEY; } - if (ibqp->srq) - context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); + if (ibsrq) + context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibsrq)->msrq.srqn); - if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && - (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD || - ibqp->qp_type == IB_QPT_RAW_PACKET)) { + (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI || + qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || qp->mlx4_ib_qp_type & @@ -1965,7 +2149,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + if (qp_type == IB_QPT_RAW_PACKET) { context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | MLX4_IB_LINK_TYPE_ETH; if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { @@ -1975,7 +2159,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { + if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { int is_eth = rdma_port_get_link_layer( &dev->ib_dev, qp->port) == IB_LINK_LAYER_ETHERNET; @@ -1985,14 +2169,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) context->rlkey_roce_mode |= (1 << 4); /* @@ -2001,7 +2186,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * headroom is stamped so that the hardware doesn't start * processing stale work requests. */ - if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + if (!ibuobject && + cur_state == IB_QPS_RESET && + new_state == IB_QPS_INIT) { struct mlx4_wqe_ctrl_seg *ctrl; int i; @@ -2058,9 +2245,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * entries and reinitialize the QP. */ if (new_state == IB_QPS_RESET) { - if (!ibqp->uobject) { + if (!ibuobject) { mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq) : NULL); + ibsrq ? to_msrq(ibsrq) : NULL); if (send_cq != recv_cq) mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); @@ -2265,7 +2452,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, + cur_state, new_state); if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3550,3 +3738,208 @@ out: return err; } +struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev; + struct ib_qp_init_attr ib_qp_init_attr; + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_wq ucmd; + int err, required_cmd_sz; + + if (!(udata && pd->uobject)) + return ERR_PTR(-EINVAL); + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + dev = to_mdev(pd->device); + + if (init_attr->wq_type != IB_WQT_RQ) { + pr_debug("unsupported wq type %d\n", init_attr->wq_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("unsupported create_flags %u\n", + init_attr->create_flags); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr)); + ib_qp_init_attr.qp_context = init_attr->wq_context; + ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET; + ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr; + ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge; + ib_qp_init_attr.recv_cq = init_attr->cq; + ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */ + + err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr, + udata, 0, &qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibwq.event_handler = init_attr->event_handler; + qp->ibwq.wq_num = qp->mqp.qpn; + qp->ibwq.state = IB_WQS_RESET; + + return &qp->ibwq; +} + +static int ib_wq2qp_state(enum ib_wq_state state) +{ + switch (state) { + case IB_WQS_RESET: + return IB_QPS_RESET; + case IB_WQS_RDY: + return IB_QPS_RTR; + default: + return IB_QPS_ERR; + } +} + +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + enum ib_qp_state qp_cur_state; + enum ib_qp_state qp_new_state; + int attr_mask; + int err; + + /* ib_qp.state represents the WQ HW state while ib_wq.state represents + * the WQ logic state. + */ + qp_cur_state = qp->state; + qp_new_state = ib_wq2qp_state(new_state); + + if (ib_wq2qp_state(new_state) == qp_cur_state) + return 0; + + if (new_state == IB_WQS_RDY) { + struct ib_qp_attr attr = {}; + + attr.port_num = qp->port; + attr_mask = IB_QP_PORT; + + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr, + attr_mask, IB_QPS_RESET, IB_QPS_INIT); + if (err) { + pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n", + ibwq->wq_num); + return err; + } + + qp_cur_state = IB_QPS_INIT; + } + + attr_mask = 0; + err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask, + qp_cur_state, qp_new_state); + + if (err && (qp_cur_state == IB_QPS_INIT)) { + qp_new_state = IB_QPS_RESET; + if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, + attr_mask, IB_QPS_INIT, IB_QPS_RESET)) { + pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n", + ibwq->wq_num); + qp_new_state = IB_QPS_INIT; + } + } + + qp->state = qp_new_state; + + return err; +} + +int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + struct mlx4_ib_modify_wq ucmd = {}; + size_t required_cmd_sz; + enum ib_wq_state cur_state, new_state; + int err = 0; + + required_cmd_sz = offsetof(typeof(ucmd), reserved) + + sizeof(ucmd.reserved); + if (udata->inlen < required_cmd_sz) + return -EINVAL; + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) + return -EOPNOTSUPP; + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) + return -EFAULT; + + if (ucmd.comp_mask || ucmd.reserved) + return -EOPNOTSUPP; + + if (wq_attr_mask & IB_WQ_FLAGS) + return -EOPNOTSUPP; + + cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state : + ibwq->state; + new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state; + + if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR || + new_state < IB_WQS_RESET || new_state > IB_WQS_ERR) + return -EINVAL; + + if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR)) + return -EINVAL; + + if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) + return -EINVAL; + + /* Can update HW state only if a RSS QP has already associated to this + * WQ, so we can apply its port on the WQ. + */ + if (qp->rss_usecnt) + err = _mlx4_ib_modify_wq(ibwq, new_state); + + if (!err) + ibwq->state = new_state; + + return err; +} + +int mlx4_ib_destroy_wq(struct ib_wq *ibwq) +{ + struct mlx4_ib_dev *dev = to_mdev(ibwq->device); + struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); + + if (qp->counter_index) + mlx4_ib_free_qp_counter(dev, qp); + + destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1); + + kfree(qp); + + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index bf3bdba2f326..c9702a5f0bda 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -105,4 +105,18 @@ struct mlx4_ib_create_qp { __u8 reserved; }; +struct mlx4_ib_create_wq { + __u64 buf_addr; + __u64 db_addr; + __u8 log_range_size; + __u8 reserved[3]; + __u32 comp_mask; + __u32 reserved1; +}; + +struct mlx4_ib_modify_wq { + __u32 comp_mask; + __u32 reserved; +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From b8d46ca035060e70f5f0da849d86720752d5aa17 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:25 +0300 Subject: IB/mlx4: Add support for WQ indirection table related verbs To enable RSS functionality the IB indirection table object (i.e. ib_rwq_ind_table) should be used. This patch implements the related verbs as of create and destroy an indirection table. In downstream patches the indirection table will be used as part of RSS QP creation. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 12 +++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 6 ++++ drivers/infiniband/hw/mlx4/qp.c | 70 ++++++++++++++++++++++++++++++++++++ include/uapi/rdma/mlx4-abi.h | 4 +++ 4 files changed, 89 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7c6f929ebd3e..b42234571a8c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2738,10 +2738,16 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.create_wq = mlx4_ib_create_wq; ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; + ibdev->ib_dev.create_rwq_ind_table = + mlx4_ib_create_rwq_ind_table; + ibdev->ib_dev.destroy_rwq_ind_table = + mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); } if (!mlx4_is_slave(ibdev->dev)) { diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ba4e78c064d2..85525bc400a0 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -916,4 +916,10 @@ int mlx4_ib_destroy_wq(struct ib_wq *wq); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 65e4ec549368..519919d15474 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3943,3 +3943,73 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq) return 0; } + +struct ib_rwq_ind_table +*mlx4_ib_create_rwq_ind_table(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + struct ib_rwq_ind_table *rwq_ind_table; + struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; + unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + unsigned int base_wqn; + size_t min_resp_len; + int i; + int err; + + if (udata->inlen > 0 && + !ib_is_udata_cleared(udata, 0, + udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + if (udata->outlen && udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + if (ind_tbl_size > + device->attrs.rss_caps.max_rwq_indirection_table_size) { + pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", + ind_tbl_size, + device->attrs.rss_caps.max_rwq_indirection_table_size); + return ERR_PTR(-EINVAL); + } + + base_wqn = init_attr->ind_tbl[0]->wq_num; + + if (base_wqn % ind_tbl_size) { + pr_debug("WQN=0x%x isn't aligned with indirection table size\n", + base_wqn); + return ERR_PTR(-EINVAL); + } + + for (i = 1; i < ind_tbl_size; i++) { + if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { + pr_debug("indirection table's WQNs aren't consecutive\n"); + return ERR_PTR(-EINVAL); + } + } + + rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); + if (!rwq_ind_table) + return ERR_PTR(-ENOMEM); + + if (udata->outlen) { + resp.response_length = offsetof(typeof(resp), response_length) + + sizeof(resp.response_length); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + goto err; + } + + return rwq_ind_table; + +err: + kfree(rwq_ind_table); + return ERR_PTR(err); +} + +int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + kfree(ib_rwq_ind_tbl); + return 0; +} diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index c9702a5f0bda..5591d955ba00 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -119,4 +119,8 @@ struct mlx4_ib_modify_wq { __u32 reserved; }; +struct mlx4_ib_create_rwq_ind_tbl_resp { + __u32 response_length; + __u32 reserved; +}; #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From 3078f5f1bd8b6c8aef77b8ef4d49671fa6eb058e Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:26 +0300 Subject: IB/mlx4: Add support for RSS QP Add support to work with a RSS QP by using an indirection table object upon QP creation. Other related QP verbs (e.g. modify/destroy/query) were updated as well for that QP mode. Notes: - The RX hash properties are supplied as driver private data. - The RSS QP port is used on the associated WQs in its indirection table. Applying different ports during WQ life time is not allowed. - The expected RSS QP flow is: create, modify(RST->INIT), modify(RST->RTR), destroy. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 8 + drivers/infiniband/hw/mlx4/qp.c | 453 +++++++++++++++++++++++++++++++++-- include/uapi/rdma/mlx4-abi.h | 33 +++ 3 files changed, 472 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 85525bc400a0..1fa19820355a 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -46,6 +46,7 @@ #include #include +#include #define MLX4_IB_DRV_NAME "mlx4_ib" @@ -299,6 +300,12 @@ struct mlx4_wqn_range { struct list_head list; }; +struct mlx4_ib_rss { + unsigned int base_qpn_tbl_sz; + u8 flags; + u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -345,6 +352,7 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; + struct mlx4_ib_rss *rss_ctx; }; struct mlx4_ib_srq { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 519919d15474..e42acfb20588 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -53,6 +53,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); +static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state); enum { MLX4_IB_ACK_REQ_FREQ = 8, @@ -650,6 +651,212 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, qp->counter_index = NULL; } +static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd) +{ + rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num | + (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24); + + if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) && + (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) { + memcpy(rss_ctx->rss_key, ucmd->rx_hash_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + pr_debug("RX Hash function is not supported\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + rss_ctx->flags = MLX4_RSS_IPV4; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) { + pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + rss_ctx->flags |= MLX4_RSS_IPV6; + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) { + pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) { + pr_debug("RX Hash fields_mask for UDP is not supported\n"); + return (-EOPNOTSUPP); + } + + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_UDP_IPV6; + } else { + pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) { + pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) && + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + if (rss_ctx->flags & MLX4_RSS_IPV4) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV4; + } else if (rss_ctx->flags & MLX4_RSS_IPV6) { + rss_ctx->flags |= MLX4_RSS_TCP_IPV6; + } else { + pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n"); + return (-EOPNOTSUPP); + } + + } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) || + (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) { + pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n"); + return (-EOPNOTSUPP); + } + + return 0; +} + +static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct mlx4_ib_create_qp_rss *ucmd, + struct mlx4_ib_qp *qp) +{ + int qpn; + int err; + + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; + + err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage); + if (err) + return err; + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + if (err) + goto err_qpn; + + mutex_init(&qp->mutex); + + INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); + + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE; + qp->state = IB_QPS_RESET; + + /* Set dummy send resources to be compatible with HV and PRM */ + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE; + qp->mtt = (to_mqp( + (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt; + + qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL); + if (!qp->rss_ctx) { + err = -ENOMEM; + goto err_qp_alloc; + } + + err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd); + if (err) + goto err; + + return 0; + +err: + kfree(qp->rss_ctx); + +err_qp_alloc: + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + +err_qpn: + mlx4_qp_release_range(dev->dev, qpn, 1); + return err; +} + +static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx4_ib_qp *qp; + struct mlx4_ib_create_qp_rss ucmd = {}; + size_t required_cmd_sz; + int err; + + if (!udata) { + pr_debug("RSS QP with NULL udata\n"); + return ERR_PTR(-EINVAL); + } + + if (udata->outlen) + return ERR_PTR(-EOPNOTSUPP); + + required_cmd_sz = offsetof(typeof(ucmd), reserved1) + + sizeof(ucmd.reserved1); + if (udata->inlen < required_cmd_sz) { + pr_debug("invalid inlen\n"); + return ERR_PTR(-EINVAL); + } + + if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { + pr_debug("copy failed\n"); + return ERR_PTR(-EFAULT); + } + + if (ucmd.comp_mask || ucmd.reserved1) + return ERR_PTR(-EOPNOTSUPP); + + if (udata->inlen > sizeof(ucmd) && + !ib_is_udata_cleared(udata, sizeof(ucmd), + udata->inlen - sizeof(ucmd))) { + pr_debug("inlen is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + pr_debug("RSS QP with unsupported QP type %d\n", + init_attr->qp_type); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->create_flags) { + pr_debug("RSS QP doesn't support create flags\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + if (init_attr->send_cq || init_attr->cap.max_send_wr) { + pr_debug("RSS QP with unsupported send attributes\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + + err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + qp->ibqp.qp_num = qp->mqp.qpn; + + return &qp->ibqp; +} + /* * This function allocates a WQN from a range which is consecutive and aligned * to its size. In case the range is full, then it creates a new range and @@ -1186,6 +1393,36 @@ static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, } } +static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + if (qp->state != IB_QPS_RESET) { + int i; + + for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size); + i++) { + struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + + if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), + MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) + pr_warn("modify QP %06x to RESET failed.\n", + qp->mqp.qpn); + } + + mlx4_qp_remove(dev->dev, &qp->mqp); + mlx4_qp_free(dev->dev, &qp->mqp); + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + del_gid_entries(qp); + kfree(qp->rss_ctx); +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src, int is_user) { @@ -1303,6 +1540,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; + if (init_attr->rwq_ind_tbl) + return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1444,7 +1684,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); - struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); @@ -1459,8 +1698,14 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (mqp->counter_index) mlx4_ib_free_qp_counter(dev, mqp); - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + if (qp->rwq_ind_tbl) { + destroy_qp_rss(dev, mqp); + } else { + struct mlx4_ib_pd *pd; + + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + } if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -1783,12 +2028,116 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) } } +/* + * Go over all RSS QP's childes (WQs) and apply their HW state according to + * their logic state if the RSS QP is the first RSS QP associated for the WQ. + */ +static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num) +{ + int i; + int err; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + /* Mlx4_ib restrictions: + * WQ's is associated to a port according to the RSS QP it is + * associates to. + * In case the WQ is associated to a different port by another + * RSS QP, return a failure. + */ + if ((wq->rss_usecnt > 0) && (wq->port != port_num)) { + err = -EINVAL; + mutex_unlock(&wq->mutex); + break; + } + wq->port = port_num; + if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) { + err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY); + if (err) { + mutex_unlock(&wq->mutex); + break; + } + } + wq->rss_usecnt++; + + mutex_unlock(&wq->mutex); + } + + if (i && err) { + int j; + + for (j = (i - 1); j >= 0; j--) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[j]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && + (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=0x%06x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } + } + + return err; +} + +static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl) +{ + int i; + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; + struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq); + + mutex_lock(&wq->mutex); + + if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY)) + if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET)) + pr_warn("failed to reverse WQN=%x\n", + ibwq->wq_num); + wq->rss_usecnt--; + + mutex_unlock(&wq->mutex); + } +} + +static void fill_qp_rss_context(struct mlx4_qp_context *context, + struct mlx4_ib_qp *qp) +{ + struct mlx4_rss_context *rss_context; + + rss_context = (void *)context + offsetof(struct mlx4_qp_context, + pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; + + rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz); + rss_context->default_qpn = + cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff); + if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6)) + rss_context->base_qpn_udp = rss_context->default_qpn; + rss_context->flags = qp->rss_ctx->flags; + /* Currently support just toeplitz */ + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + + memcpy(rss_context->rss_key, qp->rss_ctx->rss_key, + MLX4_EN_RSS_KEY_SIZE); +} + static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { struct ib_uobject *ibuobject; struct ib_srq *ibsrq; + struct ib_rwq_ind_table *rwq_ind_tbl; enum ib_qp_type qp_type; struct mlx4_ib_dev *dev; struct mlx4_ib_qp *qp; @@ -1804,23 +2153,25 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, if (src_type == MLX4_IB_RWQ_SRC) { struct ib_wq *ibwq; - ibwq = (struct ib_wq *)src; - ibuobject = ibwq->uobject; - ibsrq = NULL; - qp_type = IB_QPT_RAW_PACKET; - qp = to_mqp((struct ib_qp *)ibwq); - dev = to_mdev(ibwq->device); - pd = to_mpd(ibwq->pd); + ibwq = (struct ib_wq *)src; + ibuobject = ibwq->uobject; + ibsrq = NULL; + rwq_ind_tbl = NULL; + qp_type = IB_QPT_RAW_PACKET; + qp = to_mqp((struct ib_qp *)ibwq); + dev = to_mdev(ibwq->device); + pd = to_mpd(ibwq->pd); } else { struct ib_qp *ibqp; - ibqp = (struct ib_qp *)src; - ibuobject = ibqp->uobject; - ibsrq = ibqp->srq; - qp_type = ibqp->qp_type; - qp = to_mqp(ibqp); - dev = to_mdev(ibqp->device); - pd = get_pd(qp); + ibqp = (struct ib_qp *)src; + ibuobject = ibqp->uobject; + ibsrq = ibqp->srq; + rwq_ind_tbl = ibqp->rwq_ind_tbl; + qp_type = ibqp->qp_type; + qp = to_mqp(ibqp); + dev = to_mdev(ibqp->device); + pd = get_pd(qp); } /* APM is not supported under RoCE */ @@ -1836,6 +2187,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); + if (rwq_ind_tbl) { + fill_qp_rss_context(context, qp); + context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET); + } + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); else { @@ -1876,9 +2232,11 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, ilog2(dev->dev->caps.max_msg_sz); } - if (qp->rq.wqe_cnt) - context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; - context->rq_size_stride |= qp->rq.wqe_shift - 4; + if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */ + if (qp->rq.wqe_cnt) + context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3; + context->rq_size_stride |= qp->rq.wqe_shift - 4; + } if (qp->sq.wqe_cnt) context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; @@ -2031,8 +2389,14 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - get_cqs(qp, src_type, &send_cq, &recv_cq); - context->pd = cpu_to_be32(pd->pdn); + context->pd = cpu_to_be32(pd->pdn); + + if (!rwq_ind_tbl) { + get_cqs(qp, src_type, &send_cq, &recv_cq); + } else { /* Set dummy CQs to be compatible with HV and PRM */ + send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq); + recv_cq = send_cq; + } context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); @@ -2358,6 +2722,11 @@ out: return err; } +enum { + MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE | + IB_QP_PORT), +}; + static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -2388,6 +2757,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl) { + if (!(((cur_state == IB_QPS_RESET) && + (new_state == IB_QPS_INIT)) || + ((cur_state == IB_QPS_INIT) && + (new_state == IB_QPS_RTR)))) { + pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n", + ibqp->qp_num, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + + if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) { + pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n", + ibqp->qp_num, attr_mask, cur_state, new_state); + + err = -EOPNOTSUPP; + goto out; + } + } + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) { if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { if ((ibqp->qp_type == IB_QPT_RC) || @@ -2452,9 +2842,18 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } + if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) { + err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num); + if (err) + goto out; + } + err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask, cur_state, new_state); + if (ibqp->rwq_ind_tbl && err) + bring_down_rss_rwqs(ibqp->rwq_ind_tbl); + if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) attr->port_num = 1; @@ -3643,6 +4042,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr int mlx4_state; int err = 0; + if (ibqp->rwq_ind_tbl) + return -EOPNOTSUPP; + mutex_lock(&qp->mutex); if (qp->state == IB_QPS_RESET) { @@ -3917,6 +4319,11 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET)) return -EINVAL; + /* Need to protect against the parent RSS which also may modify WQ + * state. + */ + mutex_lock(&qp->mutex); + /* Can update HW state only if a RSS QP has already associated to this * WQ, so we can apply its port on the WQ. */ @@ -3926,6 +4333,8 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, if (!err) ibwq->state = new_state; + mutex_unlock(&qp->mutex); + return err; } diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 5591d955ba00..d915cab37ec3 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -95,6 +95,16 @@ struct mlx4_ib_create_srq_resp { __u32 reserved; }; +struct mlx4_ib_create_qp_rss { + __u64 rx_hash_fields_mask; + __u8 rx_hash_function; + __u8 rx_key_len; + __u8 reserved[6]; + __u8 rx_hash_key[40]; + __u32 comp_mask; + __u32 reserved1; +}; + struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; @@ -123,4 +133,27 @@ struct mlx4_ib_create_rwq_ind_tbl_resp { __u32 response_length; __u32 reserved; }; + +/* RX Hash function flags */ +enum mlx4_ib_rx_hash_function_flags { + MLX4_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0, +}; + +/* + * RX Hash flags, these flags allows to set which incoming packet's field should + * participates in RX Hash. Each flag represent certain packet's field, + * when the flag is set the field that is represented by the flag will + * participate in RX Hash calculation. + */ +enum mlx4_ib_rx_hash_fields { + MLX4_IB_RX_HASH_SRC_IPV4 = 1 << 0, + MLX4_IB_RX_HASH_DST_IPV4 = 1 << 1, + MLX4_IB_RX_HASH_SRC_IPV6 = 1 << 2, + MLX4_IB_RX_HASH_DST_IPV6 = 1 << 3, + MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4, + MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5, + MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6, + MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + #endif /* MLX4_ABI_USER_H */ -- cgit v1.2.3 From 6afff1c7152f6f0787d7ad5de2e124f8643f0a45 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Tue, 4 Jul 2017 16:24:27 +0300 Subject: IB/mlx4: Expose RSS capabilities As a final step to support RSS feature, expose the RSS capabilities in query device verb. It includes: - Max rwq indirection tables. - Max rwq indirection table size. - Supported qp types. Signed-off-by: Guy Levi Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b42234571a8c..1dee0a51ef67 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -556,8 +556,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET || - mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) + mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) { + props->rss_caps.max_rwq_indirection_tables = props->max_qp; + props->rss_caps.max_rwq_indirection_table_size = + dev->dev->caps.max_rss_tbl_sz; + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; props->max_wq_type_rq = props->max_qp; + } if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); -- cgit v1.2.3 From ff8d836efe0629ad473b06a94a379682baf43cb9 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 17 Jun 2017 10:37:34 -0700 Subject: IB/hfi1: Add receiving queue info to qp_stats This patch adds qp->s_ack_queue indices and size to qp_stats printout. This information will provide information about the receiving side. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a7af9f60c13..ee2c74dce386 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -607,7 +607,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -630,6 +630,10 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_last, qp->s_acked, qp->s_cur, qp->s_tail, qp->s_head, qp->s_size, qp->s_avail, + /* ack_queue ring pointers, size */ + qp->s_tail_ack_queue, qp->r_head_ack_queue, + HFI1_MAX_RDMA_ATOMIC, + /* remote QP info */ qp->remote_qpn, rdma_ah_get_dlid(&qp->remote_ah_attr), rdma_ah_get_sl(&qp->remote_ah_attr), -- cgit v1.2.3 From 9c2d33d44dccfb0cdac987ddc3716527ebf971e0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Jun 2017 08:40:59 +0100 Subject: net/mlx5: fix spelling mistake: "alloated" -> "allocated" Trivial fix to spelling mistake in mlx5_ib_dbg message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index faafea0b7c2f..fe60c363d95b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1137,7 +1137,7 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) return -EINVAL; - mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n", + mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n", MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", lib_uar_4k ? "yes" : "no", ref_bfregs, req->total_num_bfregs, *num_sys_pages); -- cgit v1.2.3 From ad84dad2160d5f36bb471b391462d651c887d693 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:05 +0300 Subject: RDMA/qedr: notify user application if DPM is supported Direct Packet Mode support may be disabled, e.g, due to limited resources. Notifying the user application prevents wasting cycles on attempting to send these kind of packets. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/main.c | 1 + drivers/infiniband/hw/qedr/qedr.h | 2 ++ drivers/infiniband/hw/qedr/verbs.c | 1 + include/uapi/rdma/qedr-abi.h | 1 + 4 files changed, 5 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 0ae30f5c8cbc..199b6edbef92 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -777,6 +777,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, if (rc) goto init_err; + dev->user_dpm_enabled = dev_info.user_dpm_enabled; dev->num_hwfns = dev_info.common.num_hwfns; dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 392e76e26840..b2bb42e2805d 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -162,6 +162,8 @@ struct qedr_dev { struct qedr_qp *gsi_qp; unsigned long enet_state; + + u8 user_dpm_enabled; }; #define QEDR_MAX_SQ_PBL (0x8000) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 2ae71b8f1ba8..4322ee00498e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -376,6 +376,7 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); + uresp.dpm_enabled = dev->user_dpm_enabled; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 75c270d839c8..2684004ec4fd 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -49,6 +49,7 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_recv_wr; __u32 sges_per_srq_wr; __u32 max_cqes; + __u8 dpm_enabled; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3 From 67cbe3532c2cd84303a2073cedad6b8bcad13be3 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Mon, 26 Jun 2017 19:05:06 +0300 Subject: RDMA/qedr: notify user application of supported WIDs The number of supported WIDs, if they are supported at all, can be limited due to resources. Notifying the user space application the number of available WIDs allows it to utilize them correctly. Signed-off-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 ++ include/uapi/rdma/qedr-abi.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 4322ee00498e..9ee2dce3e5bb 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -377,6 +377,8 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, memset(&uresp, 0, sizeof(uresp)); uresp.dpm_enabled = dev->user_dpm_enabled; + uresp.wids_enabled = 1; + uresp.wid_count = oparams.wid_count; uresp.db_pa = ctx->dpi_phys_addr; uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h index 2684004ec4fd..54b64357ab24 100644 --- a/include/uapi/rdma/qedr-abi.h +++ b/include/uapi/rdma/qedr-abi.h @@ -50,6 +50,8 @@ struct qedr_alloc_ucontext_resp { __u32 sges_per_srq_wr; __u32 max_cqes; __u8 dpm_enabled; + __u8 wids_enabled; + __u16 wid_count; }; struct qedr_alloc_pd_ureq { -- cgit v1.2.3 From 3c7f67d1880db4bda8eed12ca603c92b5434390e Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 28 Jul 2017 13:47:24 -0400 Subject: IB/cma: Fix default RoCE type setting The initial patch for changing the stack to use RoCEv2 GIDs by default set the CMA_PREFERRED_ROCE_GID_TYPE to an incorrect value. Instead of an absolute value, we needed to set the right bit in a bitmask. Correct the default setting so we use RoCEv2 by default. Fixes: 63a5f483af0e (IB/cma: Set default gid type to RoCEv2) Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2d1fb8205ff0..ca4135c596ba 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,7 +72,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 -#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP +#define CMA_PREFERRED_ROCE_GID_TYPE (1 << IB_GID_TYPE_ROCE_UDP_ENCAP) static const char * const cma_events[] = { [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", -- cgit v1.2.3 From e89bf462b6bece63a60723af88c76dce9dbe6b85 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 8 Jun 2017 17:23:50 +0300 Subject: IB/hns: Support compile test for hns RoCE driver Compiling the hns RoCE driver requires ARM architecture. In order to simplify development of IB/core, support compile test. Add the necessary includes for that too. Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 2 +- drivers/infiniband/hw/hns/hns_roce_alloc.c | 1 + drivers/infiniband/hw/hns/hns_roce_mr.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index e1a6e055cd60..cbe6b51394fd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on ARM64 && HNS && HNS_DSAF && HNS_ENET + depends on (ARM64 || COMPILE_TEST) && HNS && HNS_DSAF && HNS_ENET ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hi1610 and more further ICT SoC. diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 605962f2828c..e1b433cdd5e2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -32,6 +32,7 @@ */ #include +#include #include "hns_roce_device.h" int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 80fc01ffd8bd..e387360e3780 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "hns_roce_device.h" #include "hns_roce_cmd.h" -- cgit v1.2.3 From 3756c7f59a4538bb40d77a25221dc411cc7dd2e9 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:24 +0800 Subject: IB/hns: fix boolreturn.cocci warnings drivers/infiniband/hw/hns/hns_roce_qp.c:802:9-10: WARNING: return of 0/1 in function 'hns_roce_wq_overflow' with return type bool Return statements in functions returning bool should use true/false instead of 1/0. Generated by: scripts/coccinelle/misc/boolreturn.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 054c52699090..f5dd21c2d275 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -799,7 +799,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; if (likely(cur + nreq < hr_wq->max_post)) - return 0; + return false; hr_cq = to_hr_cq(ib_cq); spin_lock(&hr_cq->lock); -- cgit v1.2.3 From 87809f83a4ba0bfe2b7f6b7d4611994fdbed8c8f Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:25 +0800 Subject: IB/hns: fix returnvar.cocci warnings drivers/infiniband/hw/hns/hns_roce_hw_v1.c:2026:5-8: Unneeded variable: "ret". Return "0" on line 2046 Remove unneeded variable used to store return value. Generated by: scripts/coccinelle/misc/returnvar.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 23fad6d96944..38f5c77baabf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2023,7 +2023,6 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; u32 doorbell[2]; - int ret = 0; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2043,7 +2042,7 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) hns_roce_write64_k(doorbell, hr_cq->cq_db_l); - return ret; + return 0; } static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, -- cgit v1.2.3 From d1d71499a65f8d0c7aa776aa70c56974ead45e56 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 25 Jul 2017 13:36:25 +0800 Subject: IB/hns: fix semicolon.cocci warnings drivers/infiniband/hw/hns/hns_roce_eq.c:295:3-4: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") CC: Matan Barak Signed-off-by: Fengguang Wu Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 50f864935a0e..0881813c9f14 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -292,7 +292,7 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n", event_type, eq->eqn, eq->cons_index); break; - }; + } eq->cons_index++; aeqes_found = 1; -- cgit v1.2.3 From ecd840ff9b793ac60e3e6658414525535349a17b Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 28 Jul 2017 15:17:18 -0400 Subject: RDMA/hns: fix build regression The 0day build system flags implicit includes as errors. A patch from Matan Barak to allow hns_roce, an aarch64 specific RDMA driver, to be built on other arches, but it resulted in build regressions. The problem is that hns_roce_device.h needs a definition for __raw_writeq but did not have an include to provide it. Add as an include to resolve the issue. Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e493a61e14e1..aeef2adf1a45 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -33,6 +33,7 @@ #ifndef _HNS_ROCE_DEVICE_H #define _HNS_ROCE_DEVICE_H +#include #include #define DRV_NAME "hns_roce" -- cgit v1.2.3 From 967de35826070026e51e66dcf020059ef6ce26b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 28 Jul 2017 15:35:22 +0200 Subject: IB/hns: include linux/interrupt.h I ran into this build error on linux-next: drivers/infiniband/hw/hns/hns_roce_eq.c:477:8: error: unknown type name 'irqreturn_t' static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr) ^~~~~~~~~~~ drivers/infiniband/hw/hns/hns_roce_eq.c: In function 'hns_roce_msi_x_interrupt': drivers/infiniband/hw/hns/hns_roce_eq.c:485:9: error: implicit declaration of function 'IRQ_RETVAL'; did you mean 'BPF_RVAL'? [-Werror=implicit-function-declaration] return IRQ_RETVAL(int_work); I have bisected this to a seemingly unrelated change that happened to remove some indirect header inclusions. Simply including the required header explicitly fixes the build failure. Fixes: 09c7570480f7 ("xfrm: remove flow cache") Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_eq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c index 0881813c9f14..b0f43735de1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_eq.c +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -31,6 +31,7 @@ */ #include +#include #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_eq.h" -- cgit v1.2.3 From c53df62c7a9a82c7375308af270f90a08e94f6b6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 30 Jun 2017 13:14:40 -0700 Subject: IB/hfi1: Check return values from PCI config API calls Ensure that return values from kernel PCI config access API calls in HFI driver are checked and react properly if they are not expected (i.e. not successful). Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 22 +++- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/pcie.c | 238 +++++++++++++++++++++++++++++++------- 3 files changed, 215 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 937350d9deab..efc84810e821 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -13846,9 +13846,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd) * a reset following the (possible) FLR in this routine. * */ -static void init_chip(struct hfi1_devdata *dd) +static int init_chip(struct hfi1_devdata *dd) { int i; + int ret = 0; /* * Put the HFI CSRs in a known state. @@ -13896,12 +13897,22 @@ static void init_chip(struct hfi1_devdata *dd) pcie_flr(dd->pcidev); /* restore command and BARs */ - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } if (is_ax(dd)) { dd_dev_info(dd, "Resetting CSRs with FLR\n"); pcie_flr(dd->pcidev); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return ret; + } } } else { dd_dev_info(dd, "Resetting CSRs with writes\n"); @@ -13929,6 +13940,7 @@ static void init_chip(struct hfi1_devdata *dd) write_csr(dd, ASIC_QSFP1_OUT, 0x1f); write_csr(dd, ASIC_QSFP2_OUT, 0x1f); init_chip_resources(dd); + return ret; } static void init_early_variables(struct hfi1_devdata *dd) @@ -14914,7 +14926,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, goto bail_cleanup; /* obtain chip sizes, reset chip CSRs */ - init_chip(dd); + ret = init_chip(dd); + if (ret) + goto bail_cleanup; /* read in the PCIe link speed information */ ret = pcie_speeds(dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1a33a5087734..62f843d7b099 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1854,7 +1854,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); -void restore_pci_variables(struct hfi1_devdata *dd); +int restore_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index f01841b51946..903ea45bf650 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -68,7 +68,7 @@ /* * Code to adjust PCIe capabilities. */ -static void tune_pcie_caps(struct hfi1_devdata *); +static int tune_pcie_caps(struct hfi1_devdata *); /* * Do all the common PCIe setup and initialization. @@ -161,6 +161,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; + int ret = 0; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -207,19 +208,56 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) /* * Save BARs and command to rewrite after device reset. */ - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl); - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3); - pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto read_error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto read_error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto read_error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto read_error; return 0; + +read_error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; } /* @@ -270,8 +308,14 @@ static u32 extract_width(u16 linkstat) static void update_lbus_info(struct hfi1_devdata *dd) { u16 linkstat; + int ret; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return; + } - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); dd->lbus_width = extract_width(linkstat); dd->lbus_speed = extract_speed(linkstat); snprintf(dd->lbus_info, sizeof(dd->lbus_info), @@ -286,6 +330,7 @@ int pcie_speeds(struct hfi1_devdata *dd) { u32 linkcap; struct pci_dev *parent = dd->pcidev->bus->self; + int ret; if (!pci_is_pcie(dd->pcidev)) { dd_dev_err(dd, "Can't find PCI Express capability!\n"); @@ -295,7 +340,12 @@ int pcie_speeds(struct hfi1_devdata *dd) /* find if our max speed is Gen3 and parent supports Gen3 speeds */ dd->link_gen3_capable = 1; - pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) { dd_dev_info(dd, "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n", @@ -327,7 +377,7 @@ int pcie_speeds(struct hfi1_devdata *dd) */ int request_msix(struct hfi1_devdata *dd, u32 msireq) { - int nvec; + int nvec, ret; nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, PCI_IRQ_MSIX | PCI_IRQ_LEGACY); @@ -336,7 +386,12 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) return nvec; } - tune_pcie_caps(dd); + ret = tune_pcie_caps(dd); + if (ret) { + dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret); + pci_free_irq_vectors(dd->pcidev); + return ret; + } /* check for legacy IRQ */ if (nvec == 1 && !dd->pcidev->msix_enabled) @@ -346,19 +401,61 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq) } /* restore command and BARs after a reset has wiped them out */ -void restore_pci_variables(struct hfi1_devdata *dd) +int restore_pci_variables(struct hfi1_devdata *dd) { - pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0); - pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1); - pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl); - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, - dd->pcie_devctl2); - pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); - pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3); - pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + int ret = 0; + + ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + dd->pcibar0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + dd->pcibar1); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, + dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, + dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2, + dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; } /* @@ -373,21 +470,33 @@ uint aspm_mode = ASPM_MODE_DISABLED; module_param_named(aspm, aspm_mode, uint, S_IRUGO); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); -static void tune_pcie_caps(struct hfi1_devdata *dd) +static int tune_pcie_caps(struct hfi1_devdata *dd) { struct pci_dev *parent; u16 rc_mpss, rc_mps, ep_mpss, ep_mps; u16 rc_mrrs, ep_mrrs, max_mrrs, ectl; + int ret; /* * Turn on extended tags in DevCtl in case the BIOS has turned it off * to improve WFR SDMA bandwidth */ - pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); + ret = pcie_capability_read_word(dd->pcidev, + PCI_EXP_DEVCTL, &ectl); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; + } + if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) { dd_dev_info(dd, "Enabling PCIe extended tags\n"); ectl |= PCI_EXP_DEVCTL_EXT_TAG; - pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); + ret = pcie_capability_write_word(dd->pcidev, + PCI_EXP_DEVCTL, ectl); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return ret; + } } /* Find out supported and configured values for parent (root) */ parent = dd->pcidev->bus->self; @@ -396,14 +505,14 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) * access to the upstream component. */ if (!parent) - return; + return -EINVAL; if (!pci_is_root_bus(parent->bus)) { dd_dev_info(dd, "Parent not root\n"); - return; + return -EINVAL; } if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) - return; + return -EINVAL; rc_mpss = parent->pcie_mpss; rc_mps = ffs(pcie_get_mps(parent)) - 8; /* Find out supported and configured values for endpoint (us) */ @@ -449,6 +558,8 @@ static void tune_pcie_caps(struct hfi1_devdata *dd) ep_mrrs = max_mrrs; pcie_set_readrq(dd->pcidev, ep_mrrs); } + + return 0; } /* End of PCIe capability tuning */ @@ -680,6 +791,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, u32 violation; u32 i; u8 c_minus1, c0, c_plus1; + int ret; for (i = 0; i < 11; i++) { /* set index */ @@ -691,8 +803,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs, pci_write_config_dword(pdev, PCIE_CFG_REG_PL102, eq_value(c_minus1, c0, c_plus1)); /* check if these coefficients violate EQ rules */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105, - &violation); + ret = pci_read_config_dword(dd->pcidev, + PCIE_CFG_REG_PL105, &violation); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + hit_error = 1; + break; + } + if (violation & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){ if (hit_error == 0) { @@ -1146,7 +1264,13 @@ retry: * that it is Gen3 capable earlier. */ dd_dev_info(dd, "%s: setting parent target link speed\n", __func__); - pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); /* only write to parent if target is not as high as ours */ @@ -1155,20 +1279,37 @@ retry: lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(parent, + PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } } else { dd_dev_info(dd, "%s: ..target speed is OK\n", __func__); } dd_dev_info(dd, "%s: setting target link speed\n", __func__); - pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__, (u32)lnkctl2); lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK; lnkctl2 |= target_vector; dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__, (u32)lnkctl2); - pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2); + if (ret) { + dd_dev_err(dd, "Unable to write to PCI config\n"); + return_error = 1; + goto done; + } /* step 5h: arm gasket logic */ /* hold DC in reset across the SBR */ @@ -1218,7 +1359,14 @@ retry: /* restore PCI space registers we know were reset */ dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__); - restore_pci_variables(dd); + ret = restore_pci_variables(dd); + if (ret) { + dd_dev_err(dd, "%s: Could not restore PCI variables\n", + __func__); + return_error = 1; + goto done; + } + /* restore firmware control */ write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl); @@ -1248,7 +1396,13 @@ retry: setextled(dd, 0); /* check for any per-lane errors */ - pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, ®32); + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, ®32); + if (ret) { + dd_dev_err(dd, "Unable to read from PCI config\n"); + return_error = 1; + goto done; + } + dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32); /* extract status, look for our HFI */ -- cgit v1.2.3 From cb51c5d2cda855302910ab352f3d391c1a00aba0 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 24 Jul 2017 07:45:31 -0700 Subject: IB/hfi1: Fix bar0 mapping to use write combining When the debugpat kernel boot flag is turned on the following traces are printed: [ 1884.793168] x86/PAT: Overlap at 0x90000000-0x92000000 [ 1884.803510] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track uncached-minus, req write-combining, ret uncached-minus [ 1884.818167] hfi1 0000:05:00.0: hfi1_0: WC Remapped RcvArray: ffffc9000a980000 The ioremap_wc() clearly is not returning a write combining mapping due to an overlap where the RcvArray is mapped in a uncached mapping prior to creating the proposed write combining mapping. The patch replaces the single base register for uncached CSRs that used to overlap the RcvArray with two mappings. One, kregbase1, from the bar0 up to the RcvArray and another, kregbase2, from the end of the RcvArray to the pio send buffer space. A new dd field, base2_start, is used to convert the zero-based offset in the CSR routines to the correct kregbase1/kregbase2 mapping. A single direct write of the RcvArray CSRs is replaced with hfi1_put_tid() to insure correct access using the new disjoint mapping. Additionally, the kregend field is deleted since it is only ever written. patdebug now shows the RcvArray as write combining: [ 35.688990] x86/PAT: reserve_memtype added [mem 0x91200000-0x9127ffff], track write-combining, req write-combining, ret write-combining To insulate from any potential issues with write combining, all writeq are now flushed in hfi1_put_tid() and rcv_array_wc_fill(). Reviewed-by: Mitko Haralanov Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 79 ++++++++++++++++++++++++------- drivers/infiniband/hw/hfi1/chip.h | 4 +- drivers/infiniband/hw/hfi1/driver.c | 4 +- drivers/infiniband/hw/hfi1/exp_rcv.h | 5 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 20 ++++---- drivers/infiniband/hw/hfi1/pcie.c | 58 ++++++++++++++++------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 3 +- 8 files changed, 126 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index efc84810e821..4fce173c8667 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1297,25 +1297,71 @@ CNTR_ELEM(#name, \ CNTR_SYNTH, \ access_ibp_##cntr) +/** + * hfi_addr_from_offset - return addr for readq/writeq + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * This routine selects the appropriate base address + * based on the indicated offset. + */ +static inline void __iomem *hfi1_addr_from_offset( + const struct hfi1_devdata *dd, + u32 offset) +{ + if (offset >= dd->base2_start) + return dd->kregbase2 + (offset - dd->base2_start); + return dd->kregbase1 + offset; +} + +/** + * read_csr - read CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: the value read or all FF's if there + * is no mapping + */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) { - if (dd->flags & HFI1_PRESENT) { - return readq((void __iomem *)dd->kregbase + offset); - } + if (dd->flags & HFI1_PRESENT) + return readq(hfi1_addr_from_offset(dd, offset)); return -1; } +/** + * write_csr - write CSR at the indicated offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * @value - value to write + */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { - if (dd->flags & HFI1_PRESENT) - writeq(value, (void __iomem *)dd->kregbase + offset); + if (dd->flags & HFI1_PRESENT) { + void __iomem *base = hfi1_addr_from_offset(dd, offset); + + /* avoid write to RcvArray */ + if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start)) + return; + writeq(value, base); + } } +/** + * get_csr_addr - return te iomem address for offset + * @dd - the dd device + * @offset - the offset of the CSR within bar0 + * + * Return: The iomem address to use in subsequent + * writeq/readq operations. + */ void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset) { - return (void __iomem *)dd->kregbase + offset; + if (dd->flags & HFI1_PRESENT) + return hfi1_addr_from_offset(dd, offset); + return NULL; } static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr, @@ -9752,14 +9798,13 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order) { u64 reg; - void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc : - (dd->kregbase + RCV_ARRAY)); if (!(dd->flags & HFI1_PRESENT)) goto done; - if (type == PT_INVALID) { + if (type == PT_INVALID || type == PT_INVALID_FLUSH) { pa = 0; + order = 0; } else if (type > PT_INVALID) { dd_dev_err(dd, "unexpected receive array type %u for index %u, not handled\n", @@ -9773,13 +9818,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) << RCV_ARRAY_RT_ADDR_SHIFT; - trace_hfi1_write_rcvarray(base + (index * 8), reg); - writeq(reg, base + (index * 8)); + trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg); + writeq(reg, dd->rcvarray_wc + (index * 8)); - if (type == PT_EAGER) + if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3) /* - * Eager entries are written one-by-one so we have to push them - * after we write the entry. + * Eager entries are written and flushed + * + * Expected entries are flushed every 4 writes */ flush_wc(); done: @@ -13411,8 +13457,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd) /* RcvArray */ for (i = 0; i < dd->chip_rcv_array_count; i++) - write_csr(dd, RCV_ARRAY + (8 * i), - RCV_ARRAY_RT_WRITE_ENABLE_SMASK); + hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); /* RcvQPMapTable */ for (i = 0; i < 32; i++) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 3dab3156ba4a..d3622cb74b3f 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -605,11 +605,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data); int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data); void __iomem *get_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, u32 offset); static inline void __iomem *get_kctxt_csr_addr( - struct hfi1_devdata *dd, + const struct hfi1_devdata *dd, int ctxt, u32 offset0) { diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 26628cb61437..4a149cca4718 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -195,7 +195,7 @@ int hfi1_count_active_units(void) spin_lock_irqsave(&hfi1_devs_lock, flags); list_for_each_entry(dd, &hfi1_dev_list, list) { - if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase) + if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1) continue; for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1282,7 +1282,7 @@ int hfi1_reset_device(int unit) dd_dev_info(dd, "Reset on unit %u requested\n", unit); - if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) { + if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) { dd_dev_info(dd, "Invalid unit number %u or not initialized or not present\n", unit); diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h index c7d02bcddded..08719047628a 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -137,8 +137,11 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index) * Doing the WC fill writes only makes sense if the device is * present and the RcvArray has been mapped as WC memory. */ - if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) + if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) { writeq(0, dd->rcvarray_wc + (index * 8)); + if ((index & 3) == 3) + flush_wc(); + } } static inline void tid_group_add_tail(struct tid_group *grp, diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index bbf80b1dd9d9..fbf52841aea4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -181,7 +181,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); - if (!((dd->flags & HFI1_PRESENT) && dd->kregbase)) + if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1)) return -EINVAL; if (!atomic_inc_not_zero(&dd->user_refcount)) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 62f843d7b099..2e5137b336ce 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -867,12 +867,15 @@ struct hfi1_devdata { struct device *diag_device; struct device *ui_device; - /* mem-mapped pointer to base of chip regs */ - u8 __iomem *kregbase; - /* end of mem-mapped chip space excluding sendbuf and user regs */ - u8 __iomem *kregend; - /* physical address of chip for io_remap, etc. */ + /* first mapping up to RcvArray */ + u8 __iomem *kregbase1; resource_size_t physaddr; + + /* second uncached mapping from RcvArray to pio send buffers */ + u8 __iomem *kregbase2; + /* for detecting offset above kregbase2 address */ + u32 base2_start; + /* Per VL data. Enough for all VLs but not all elements are set/used. */ struct per_vl_data vld[PER_VL_SEND_CONTEXTS]; /* send context data */ @@ -1236,9 +1239,10 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) #define dc8051_ver_patch(a) ((a) & 0x0000ff) /* f_put_tid types */ -#define PT_EXPECTED 0 -#define PT_EAGER 1 -#define PT_INVALID 2 +#define PT_EXPECTED 0 +#define PT_EAGER 1 +#define PT_INVALID_FLUSH 2 +#define PT_INVALID 3 struct tid_rb_node; struct mmu_rb_node; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 903ea45bf650..cc7be224095d 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -180,31 +180,47 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -EINVAL; } - dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND); - if (!dd->kregbase) + dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY); + if (!dd->kregbase1) { + dd_dev_err(dd, "UC mapping of kregbase1 failed\n"); return -ENOMEM; + } + dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); + dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); + dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; + + dd->kregbase2 = ioremap_nocache( + addr + dd->base2_start, + TXE_PIO_SEND - dd->base2_start); + if (!dd->kregbase2) { + dd_dev_err(dd, "UC mapping of kregbase2 failed\n"); + goto nomem; + } + dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2, + TXE_PIO_SEND - dd->base2_start); dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE); if (!dd->piobase) { - iounmap(dd->kregbase); - return -ENOMEM; + dd_dev_err(dd, "WC mapping of send buffers failed\n"); + goto nomem; } + dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); - dd->flags |= HFI1_PRESENT; /* now register routines work */ - - dd->kregend = dd->kregbase + TXE_PIO_SEND; dd->physaddr = addr; /* used for io_remap, etc. */ /* - * Re-map the chip's RcvArray as write-combining to allow us + * Map the chip's RcvArray as write-combining to allow us * to write an entire cacheline worth of entries in one shot. - * If this re-map fails, just continue - the RcvArray programming - * function will handle both cases. */ - dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT); dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, dd->chip_rcv_array_count * 8); - dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc); + if (!dd->rcvarray_wc) { + dd_dev_err(dd, "WC mapping of receive array failed\n"); + goto nomem; + } + dd_dev_info(dd, "WC RcvArray: %p for %x\n", + dd->rcvarray_wc, dd->chip_rcv_array_count * 8); /* * Save BARs and command to rewrite after device reset. */ @@ -253,10 +269,16 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) if (ret) goto read_error; + dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; read_error: dd_dev_err(dd, "Unable to read from PCI config\n"); + goto bail_error; +nomem: + ret = -ENOMEM; +bail_error: + hfi1_pcie_ddcleanup(dd); return ret; } @@ -267,15 +289,19 @@ read_error: */ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd) { - u64 __iomem *base = (void __iomem *)dd->kregbase; - dd->flags &= ~HFI1_PRESENT; - dd->kregbase = NULL; - iounmap(base); + if (dd->kregbase1) + iounmap(dd->kregbase1); + dd->kregbase1 = NULL; + if (dd->kregbase2) + iounmap(dd->kregbase2); + dd->kregbase2 = NULL; if (dd->rcvarray_wc) iounmap(dd->rcvarray_wc); + dd->rcvarray_wc = NULL; if (dd->piobase) iounmap(dd->piobase); + dd->piobase = NULL; } /* return the PCIe link speed from the given link status */ diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6318e6ca1b18..3baf71944471 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -814,12 +814,11 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) node->npages, node->mmu.addr, node->phys, node->dma_addr); - hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0); /* * Make sure device has seen the write before we unpin the * pages. */ - flush_wc(); + hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, PCI_DMA_FROMDEVICE); -- cgit v1.2.3 From bcad29137a9731bfa5e16d64bf8e8a71a268ac88 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 24 Jul 2017 07:45:37 -0700 Subject: IB/hfi1: Serve the most starved iowait entry first When an egress resource(SDMA descriptors, pio credits) is not available, a sending thread will be put on the resource's wait queue. When the resource becomes available again, up to a fixed number of sending threads can be awakened sequentially and removed from the wait queue, depending on the number of waiting threads and the number of free resources. Since each awakened sending thread will send as many packets as possible, it is highly likely that the first sending thread will consume all the egress resources. Subsequently, it will be put back to the end of the wait queue. Depending on the timing when the later sending threads wake up, they may not be able to send any packet and be again put back to the end of the wait queue sequentially, right behind the first sending thread. This starvation cycle continues until some sending threads exceed their retry limit and consequently fail. This patch fixes the issue by two simple approaches: (1) Any starved sending thread will be put to the head of the wait queue while a served sending thread will be put to the tail; (2) The most starved sending thread will be served first. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/iowait.h | 70 +++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/pio.c | 13 +++++-- drivers/infiniband/hw/hfi1/qp.c | 9 +++-- drivers/infiniband/hw/hfi1/ruc.c | 5 ++- drivers/infiniband/hw/hfi1/sdma.c | 34 ++++++++++++----- drivers/infiniband/hw/hfi1/sdma.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 8 ++-- drivers/infiniband/hw/hfi1/verbs.c | 6 ++- drivers/infiniband/hw/hfi1/verbs.h | 1 + drivers/infiniband/hw/hfi1/vnic.h | 1 + drivers/infiniband/hw/hfi1/vnic_sdma.c | 14 +++++-- 11 files changed, 136 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index d9740ddea6f1..591697d85eed 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -106,7 +106,9 @@ struct iowait { struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq); + uint seq, + bool pkts_sent + ); void (*wakeup)(struct iowait *wait, int reason); void (*sdma_drained)(struct iowait *wait); seqlock_t *lock; @@ -118,6 +120,7 @@ struct iowait { u32 count; u32 tx_limit; u32 tx_count; + u8 starved_cnt; }; #define SDMA_AVAIL_REASON 0 @@ -143,7 +146,8 @@ static inline void iowait_init( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *tx, - unsigned seq), + uint seq, + bool pkts_sent), void (*wakeup)(struct iowait *wait, int reason), void (*sdma_drained)(struct iowait *wait)) { @@ -305,4 +309,66 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) return tx; } +/** + * iowait_queue - Put the iowait on a wait queue + * @pkts_sent: have some packets been sent before queuing? + * @w: the iowait struct + * @wait_head: the wait queue + * + * This function is called to insert an iowait struct into a + * wait queue after a resource (eg, sdma decriptor or pio + * buffer) is run out. + */ +static inline void iowait_queue(bool pkts_sent, struct iowait *w, + struct list_head *wait_head) +{ + /* + * To play fair, insert the iowait at the tail of the wait queue if it + * has already sent some packets; Otherwise, put it at the head. + */ + if (pkts_sent) { + list_add_tail(&w->list, wait_head); + w->starved_cnt = 0; + } else { + list_add(&w->list, wait_head); + w->starved_cnt++; + } +} + +/** + * iowait_starve_clear - clear the wait queue's starve count + * @pkts_sent: have some packets been sent? + * @w: the iowait struct + * + * This function is called to clear the starve count. If no + * packets have been sent, the starve count will not be cleared. + */ +static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w) +{ + if (pkts_sent) + w->starved_cnt = 0; +} + +/** + * iowait_starve_find_max - Find the maximum of the starve count + * @w: the iowait struct + * @max: a variable containing the max starve count + * @idx: the index of the current iowait in an array + * @max_idx: a variable containing the array index for the + * iowait entry that has the max starve count + * + * This function is called to compare the starve count of a + * given iowait with the given max starve count. The max starve + * count and the index will be updated if the iowait's start + * count is larger. + */ +static inline void iowait_starve_find_max(struct iowait *w, u8 *max, + uint idx, uint *max_idx) +{ + if (w->starved_cnt > *max) { + *max = w->starved_cnt; + *max_idx = idx; + } +} + #endif diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ed72b5aca139..adb6a4da6107 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1568,7 +1568,8 @@ static void sc_piobufavail(struct send_context *sc) struct rvt_qp *qp; struct hfi1_qp_priv *priv; unsigned long flags; - unsigned i, n = 0; + uint i, n = 0, max_idx = 0; + u8 max_starved_cnt = 0; if (dd->send_contexts[sc->sw_index].type != SC_KERNEL && dd->send_contexts[sc->sw_index].type != SC_VL15) @@ -1591,6 +1592,7 @@ static void sc_piobufavail(struct send_context *sc) priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; + iowait_starve_find_max(wait, &max_starved_cnt, n, &max_idx); /* refcount held until actual wake up */ qps[n++] = qp; } @@ -1605,9 +1607,14 @@ static void sc_piobufavail(struct send_context *sc) } write_sequnlock_irqrestore(&dev->iowait_lock, flags); - for (i = 0; i < n; i++) - hfi1_qp_wakeup(qps[i], + /* Wake up the most starved one first */ + if (n) + hfi1_qp_wakeup(qps[max_idx], RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); + for (i = 0; i < n; i++) + if (i != max_idx) + hfi1_qp_wakeup(qps[i], + RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); } /* translate a send credit update to a bit code of reasons */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 806d166cf6ee..b801d8469956 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -68,7 +68,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq); + unsigned int seq, + bool pkts_sent); static void iowait_wakeup(struct iowait *wait, int reason); static void iowait_sdma_drained(struct iowait *wait); static void qp_pio_drain(struct rvt_qp *qp); @@ -371,7 +372,8 @@ static int iowait_sleep( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *stx, - unsigned seq) + uint seq, + bool pkts_sent) { struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); struct rvt_qp *qp; @@ -402,7 +404,8 @@ static int iowait_sleep( ibp->rvp.n_dmawait++; qp->s_flags |= RVT_S_WAIT_DMA_DESC; - list_add_tail(&priv->s_iowait.list, &sde->dmawait); + iowait_queue(pkts_sent, &priv->s_iowait, + &sde->dmawait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 9cf506a9a796..f13ddb273d77 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -811,6 +811,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, static bool schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { + ps->pkts_sent = true; + if (unlikely(time_after(jiffies, ps->timeout))) { if (!ps->in_thread || workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { @@ -907,6 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) ps.timeout = jiffies + ps.timeout_int; ps.cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); + ps.pkts_sent = false; /* insure a pre-built packet is handled */ ps.s_txreq = get_waiting_verbs_txreq(qp); @@ -929,7 +932,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread) spin_lock_irqsave(&qp->s_lock, ps.flags); } } while (make_req(qp, &ps)); - + iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); spin_unlock_irqrestore(&qp->s_lock, ps.flags); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index d82ff57214c5..71b4258545c4 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -246,7 +246,7 @@ static void __sdma_process_event( enum sdma_events event); static void dump_sdma_state(struct sdma_engine *sde); static void sdma_make_progress(struct sdma_engine *sde, u64 status); -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail); +static void sdma_desc_avail(struct sdma_engine *sde, uint avail); static void sdma_flush_descq(struct sdma_engine *sde); /** @@ -1762,13 +1762,14 @@ retry: * * This is called with head_lock held. */ -static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) +static void sdma_desc_avail(struct sdma_engine *sde, uint avail) { struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; - unsigned i, n = 0, seq; + uint i, n = 0, seq, max_idx = 0; struct sdma_txreq *stx; struct hfi1_ibdev *dev = &sde->dd->verbs_dev; + u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx, @@ -1803,6 +1804,9 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) if (num_desc > avail) break; avail -= num_desc; + /* Find the most starved wait memeber */ + iowait_starve_find_max(wait, &max_starved_cnt, + n, &max_idx); list_del_init(&wait->list); waits[n++] = wait; } @@ -1811,8 +1815,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail) } } while (read_seqretry(&dev->iowait_lock, seq)); + /* Schedule the most starved one first */ + if (n) + waits[max_idx]->wakeup(waits[max_idx], SDMA_AVAIL_REASON); + for (i = 0; i < n; i++) - waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); + if (i != max_idx) + waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON); } /* head_lock must be held */ @@ -2349,7 +2358,8 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx) static int sdma_check_progress( struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret; @@ -2362,7 +2372,7 @@ static int sdma_check_progress( seq = raw_seqcount_begin( (const seqcount_t *)&sde->head_lock.seqcount); - ret = wait->sleep(sde, wait, tx, seq); + ret = wait->sleep(sde, wait, tx, seq, pkts_sent); if (ret == -EAGAIN) sde->desc_avail = sdma_descq_freecnt(sde); } else { @@ -2376,6 +2386,7 @@ static int sdma_check_progress( * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx: sdma_txreq to submit + * @pkts_sent: has any packet been sent yet? * * The call submits the tx into the ring. If a iowait structure is non-NULL * the packet will be queued to the list in wait. @@ -2387,7 +2398,8 @@ static int sdma_check_progress( */ int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx) + struct sdma_txreq *tx, + bool pkts_sent) { int ret = 0; u16 tail; @@ -2429,7 +2441,7 @@ unlock_noconn: ret = -ECOMM; goto unlock; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, pkts_sent); if (ret == -EAGAIN) { ret = 0; goto retry; @@ -2498,8 +2510,10 @@ retry: } update_tail: total_count = submit_count + flush_count; - if (wait) + if (wait) { iowait_sdma_add(wait, total_count); + iowait_starve_clear(submit_count > 0, wait); + } if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); @@ -2527,7 +2541,7 @@ unlock_noconn: ret = -ECOMM; goto update_tail; nodesc: - ret = sdma_check_progress(sde, wait, tx); + ret = sdma_check_progress(sde, wait, tx, submit_count > 0); if (ret == -EAGAIN) { ret = 0; goto retry; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 64f10b8b5db8..107011d8613b 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -852,7 +852,8 @@ struct iowait; int sdma_send_txreq(struct sdma_engine *sde, struct iowait *wait, - struct sdma_txreq *tx); + struct sdma_txreq *tx, + bool pkts_sent); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, struct list_head *tx_list, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8f7cfdd9e9ec..ea2993f93647 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -272,7 +272,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq); + uint seq, + bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); @@ -294,7 +295,8 @@ static int defer_packet_queue( struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned seq) + uint seq, + bool pkts_sent) { struct hfi1_user_sdma_pkt_q *pq = container_of(wait, struct hfi1_user_sdma_pkt_q, busy); @@ -314,7 +316,7 @@ static int defer_packet_queue( xchg(&pq->state, SDMA_PKT_Q_DEFERRED); write_seqlock(&dev->iowait_lock); if (list_empty(&pq->busy.list)) - list_add_tail(&pq->busy.list, &sde->dmawait); + iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; eagain: diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9c9ded643ed4..3ef6384eae40 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -864,7 +864,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (unlikely(ret)) goto bail_build; } - ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); + ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, + ps->pkts_sent); if (unlikely(ret < 0)) { if (ret == -ECOMM) goto bail_ecomm; @@ -921,7 +922,8 @@ static int pio_wait(struct rvt_qp *qp, dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); qp->s_flags |= flag; was_empty = list_empty(&sc->piowait); - list_add_tail(&priv->s_iowait.list, &sc->piowait); + iowait_queue(ps->pkts_sent, &priv->s_iowait, + &sc->piowait); priv->s_iowait.lock = &dev->iowait_lock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index fdf1e1fb880c..34267c7ef85a 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -143,6 +143,7 @@ struct hfi1_pkt_state { unsigned long timeout_int; int cpu; bool in_thread; + bool pkts_sent; }; #define HFI1_PSN_CREDIT 16 diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index 4a621cde4abb..eec7c1424991 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -103,6 +103,7 @@ struct hfi1_vnic_sdma { struct sdma_txreq stx; unsigned int state; u8 q_idx; + bool pkts_sent; }; /** diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 51a817d3aa14..7815d7405462 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -198,11 +198,16 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, goto free_desc; tx->retry_count = 0; - ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq); + ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, + vnic_sdma->pkts_sent); /* When -ECOMM, sdma callback will be called with ABORT status */ if (unlikely(ret && unlikely(ret != -ECOMM))) goto free_desc; + if (!ret) { + vnic_sdma->pkts_sent = true; + iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait); + } return ret; free_desc: @@ -211,6 +216,8 @@ free_desc: tx_err: if (ret != -EBUSY) dev_kfree_skb_any(skb); + else + vnic_sdma->pkts_sent = false; return ret; } @@ -225,7 +232,8 @@ tx_err: static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, struct iowait *wait, struct sdma_txreq *txreq, - unsigned int seq) + uint seq, + bool pkts_sent) { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait, struct hfi1_vnic_sdma, wait); @@ -239,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) - list_add_tail(&vnic_sdma->wait.list, &sde->dmawait); + iowait_queue(pkts_sent, wait, &sde->dmawait); write_sequnlock(&dev->iowait_lock); return -EBUSY; } -- cgit v1.2.3 From 42492011ab23f44c63dad0c7096492313dc207e3 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:43 -0700 Subject: IB/hfi1: Assign context does not clean up file descriptor correctly on error In the error path for context allocation, the file descriptor pointer should not point to a context when an error occurs. Clean up the appropriate references on error. Fixes: Commit 62239fc6e5545b2e59f83dfbc5db231a81f37a45 ("IB/hfi1: Clean up on context initialization failure") Reviewed-by: Ira Weiny Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index fbf52841aea4..d36e17722e6d 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -94,6 +94,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo); +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, @@ -813,15 +814,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) uctxt->rcvnowait = 0; uctxt->pionowait = 0; uctxt->event_flags = 0; - - hfi1_stats.sps_ctxts--; - if (++dd->freectxts == dd->num_user_contexts) - aspm_enable_all(dd); - - /* _rcd_put() should be done after releasing mutex */ - dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_rcd_put(uctxt); /* dd reference */ + + deallocate_ctxt(uctxt); done: mmdrop(fdata->mm); kobject_put(&dd->kobj); @@ -898,10 +893,9 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) if (!ret) ret = init_user_ctxt(fd); - if (ret) { + if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - hfi1_rcd_put(fd->uctxt); - } + } else if (!ret) { ret = setup_base_ctxt(fd); if (fd->uctxt->subctxt_cnt) { @@ -917,6 +911,14 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) &fd->uctxt->event_flags); wake_up(&fd->uctxt->wait); } + if (ret) + deallocate_ctxt(fd->uctxt); + } + + /* If an error occurred, clear the reference */ + if (ret && fd->uctxt) { + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; } return ret; @@ -1087,6 +1089,19 @@ ctxdata_free: return ret; } +static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) +{ + mutex_lock(&hfi1_mutex); + hfi1_stats.sps_ctxts--; + if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) + aspm_enable_all(uctxt->dd); + + /* _rcd_put() should be done after releasing mutex */ + uctxt->dd->rcd[uctxt->ctxt] = NULL; + mutex_unlock(&hfi1_mutex); + hfi1_rcd_put(uctxt); /* dd reference */ +} + static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo) { -- cgit v1.2.3 From 91d970abe8d756843eaac57da903bf27f834b091 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:49 -0700 Subject: IB/hfi1: Remove unused user context data members Several data members of the user context have become unused over time. Cleaning them up. Reviewed-by: Dennis Dalessandro Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 6 ------ drivers/infiniband/hw/hfi1/hfi.h | 24 ------------------------ drivers/infiniband/hw/hfi1/user_sdma.c | 11 ----------- drivers/infiniband/hw/hfi1/user_sdma.h | 1 - 4 files changed, 42 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d36e17722e6d..e10f526d2dc1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -809,10 +809,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); - uctxt->rcvwait_to = 0; - uctxt->piowait_to = 0; - uctxt->rcvnowait = 0; - uctxt->pionowait = 0; uctxt->event_flags = 0; mutex_unlock(&hfi1_mutex); @@ -1067,8 +1063,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); - INIT_LIST_HEAD(&uctxt->sdma_queues); - spin_lock_init(&uctxt->sdma_qlock); hfi1_stats.sps_ctxts++; /* * Disable ASPM when there are open user/PSM contexts to avoid diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2e5137b336ce..7877ebc0b7ae 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -243,24 +243,10 @@ struct hfi1_ctxtdata { /* lock protecting all Expected TID data */ struct mutex exp_lock; - /* number of pio bufs for this ctxt (all procs, if shared) */ - u32 piocnt; - /* first pio buffer for this ctxt */ - u32 pio_base; - /* chip offset of PIO buffers for this ctxt */ - u32 piobufs; /* per-context configuration flags */ unsigned long flags; /* per-context event flags for fileops/intr communication */ unsigned long event_flags; - /* WAIT_RCV that timed out, no interrupt */ - u32 rcvwait_to; - /* WAIT_PIO that timed out, no interrupt */ - u32 piowait_to; - /* WAIT_RCV already happened, no wait */ - u32 rcvnowait; - /* WAIT_PIO already happened, no wait */ - u32 pionowait; /* total number of polled urgent packets */ u32 urgent; /* saved total number of polled urgent packets for poll edge trigger */ @@ -290,7 +276,6 @@ struct hfi1_ctxtdata { u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; - u32 pkt_count; /* QPs waiting for context processing */ struct list_head qp_wait_list; /* interrupt handling */ @@ -299,15 +284,6 @@ struct hfi1_ctxtdata { unsigned numa_id; /* numa node of this context */ /* verbs stats per CTX */ struct hfi1_opcode_stats_perctx *opstats; - /* - * This is the kernel thread that will keep making - * progress on the user sdma requests behind the scenes. - * There is one per context (shared contexts use the master's). - */ - struct task_struct *progress; - struct list_head sdma_queues; - /* protect sdma queues */ - spinlock_t sdma_qlock; /* Is ASPM interrupt supported for this context */ bool aspm_intr_supported; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index ea2993f93647..64d31eb9bd82 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -346,7 +346,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_devdata *dd; struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; if (!uctxt || !fd) return -EBADF; @@ -360,7 +359,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq) return -ENOMEM; - INIT_LIST_HEAD(&pq->list); pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -421,10 +419,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, fd->pq = pq; fd->cq = cq; - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - list_add(&pq->list, &uctxt->sdma_queues); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); - return 0; pq_mmu_fail: @@ -447,7 +441,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; - unsigned long flags; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, uctxt->ctxt, fd->subctxt); @@ -455,10 +448,6 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) if (pq) { if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); - spin_lock_irqsave(&uctxt->sdma_qlock, flags); - if (!list_empty(&pq->list)) - list_del_init(&pq->list); - spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */ wait_event_interruptible( diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index e5b10aefe212..161fdfddbb30 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,6 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - struct list_head list; unsigned ctxt; u16 subctxt; u16 n_max_reqs; -- cgit v1.2.3 From e6f7622df177d594f11d93343c3dda7637c761e0 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:45:55 -0700 Subject: IB/hfi1: Size rcd array index correctly and consistently The array index for the rcd array is sized several different ways throughout the code. Use the user interface size (u16) as the standard size and update the necessary code to reflect this. u16 is large enough for the largest amount of supported contexts. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 6 +++--- drivers/infiniband/hw/hfi1/chip.c | 10 +++++----- drivers/infiniband/hw/hfi1/chip.h | 8 ++++---- drivers/infiniband/hw/hfi1/driver.c | 15 ++++++++------- drivers/infiniband/hw/hfi1/file_ops.c | 6 +++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++-- drivers/infiniband/hw/hfi1/init.c | 9 +++++---- drivers/infiniband/hw/hfi1/trace_rx.h | 2 +- drivers/infiniband/hw/hfi1/user_sdma.h | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 2 +- 10 files changed, 33 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 794e6814a531..3f9a071ccac3 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -237,7 +237,7 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { rcd = dd->rcd[i]; @@ -256,7 +256,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) { struct hfi1_ctxtdata *rcd; unsigned long flags; - unsigned i; + u16 i; aspm_enable(dd); @@ -284,7 +284,7 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) static inline void aspm_init(struct hfi1_devdata *dd) { - unsigned i; + u16 i; spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4fce173c8667..06a79a268d2e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6802,7 +6802,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { u32 rcvmask; - int i; + u16 i; /* enable all kernel contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) { @@ -11722,7 +11722,7 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) { struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; @@ -14542,7 +14542,7 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14579,7 +14579,7 @@ done: return ret; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) { struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; unsigned sctxt; @@ -14608,7 +14608,7 @@ done: return ret; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) { struct hfi1_ctxtdata *rcd; unsigned sctxt; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index d3622cb74b3f..008c970412ec 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,14 +1352,14 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 4a149cca4718..01235d4757a5 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -837,9 +837,9 @@ bail: return last; } -static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -857,9 +857,9 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u8 ctxt) &handle_receive_interrupt_nodma_rtail; } -static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) +static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) { - int i; + u16 i; /* * For dynamically allocated kernel contexts (like vnic) switch @@ -879,7 +879,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u8 ctxt) void set_all_slowpath(struct hfi1_devdata *dd) { - int i; + u16 i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { @@ -1068,7 +1068,7 @@ void receive_interrupt_work(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, linkstate_active_work); struct hfi1_devdata *dd = ppd->dd; - int i; + u16 i; /* Received non-SC15 packet implies neighbor_normal */ ppd->neighbor_normal = 1; @@ -1269,7 +1269,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, */ int hfi1_reset_device(int unit) { - int ret, i; + int ret; + u16 i; struct hfi1_devdata *dd = hfi1_lookup(unit); struct hfi1_pportdata *ppd; unsigned long flags; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e10f526d2dc1..c8f34bc6b620 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -927,7 +927,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) { - int i; + u16 i; struct hfi1_devdata *dd = fd->dd; u16 subctxt; @@ -978,7 +978,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -1429,7 +1429,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) { struct hfi1_ctxtdata *uctxt; struct hfi1_devdata *dd = ppd->dd; - unsigned ctxt; + u16 ctxt; int ret = 0; unsigned long flags; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7877ebc0b7ae..2ce3fc58b61b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -217,7 +217,7 @@ struct hfi1_ctxtdata { struct kref kref; /* Device context index */ - unsigned ctxt; + u16 ctxt; /* * non-zero if ctxt can be shared, and defines the maximum number of * sub-contexts for this device context. @@ -1264,7 +1264,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index dfdb4126ca05..818a4b87af60 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -131,7 +131,7 @@ unsigned long *hfi1_cpulist; */ int hfi1_create_ctxts(struct hfi1_devdata *dd) { - unsigned i; + u16 i; int ret; /* Control context has to be always 0 */ @@ -233,7 +233,7 @@ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, +struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, int numa) { struct hfi1_devdata *dd = ppd->dd; @@ -615,7 +615,7 @@ static int init_after_reset(struct hfi1_devdata *dd) static void enable_chip(struct hfi1_devdata *dd) { u32 rcvmask; - u32 i; + u16 i; /* enable PIO send */ pio_send_control(dd, PSC_GLOBAL_ENABLE); @@ -692,7 +692,8 @@ wq_error: int hfi1_init(struct hfi1_devdata *dd, int reinit) { int ret = 0, pidx, lastfail = 0; - unsigned i, len; + unsigned long len; + u16 i; struct hfi1_ctxtdata *rcd; struct hfi1_pportdata *ppd; diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 84929578cfe6..bebf0a8f214d 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -114,7 +114,7 @@ TRACE_EVENT(hfi1_rcvhdr, ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u32 ctxt), + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt), TP_ARGS(dd, ctxt), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(u32, ctxt) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 161fdfddbb30..73c2455189d3 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -56,7 +56,7 @@ extern uint extended_psn; struct hfi1_user_sdma_pkt_q { - unsigned ctxt; + u16 ctxt; u16 subctxt; u16 n_max_reqs; atomic_t n_reqs; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 5a3f80ba9752..9bd274408f32 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -106,7 +106,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata **vnic_ctxt) { struct hfi1_ctxtdata *uctxt; - unsigned int ctxt; + u16 ctxt; int ret; if (dd->flags & HFI1_FROZEN) -- cgit v1.2.3 From 17573972f44d6293ed4fe561816b701241cb0847 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:01 -0700 Subject: IB/hfi1: Use context pointer rather than context index The hfi1__ctxt_key functions take a context index and look up the context based on that index. Since the context index is being retrieved from the context, this doesn't seem optimal. Pass the context pointer for use, rather than the context index. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 90 ++++++++++++++--------------------- drivers/infiniband/hw/hfi1/chip.h | 8 ++-- drivers/infiniband/hw/hfi1/file_ops.c | 6 +-- 3 files changed, 45 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 06a79a268d2e..2f747023444b 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14542,99 +14542,86 @@ static void init_txe(struct hfi1_devdata *dd) write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE); } -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey) +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */ ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) << SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT); /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */ if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY)) reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg); /* * Enable send-side J_KEY integrity check, unless this is A0 h/w */ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Enable J_KEY check on receive context. */ reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK | ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) << RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT); - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg); + + return 0; } -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt) +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0); + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0); /* * Disable send-side J_KEY integrity check, unless this is A0 h/w. * This check would not have been enabled for A0 h/w, see * set_ctxt_jkey(). */ if (!is_ax(dd)) { - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); } /* Turn off the J_KEY on the receive side */ - write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0); -done: - return ret; + write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0); + + return 0; } -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey) +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 pkey) { - struct hfi1_ctxtdata *rcd; - unsigned sctxt; - int ret = 0; + u8 hw_ctxt; u64 reg; - if (ctxt < dd->num_rcv_contexts) { - rcd = dd->rcd[ctxt]; - } else { - ret = -EINVAL; - goto done; - } - if (!rcd || !rcd->sc) { - ret = -EINVAL; - goto done; - } - sctxt = rcd->sc->hw_context; + if (!rcd || !rcd->sc) + return -EINVAL; + + hw_ctxt = rcd->sc->hw_context; reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) << SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); - reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE); + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg); + reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK; - write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg); -done: - return ret; + write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg); + + return 0; } int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) @@ -14645,9 +14632,6 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt) if (!ctxt || !ctxt->sc) return -EINVAL; - if (ctxt->ctxt >= dd->num_rcv_contexts) - return -EINVAL; - hw_ctxt = ctxt->sc->hw_context; reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE); reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 008c970412ec..995105214953 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1357,9 +1357,11 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val); -int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt, u16 jkey); -int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, u16 ctxt); -int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, u16 ctxt, u16 pkey); +int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, + u16 jkey); +int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); +int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt, + u16 pkey); int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt); void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index c8f34bc6b620..51e6a6f0e10f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -795,7 +795,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); /* Clear the context's J_KEY */ - hfi1_clear_ctxt_jkey(dd, uctxt->ctxt); + hfi1_clear_ctxt_jkey(dd, uctxt); /* * If a send context is allocated, reset context integrity * checks to default and disable the send context. @@ -1172,7 +1172,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) clear_rcvhdrtail(uctxt); /* Setup J_KEY before enabling the context */ - hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey); + hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) @@ -1545,7 +1545,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) } if (intable) - ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey); + ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey); done: return ret; } -- cgit v1.2.3 From 2250563e2c935d6401a2203be4de4ca2cf0db183 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:06 -0700 Subject: IB/hfi1: Pass the context pointer rather than the index The hfi1_rcvctrl() function receives an index which it then converts to an rcd. Since most functions have the rcd, use that instead. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 14 ++++++++------ drivers/infiniband/hw/hfi1/chip.h | 3 ++- drivers/infiniband/hw/hfi1/file_ops.c | 11 +++++------ drivers/infiniband/hw/hfi1/init.c | 14 +++++++------- drivers/infiniband/hw/hfi1/intr.c | 2 +- drivers/infiniband/hw/hfi1/vnic_main.c | 4 ++-- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2f747023444b..a62ef4acdca4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6790,7 +6790,7 @@ static void rxe_freeze(struct hfi1_devdata *dd) /* disable all receive contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]); } /* @@ -6814,9 +6814,9 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? + rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, rcd); } /* enable port */ @@ -11722,16 +11722,18 @@ static u32 encoded_size(u32 size) return 0x1; /* if invalid, go with the minimum size */ } -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt) +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd) { - struct hfi1_ctxtdata *rcd; u64 rcvctrl, reg; int did_enable = 0; + u16 ctxt; - rcd = dd->rcd[ctxt]; if (!rcd) return; + ctxt = rcd->ctxt; + hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op); rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 995105214953..bef6301d825d 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1352,7 +1352,8 @@ void hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, u32 type, unsigned long pa, u16 order); void hfi1_quiet_serdes(struct hfi1_pportdata *ppd); -void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, u16 ctxt); +void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, + struct hfi1_ctxtdata *rcd); u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp); u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp); int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 51e6a6f0e10f..7be75e0d4f7e 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -389,8 +389,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, sc_disable(sc); ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, - uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); } else { ret = sc_restart(sc); } @@ -793,7 +792,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* Clear the context's J_KEY */ hfi1_clear_ctxt_jkey(dd, uctxt); /* @@ -1198,7 +1197,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; else rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); } static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, @@ -1410,7 +1409,7 @@ static unsigned int poll_next(struct file *fp, spin_lock_irq(&dd->uctxt_lock); if (hdrqempty(uctxt)) { set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt); pollflag = 0; } else { pollflag = POLLIN | POLLRDNORM; @@ -1495,7 +1494,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, } else { rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS; } - hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt); + hfi1_rcvctrl(dd, rcvctrl_op, uctxt); /* always; new head should be equal to new tail; see above */ bail: return 0; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 818a4b87af60..7f5e4c7dca89 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -603,8 +603,8 @@ static int init_after_reset(struct hfi1_devdata *dd) */ for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_TAILUPD_DIS, i); + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]); pio_send_control(dd, PSC_GLOBAL_DISABLE); for (i = 0; i < dd->num_send_contexts; i++) sc_disable(dd->send_contexts[i].sc); @@ -634,7 +634,7 @@ static void enable_chip(struct hfi1_devdata *dd) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - hfi1_rcvctrl(dd, rcvmask, i); + hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]); sc_enable(dd->rcd[i]->sc); } } @@ -915,10 +915,10 @@ static void shutdown_device(struct hfi1_devdata *dd) ppd = dd->pport + pidx; for (i = 0; i < dd->num_rcv_contexts; i++) hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | - HFI1_RCVCTRL_CTXT_DIS | - HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_PKEY_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i); + HFI1_RCVCTRL_CTXT_DIS | + HFI1_RCVCTRL_INTRAVAIL_DIS | + HFI1_RCVCTRL_PKEY_DIS | + HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]); /* * Gracefully stop all sends allowing any in progress to * trickle out first. diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 04a5082d5ac5..9469be9940e0 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -196,7 +196,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd) if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) { wake_up_interruptible(&rcd->wait); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd); } else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG, &rcd->event_flags)) { rcd->urgent++; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 9bd274408f32..89b056489769 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -95,7 +95,7 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; - hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); + hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); uctxt->is_vnic = true; done: @@ -186,7 +186,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_INTRAVAIL_DIS | HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | - HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); + HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); /* * VNIC contexts are allocated from user context pool. * Release them back to user context pool. -- cgit v1.2.3 From bf90aadd630c2c9f7f965ba1e90d41b5b46db7c9 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:12 -0700 Subject: IB/hfi1: Send MAD traps until repressed A trap should be sent to the FM until the FM sends a repress message. This is in line with the IBTA 13.4.9. Add the ability to resend traps until a repress message is received. Reviewed-by: Dennis Dalessandro Reviewed-by: Michael N. Henry Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 373 ++++++++++++++++++++++++++++--------- drivers/infiniband/hw/hfi1/mad.h | 3 +- drivers/infiniband/hw/hfi1/verbs.c | 5 + include/rdma/rdma_vt.h | 17 ++ 4 files changed, 310 insertions(+), 88 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index a081a98d728a..0a3e2dfdf56e 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -59,6 +59,16 @@ #define OPA_LINK_WIDTH_RESET_OLD 0x0fff #define OPA_LINK_WIDTH_RESET 0xffff +struct trap_node { + struct list_head list; + struct opa_mad_notice_attr data; + __be64 tid; + int len; + u32 retry; + u8 in_use; + u8 repress; +}; + static int smp_length_check(u32 data_size, u32 request_len) { if (unlikely(request_len < data_size)) @@ -97,28 +107,156 @@ void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) ib_dispatch_event(&event); } -static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) +/* + * If the port is down, clean up all pending traps. We need to be careful + * with the given trap, because it may be queued. + */ +static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap) +{ + struct trap_node *node, *q; + unsigned long flags; + struct list_head trap_list; + int i; + + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + spin_lock_irqsave(&ibp->rvp.lock, flags); + list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list); + ibp->rvp.trap_lists[i].list_len = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + /* + * Remove all items from the list, freeing all the non-given + * traps. + */ + list_for_each_entry_safe(node, q, &trap_list, list) { + list_del(&node->list); + if (node != trap) + kfree(node); + } + } + + /* + * If this wasn't on one of the lists it would not be freed. If it + * was on the list, it is now safe to free. + */ + kfree(trap); +} + +static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, + struct trap_node *trap) +{ + struct trap_node *node; + struct trap_list *trap_list; + unsigned long flags; + unsigned long timeout; + int found = 0; + + /* + * Since the retry (handle timeout) does not remove a trap request + * from the list, all we have to do is compare the node. + */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F]; + + list_for_each_entry(node, &trap_list->list, list) { + if (node == trap) { + node->retry++; + found = 1; + break; + } + } + + /* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */ + if (!found) { + if (trap_list->list_len < RVT_MAX_TRAP_LEN) { + trap_list->list_len++; + list_add_tail(&trap->list, &trap_list->list); + } else { + pr_warn_ratelimited("hfi1: Maximim trap limit reached for 0x%0x traps\n", + trap->data.generic_type); + kfree(trap); + } + } + + /* + * Next check to see if there is a timer pending. If not, set it up + * and get the first trap from the list. + */ + node = NULL; + if (!timer_pending(&ibp->rvp.trap_timer)) { + /* + * o14-2 + * If the time out is set we have to wait until it expires + * before the trap can be sent. + * This should be > RVT_TRAP_TIMEOUT + */ + timeout = (RVT_TRAP_TIMEOUT * + (1UL << ibp->rvp.subnet_timeout)) / 1000; + mod_timer(&ibp->rvp.trap_timer, + jiffies + usecs_to_jiffies(timeout)); + node = list_first_entry(&trap_list->list, struct trap_node, + list); + node->in_use = 1; + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + return node; +} + +static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp, + struct opa_smp *smp) +{ + struct trap_list *trap_list; + struct trap_node *trap; + unsigned long flags; + int i; + + if (smp->attr_id != IB_SMP_ATTR_NOTICE) + return; + + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) { + trap_list = &ibp->rvp.trap_lists[i]; + trap = list_first_entry_or_null(&trap_list->list, + struct trap_node, list); + if (trap && trap->tid == smp->tid) { + if (trap->in_use) { + trap->repress = 1; + } else { + trap_list->list_len--; + list_del(&trap->list); + kfree(trap); + } + break; + } + } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); +} + +static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) { struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent; struct opa_smp *smp; - int ret; unsigned long flags; - unsigned long timeout; int pkey_idx; u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp; agent = ibp->rvp.send_agent; - if (!agent) + if (!agent) { + cleanup_traps(ibp, trap); return; + } /* o14-3.2.1 */ - if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) + if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) { + cleanup_traps(ibp, trap); return; + } - /* o14-2 */ - if (ibp->rvp.trap_timeout && time_before(jiffies, - ibp->rvp.trap_timeout)) + /* Add the trap to the list if necessary and see if we can send it */ + trap = check_and_add_trap(ibp, trap); + if (!trap) return; pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY); @@ -139,11 +277,21 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; smp->class_version = OPA_SM_CLASS_VERSION; smp->method = IB_MGMT_METHOD_TRAP; - ibp->rvp.tid++; - smp->tid = cpu_to_be64(ibp->rvp.tid); + + /* Only update the transaction ID for new traps (o13-5). */ + if (trap->tid == 0) { + ibp->rvp.tid++; + /* make sure that tid != 0 */ + if (ibp->rvp.tid == 0) + ibp->rvp.tid++; + trap->tid = cpu_to_be64(ibp->rvp.tid); + } + smp->tid = trap->tid; + smp->attr_id = IB_SMP_ATTR_NOTICE; /* o14-1: smp->mkey = 0; */ - memcpy(smp->route.lid.data, data, len); + + memcpy(smp->route.lid.data, &trap->data, trap->len); spin_lock_irqsave(&ibp->rvp.lock, flags); if (!ibp->rvp.sm_ah) { @@ -152,31 +300,72 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid); if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - } else { - send_buf->ah = ah; - ibp->rvp.sm_ah = ibah_to_rvtah(ah); - ret = 0; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } + send_buf->ah = ah; + ibp->rvp.sm_ah = ibah_to_rvtah(ah); } else { - ret = -EINVAL; + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + return; } } else { send_buf->ah = &ibp->rvp.sm_ah->ibah; - ret = 0; } + + /* + * If the trap was repressed while things were getting set up, don't + * bother sending it. This could happen for a retry. + */ + if (trap->repress) { + list_del(&trap->list); + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + kfree(trap); + ib_free_send_mad(send_buf); + return; + } + + trap->in_use = 0; spin_unlock_irqrestore(&ibp->rvp.lock, flags); - if (!ret) - ret = ib_post_send_mad(send_buf, NULL); - if (!ret) { - /* 4.096 usec. */ - timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000; - ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout); - } else { + if (ib_post_send_mad(send_buf, NULL)) ib_free_send_mad(send_buf); - ibp->rvp.trap_timeout = 0; +} + +void hfi1_handle_trap_timer(unsigned long data) +{ + struct hfi1_ibport *ibp = (struct hfi1_ibport *)data; + struct trap_node *trap = NULL; + unsigned long flags; + int i; + + /* Find the trap with the highest priority */ + spin_lock_irqsave(&ibp->rvp.lock, flags); + for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) { + trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list, + struct trap_node, list); } + spin_unlock_irqrestore(&ibp->rvp.lock, flags); + + if (trap) + send_trap(ibp, trap); +} + +static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid) +{ + struct trap_node *trap; + + trap = kzalloc(sizeof(*trap), GFP_ATOMIC); + if (!trap) + return NULL; + + INIT_LIST_HEAD(&trap->list); + trap->data.generic_type = type; + trap->data.prod_type_lsb = IB_NOTICE_PROD_CA; + trap->data.trap_num = trap_num; + trap->data.issuer_lid = cpu_to_be32(lid); + + return trap; } /* @@ -185,28 +374,29 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, u32 qp1, u32 qp2, u16 lid1, u16 lid2) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; u32 _lid1 = lid1; u32 _lid2 = lid2; - memset(&data, 0, sizeof(data)); ibp->rvp.n_pkt_drops++; ibp->rvp.pkey_violations++; + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_P_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_257_258.lid1 = cpu_to_be32(_lid1); - data.ntc_257_258.lid2 = cpu_to_be32(_lid2); - data.ntc_257_258.key = cpu_to_be32(key); - data.ntc_257_258.sl = sl << 3; - data.ntc_257_258.qp1 = cpu_to_be32(qp1); - data.ntc_257_258.qp2 = cpu_to_be32(qp2); - - send_trap(ibp, &data, sizeof(data)); + trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1); + trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2); + trap->data.ntc_257_258.key = cpu_to_be32(key); + trap->data.ntc_257_258.sl = sl << 3; + trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1); + trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2); + + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -215,34 +405,36 @@ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, __be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY, + lid); + if (!trap) + return; + /* Send violation trap */ - data.generic_type = IB_NOTICE_TYPE_SECURITY; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_BAD_M_KEY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_256.lid = data.issuer_lid; - data.ntc_256.method = mad->method; - data.ntc_256.attr_id = mad->attr_id; - data.ntc_256.attr_mod = mad->attr_mod; - data.ntc_256.mkey = mkey; + trap->data.ntc_256.lid = trap->data.issuer_lid; + trap->data.ntc_256.method = mad->method; + trap->data.ntc_256.attr_id = mad->attr_id; + trap->data.ntc_256.attr_mod = mad->attr_mod; + trap->data.ntc_256.mkey = mkey; if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - data.ntc_256.dr_slid = dr_slid; - data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; - if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) { - data.ntc_256.dr_trunc_hop |= + trap->data.ntc_256.dr_slid = dr_slid; + trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; + if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) { + trap->data.ntc_256.dr_trunc_hop |= IB_NOTICE_TRAP_DR_TRUNC; - hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path); + hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path); } - data.ntc_256.dr_trunc_hop |= hop_cnt; - memcpy(data.ntc_256.dr_rtn_path, return_path, + trap->data.ntc_256.dr_trunc_hop |= hop_cnt; + memcpy(trap->data.ntc_256.dr_rtn_path, return_path, hop_cnt); } - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + + send_trap(ibp, trap); } /* @@ -250,23 +442,24 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, */ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) { - struct opa_mad_notice_attr data; + struct trap_node *trap; struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); - data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); + trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -274,19 +467,19 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num) */ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_SYSGUID; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; - data.ntc_145.lid = data.issuer_lid; + trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid; + trap->data.ntc_145.lid = trap->data.issuer_lid; - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } /* @@ -294,20 +487,21 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp) */ void hfi1_node_desc_chg(struct hfi1_ibport *ibp) { - struct opa_mad_notice_attr data; + struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - memset(&data, 0, sizeof(data)); + trap = create_trap_node(IB_NOTICE_TYPE_INFO, + OPA_TRAP_CHANGE_CAPABILITY, + lid); + if (!trap) + return; - data.generic_type = IB_NOTICE_TYPE_INFO; - data.prod_type_lsb = IB_NOTICE_PROD_CA; - data.trap_num = OPA_TRAP_CHANGE_CAPABILITY; - data.issuer_lid = cpu_to_be32(lid); - data.ntc_144.lid = data.issuer_lid; - data.ntc_144.change_flags = + trap->data.ntc_144.lid = trap->data.issuer_lid; + trap->data.ntc_144.change_flags = cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG); - send_trap(ibp, &data, sizeof(data)); + trap->len = sizeof(trap->data); + send_trap(ibp, trap); } static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am, @@ -4144,6 +4338,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, */ ret = IB_MAD_RESULT_SUCCESS; break; + case IB_MGMT_METHOD_TRAP_REPRESS: + subn_handle_opa_trap_repress(ibp, smp); + /* Always successful */ + ret = IB_MAD_RESULT_SUCCESS; + break; default: smp->status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_mad_hdr *)smp); diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index a4e2506bd5ca..4c1245072093 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -428,5 +428,6 @@ struct sc2vlnt { COUNTER_MASK(1, 4)) void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); +void hfi1_handle_trap_timer(unsigned long data); #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3ef6384eae40..dc51bf247006 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1535,6 +1535,11 @@ static void init_ibport(struct hfi1_pportdata *ppd) ibp->sc_to_sl[i] = i; } + for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) + INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); + setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, + (unsigned long)ibp); + spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 4.1.1) */ ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 22fb15ff5e8b..fdfac0fd2f82 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -57,11 +57,21 @@ #include #include #include +#include #include #include #define RVT_MAX_PKEY_VALUES 16 +#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ +#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ + +struct trap_list { + u32 list_len; + struct list_head list; +}; + struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ @@ -128,6 +138,13 @@ struct rvt_ibport { u16 *pkey_table; struct rvt_ah *sm_ah; + + /* + * Keep a list of traps that have not been repressed. They will be + * resent based on trap_timer. + */ + struct trap_list trap_lists[RVT_MAX_TRAP_LISTS]; + struct timer_list trap_timer; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ -- cgit v1.2.3 From 59ec8736956456b7429bbfd613b288928416a0f0 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 24 Jul 2017 07:46:18 -0700 Subject: IB/hfi1: Fix code consistency for if/else blocks in chip.c Code structure is not consistent for if/else blocks and break instructions in set_link_state for case HLS_UP_INIT. Physical state uses break in case of an error and if/else blocks for logical use cases. These blocks should be implemented consistently. Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a62ef4acdca4..789dbc40ebd0 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10569,18 +10569,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) dd_dev_err(dd, "%s: logical state did not change to INIT\n", __func__); - } else { - /* clear old transient LINKINIT_REASON code */ - if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) - ppd->linkinit_reason = - OPA_LINKINIT_REASON_LINKUP; + break; + } - /* enable the port */ - add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + /* clear old transient LINKINIT_REASON code */ + if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR) + ppd->linkinit_reason = + OPA_LINKINIT_REASON_LINKUP; - handle_linkup_change(dd, 1); - ppd->host_link_state = HLS_UP_INIT; - } + /* enable the port */ + add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); + + handle_linkup_change(dd, 1); + ppd->host_link_state = HLS_UP_INIT; break; case HLS_UP_ARMED: if (ppd->host_link_state != HLS_UP_INIT) -- cgit v1.2.3 From a156abb3cf14112d3aa3f3a9ba880d48cf064cef Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Mon, 24 Jul 2017 07:46:24 -0700 Subject: IB/hfi1: Fix initialization failure for debug firmware Loading debug signed firmware fails if started immediately after failed attempt to load production firmware. A short delay is required so add about a 100us delay after RSA check failure. Reviewed-by: Dennis Dalessandro Reviewed-by: Easwar Hariharan Reviewed-by: Mike Marciniszyn Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 4042c11b2742..83efd6bec657 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -615,6 +615,14 @@ retry: fw_fabric_serdes_name = ALT_FW_FABRIC_NAME; fw_sbus_name = ALT_FW_SBUS_NAME; fw_pcie_serdes_name = ALT_FW_PCIE_NAME; + + /* + * Add a delay before obtaining and loading debug firmware. + * Authorization will fail if the delay between firmware + * authorization events is shorter than 50us. Add 100us to + * make a delay time safe. + */ + usleep_range(100, 120); } if (fw_sbus_load) { -- cgit v1.2.3 From a618b7e40af2b2b751790d602ffa93800b594eca Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Mon, 24 Jul 2017 07:46:30 -0700 Subject: IB/hfi1: Move saving PCI values to a separate function During PCIe initialization some registers' values from PCI config space are saved in order to restore them later (i.e. after reset). Restoring those value is done by a function called restore_pci_variables, while saving them is put directly into function hfi1_pcie_ddinit. Move saving values to a separate function in the image of restoring functionality. Reviewed-by: Jakub Byczkowski Reviewed-by: Mike Marciniszyn Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 ++ drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/pcie.c | 110 ++++++++++++++++++++------------------ 3 files changed, 64 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 789dbc40ebd0..4a4405ee9302 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14866,6 +14866,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_free; + /* Save PCI space registers to rewrite after device reset */ + ret = save_pci_variables(dd); + if (ret < 0) + goto bail_cleanup; + /* verify that reads actually work, save revision for reset check */ dd->revision = read_csr(dd, CCE_REVISION); if (dd->revision == ~(u64)0) { diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2ce3fc58b61b..2d32c5c314a3 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1835,6 +1835,7 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *dd); int request_msix(struct hfi1_devdata *dd, u32 msireq); int restore_pci_variables(struct hfi1_devdata *dd); +int save_pci_variables(struct hfi1_devdata *dd); int do_pcie_gen3_transition(struct hfi1_devdata *dd); int parse_platform_config(struct hfi1_devdata *dd); int get_platform_config_field(struct hfi1_devdata *dd, diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index cc7be224095d..82447b7cdda1 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -221,63 +221,11 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) } dd_dev_info(dd, "WC RcvArray: %p for %x\n", dd->rcvarray_wc, dd->chip_rcv_array_count * 8); - /* - * Save BARs and command to rewrite after device reset. - */ - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); - if (ret) - goto read_error; - - ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, - &dd->pcie_devctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, - &dd->pcie_lnkctl); - if (ret) - goto read_error; - - ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, - &dd->pcie_devctl2); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, - &dd->pci_lnkctl3); - if (ret) - goto read_error; - - ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); - if (ret) - goto read_error; dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ return 0; - -read_error: - dd_dev_err(dd, "Unable to read from PCI config\n"); - goto bail_error; nomem: ret = -ENOMEM; -bail_error: hfi1_pcie_ddcleanup(dd); return ret; } @@ -484,6 +432,64 @@ error: return ret; } +/* Save BARs and command to rewrite after device reset */ +int save_pci_variables(struct hfi1_devdata *dd) +{ + int ret = 0; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + &dd->pcibar0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + &dd->pcibar1); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); + if (ret) + goto error; + + ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, + &dd->pcie_devctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, + &dd->pcie_lnkctl); + if (ret) + goto error; + + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2, + &dd->pcie_devctl2); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, + &dd->pci_lnkctl3); + if (ret) + goto error; + + ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2); + if (ret) + goto error; + + return 0; + +error: + dd_dev_err(dd, "Unable to read from PCI config\n"); + return ret; +} + /* * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. -- cgit v1.2.3 From 5e2d6764a729bf8d43894b0368b0ae11a19485c0 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 24 Jul 2017 07:46:36 -0700 Subject: IB/hfi1: Verify port data VLs credits on transition to Armed There is a window where the FM can read the buffer control table and decide not to program buffers. When a port goes down, the code clears the table and if it is not programmed, posted SDMA descriptors will never complete due to no buffer credits. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 16 +++++++++++++++- drivers/infiniband/hw/hfi1/intr.c | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 4a4405ee9302..8f7ce74a15de 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10489,6 +10489,14 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, } } +/* + * Verify if BCT for data VLs is non-zero. + */ +static inline bool data_vls_operational(struct hfi1_pportdata *ppd) +{ + return !!ppd->actual_vls_operational; +} + /* * Change the physical and/or logical link state. * @@ -10587,6 +10595,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_INIT) goto unexpected; + if (!data_vls_operational(ppd)) { + dd_dev_err(dd, + "%s: data VLs not operational\n", __func__); + ret = -EINVAL; + break; + } + ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); @@ -14832,7 +14847,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, } ppd->vls_supported = num_vls; ppd->vls_operational = ppd->vls_supported; - ppd->actual_vls_operational = ppd->vls_supported; /* Set the default MTU. */ for (vl = 0; vl < num_vls; vl++) dd->vld[vl].mtu = hfi1_max_mtu; diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 9469be9940e0..96845dfed5c5 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -164,6 +164,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) ppd->linkup = 0; /* clear HW details of the previous connection */ + ppd->actual_vls_operational = 0; reset_link_credits(dd); /* freeze after a link down to guarantee a clean egress */ -- cgit v1.2.3 From f13a6e5e2e0192737c3bdbdb16c5cc0181cc86e5 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 24 Jul 2017 07:46:42 -0700 Subject: IB/hfi1: Split copy_to_user data copy for better security A copy_to_user() call assumes that two members of a data structure are sequential. Since this may not always be true, separate the copies to ensure a safe copy. Reviewed-by: Dennis Dalessandro Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7be75e0d4f7e..650c1e578775 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -268,12 +268,14 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, /* * Copy the number of tidlist entries we used * and the length of the buffer we registered. - * These fields are adjacent in the structure so - * we can copy them at the same time. */ addr = arg + offsetof(struct hfi1_tid_info, tidcnt); if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt) + + sizeof(tinfo.tidcnt))) + return -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); + if (copy_to_user((void __user *)addr, &tinfo.length, sizeof(tinfo.length))) ret = -EFAULT; } -- cgit v1.2.3 From 5efd40cad4bf98f0d1f6b726d67e39decd85edc9 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Sat, 29 Jul 2017 08:43:20 -0700 Subject: IB/hfi1: Harden state transition to Armed and Active There is a window that allows other threads to read state of 'host_link_state' as a new, before the hardware actual state is set. This patch closes the window by indicating a new state only after hardware transition is complete. Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8f7ce74a15de..eca31dd49cae 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10602,16 +10602,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; } - ppd->host_link_state = HLS_UP_ARMED; set_logical_state(dd, LSTATE_ARMED); ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000); if (ret) { - /* logical state didn't change, stay at init */ - ppd->host_link_state = HLS_UP_INIT; dd_dev_err(dd, "%s: logical state did not change to ARMED\n", __func__); + break; } + ppd->host_link_state = HLS_UP_ARMED; /* * The simulator does not currently implement SMA messages, * so neighbor_normal is not set. Set it here when we first @@ -10624,18 +10623,16 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_UP_ARMED) goto unexpected; - ppd->host_link_state = HLS_UP_ACTIVE; set_logical_state(dd, LSTATE_ACTIVE); ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000); if (ret) { - /* logical state didn't change, stay at armed */ - ppd->host_link_state = HLS_UP_ARMED; dd_dev_err(dd, "%s: logical state did not change to ACTIVE\n", __func__); } else { /* tell all engines to go running */ sdma_all_running(dd); + ppd->host_link_state = HLS_UP_ACTIVE; /* Signal the IB layer that the port has went active */ event.device = &dd->verbs_dev.rdi.ibdev; -- cgit v1.2.3 From 96603ed865b3c5a9a0d1c9ed434661519d49fde3 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:26 -0700 Subject: IB/hfi1: Do not enable disabled port on cable insert Fix issue where a disabled port can be enabled by inserting a cable. The port should be explicitly enabled instead. Reviewed-by: Michael J. Ruhl Reviewed-by: Jakub Byczkowski Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index eca31dd49cae..bd594e7b0078 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9306,12 +9306,6 @@ int start_link(struct hfi1_pportdata *ppd) */ tune_serdes(ppd); - if (!ppd->link_enabled) { - dd_dev_info(ppd->dd, - "%s: stopping link start because link is disabled\n", - __func__); - return 0; - } if (!ppd->driver_link_ready) { dd_dev_info(ppd->dd, "%s: stopping link start because driver is not ready\n", @@ -9529,6 +9523,13 @@ void qsfp_event(struct work_struct *work) if (!qsfp_mod_present(ppd)) return; + if (ppd->host_link_state == HLS_DN_DISABLE) { + dd_dev_info(ppd->dd, + "%s: stopping link start because link is disabled\n", + __func__); + return; + } + /* * Turn DC back on after cable has been re-inserted. Up until * now, the DC has been in reset to save power. -- cgit v1.2.3 From e87473bc1b6c2cb08f1b760cfc8cd012822241a6 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Sat, 29 Jul 2017 08:43:32 -0700 Subject: IB/hfi1: Only set fd pointer when base context is completely initialized The allocate_ctxt() function adds the context to the fd data structure. Since the context is not completely initialized, this can cause confusion as to whether the context is valid or not. Move the fd reference from allocate_ctxt() to setup_base_ctxt(). Update the necessary functions to be aware of this move. Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 55 ++++++++++++++++++------------- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 4 +-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 3 +- drivers/infiniband/hw/hfi1/user_sdma.c | 4 +-- drivers/infiniband/hw/hfi1/user_sdma.h | 3 +- 5 files changed, 40 insertions(+), 29 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 650c1e578775..a0c13fa5babb 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -81,19 +81,22 @@ static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); static int init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); -static int init_user_ctxt(struct hfi1_filedata *fd); +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, __u32 len); -static int setup_base_ctxt(struct hfi1_filedata *fd); +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo); static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo); + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd); static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); @@ -759,7 +762,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) flush_wc(); /* drain user sdma queue */ - hfi1_user_sdma_free_queues(fdata); + hfi1_user_sdma_free_queues(fdata, uctxt); /* release the cpu */ hfi1_put_proc_affinity(fdata->rec_cpu_num); @@ -845,6 +848,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { int ret; unsigned int swmajor, swminor; + struct hfi1_ctxtdata *uctxt = NULL; swmajor = uinfo->userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) @@ -870,7 +874,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) * couldn't find a sub context. */ if (!ret) - ret = allocate_ctxt(fd, fd->dd, uinfo); + ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); mutex_unlock(&hfi1_mutex); @@ -888,28 +892,27 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) /* The only thing a sub context needs is the user_xxx stuff */ if (!ret) - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, fd->uctxt); if (ret) clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); } else if (!ret) { - ret = setup_base_ctxt(fd); - if (fd->uctxt->subctxt_cnt) { + ret = setup_base_ctxt(fd, uctxt); + if (uctxt->subctxt_cnt) { /* If there is an error, set the failed bit. */ if (ret) set_bit(HFI1_CTXT_BASE_FAILED, - &fd->uctxt->event_flags); + &uctxt->event_flags); /* * Base context is done, notify anybody using a * sub-context that is waiting for this completion */ - clear_bit(HFI1_CTXT_BASE_UNINIT, - &fd->uctxt->event_flags); - wake_up(&fd->uctxt->wait); + clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); + wake_up(&uctxt->wait); } if (ret) - deallocate_ctxt(fd->uctxt); + deallocate_ctxt(uctxt); } /* If an error occurred, clear the reference */ @@ -976,7 +979,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, } static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, - struct hfi1_user_info *uinfo) + struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata **cd) { struct hfi1_ctxtdata *uctxt; u16 ctxt; @@ -1071,14 +1075,13 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, */ if (dd->freectxts-- == dd->num_user_contexts) aspm_disable_all(dd); - fd->uctxt = uctxt; - /* Count the reference for the fd */ - hfi1_rcd_get(uctxt); + *cd = uctxt; return 0; ctxdata_free: + *cd = NULL; dd->rcd[ctxt] = NULL; hfi1_rcd_put(uctxt); return ret; @@ -1243,23 +1246,25 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, return ret; } -static int init_user_ctxt(struct hfi1_filedata *fd) +static int init_user_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; int ret; ret = hfi1_user_sdma_alloc_queues(uctxt, fd); if (ret) return ret; - ret = hfi1_user_exp_rcv_init(fd); + ret = hfi1_user_exp_rcv_init(fd, uctxt); + if (ret) + hfi1_user_sdma_free_queues(fd, uctxt); return ret; } -static int setup_base_ctxt(struct hfi1_filedata *fd) +static int setup_base_ctxt(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; @@ -1284,12 +1289,16 @@ static int setup_base_ctxt(struct hfi1_filedata *fd) if (ret) goto setup_failed; - ret = init_user_ctxt(fd); + ret = init_user_ctxt(fd, uctxt); if (ret) goto setup_failed; user_init(uctxt); + /* Now that the context is set up, the fd can get a reference. */ + fd->uctxt = uctxt; + hfi1_rcd_get(uctxt); + return 0; setup_failed: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 3baf71944471..d9036ba3f4eb 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -104,9 +104,9 @@ static struct mmu_rb_ops tid_rb_ops = { * receive caching. This needs to be done after the context has * been configured with the eager/expected RcvEntry counts. */ -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd) +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; int ret = 0; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 1bdc61be53cb..6cbaa4cf4970 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,7 +51,8 @@ #include "exp_rcv.h" -int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd); +int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 64d31eb9bd82..8f02707e9c95 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -437,9 +437,9 @@ pq_reqs_nomem: return ret; } -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt) { - struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_user_sdma_pkt_q *pq; hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 73c2455189d3..84c199d3e003 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -81,7 +81,8 @@ struct hfi1_user_sdma_comp_q { int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); -int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd); +int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, + struct hfi1_ctxtdata *uctxt); int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count); -- cgit v1.2.3 From c822652ea669f8f46e4e8e2fc0618f2f72103f14 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Sat, 29 Jul 2017 08:43:37 -0700 Subject: IB/hfi1: Disambiguate corruption and uninitialized error cases The error messages when checksum validation of the platform configuration fields populated into the ASIC scratch registers fails are ambiguous. Disambiguate them. Reviewed-by: Jakub Byczkowski Signed-off-by: Easwar Hariharan Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 41307e474525..5c38a24484ab 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -58,8 +58,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT; /* Prevent power on default of all zeroes from passing checksum */ - if (!version) + if (!version) { + dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__); + dd_dev_err(dd, + "%s: Please update your BIOS to support active channels\n", + __func__); return 0; + } /* * ASIC scratch 0 only contains the checksum and bitmap version as @@ -84,6 +89,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd) if (checksum + temp_scratch == 0xFFFF) return 1; + + dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__); return 0; } @@ -144,11 +151,6 @@ void get_platform_config(struct hfi1_devdata *dd) save_platform_config_fields(dd); return; } - dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n", - __func__); - dd_dev_err(dd, - "%s: Please update your BIOS to support active channels\n", - __func__); } else { ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, -- cgit v1.2.3 From 3ffea7d8cd9e3f8f96514ac499f2510ad2f31d11 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sat, 29 Jul 2017 08:43:43 -0700 Subject: IB/{rdmavt, hfi1, qib}: Fix panic with post receive and SGE compression The server side of qperf panics as follows: [242446.336860] IP: report_bug+0x64/0x10 [242446.341031] PGD 1c0c067 [242446.341032] P4D 1c0c067 [242446.343951] PUD 1c0d063 [242446.346870] PMD 8587ea067 [242446.349788] PTE 800000083e14016 [242446.352901] [242446.358352] Oops: 0003 [#1] SM [242446.437919] CPU: 1 PID: 7442 Comm: irq/92-hfi1_0 k Not tainted 4.12.0-mam-asm #1 [242446.446365] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0018.C4.072020161249 07/20/201 [242446.458397] task: ffff8808392d2b80 task.stack: ffffc9000664000 [242446.465097] RIP: 0010:report_bug+0x64/0x10 [242446.469859] RSP: 0018:ffffc900066439c0 EFLAGS: 0001000 [242446.475784] RAX: ffffffffa06647e4 RBX: ffffffffa06461e1 RCX: 000000000000000 [242446.483840] RDX: 0000000000000907 RSI: ffffffffa0675040 RDI: ffffffffffff740 [242446.491897] RBP: ffffc900066439e0 R08: 0000000000000001 R09: 000000000000025 [242446.499953] R10: ffffffff81a253df R11: 0000000000000133 R12: ffffc90006643b3 [242446.508010] R13: ffffffffa065bbf0 R14: 00000000000001e5 R15: 000000000000000 [242446.516067] FS: 0000000000000000(0000) GS:ffff88085f640000(0000) knlGS:000000000000000 [242446.525191] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003 [242446.531698] CR2: ffffffffa06647ee CR3: 0000000001c09000 CR4: 00000000001406e [242446.539756] Call Trace [242446.542582] fixup_bug+0x2c/0x5 [242446.546277] do_trap+0x12b/0x18 [242446.549972] do_error_trap+0x89/0x11 [242446.554171] ? hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.559324] ? ttwu_do_wakeup+0x1e/0x14 [242446.563795] ? ttwu_do_activate+0x77/0x8 [242446.568363] do_invalid_op+0x20/0x3 [242446.572448] invalid_op+0x1e/0x3 [242446.576247] RIP: 0010:hfi1_copy_sge+0x271/0x2b0 [hfi1 [242446.582075] RSP: 0018:ffffc90006643be8 EFLAGS: 0001004 [242446.587999] RAX: 0000000000000000 RBX: ffff88083e0fa240 RCX: 000000000000000 [242446.596058] RDX: 0000000000000000 RSI: ffff880842508000 RDI: ffff88083e0fa24 [242446.604116] RBP: ffffc90006643c28 R08: 0000000000000000 R09: 000000000000000 [242446.612172] R10: ffffc90009473640 R11: 0000000000000133 R12: 000000000000000 [242446.620228] R13: 0000000000000000 R14: 0000000000002000 R15: ffff88084250800 [242446.628293] ? hfi1_copy_sge+0x1a1/0x2b0 [hfi1 [242446.633449] hfi1_rc_rcv+0x3da/0x1270 [hfi1 [242446.638312] ? sc_buffer_alloc+0x113/0x150 [hfi1 [242446.643662] hfi1_ib_rcv+0x1c9/0x2e0 [hfi1 [242446.648428] process_receive_ib+0x19a/0x270 [hfi1 [242446.653866] ? process_rcv_qp_work+0xd2/0x160 [hfi1 [242446.659505] handle_receive_interrupt_nodma_rtail+0x184/0x2e0 [hfi1 [242446.666693] ? irq_finalize_oneshot+0x100/0x10 [242446.671846] receive_context_thread+0x1b/0x140 [hfi1 [242446.677576] irq_thread_fn+0x1e/0x4 [242446.681659] irq_thread+0x13c/0x1b [242446.685646] ? irq_forced_thread_fn+0x60/0x6 [242446.690604] kthread+0x112/0x15 [242446.694298] ? irq_thread_check_affinity+0xe0/0xe [242446.699738] ? kthread_park+0x60/0x6 [242446.703919] ? do_syscall_64+0x67/0x15 [242446.708292] ret_from_fork+0x25/0x3 [242446.712374] Code: 63 78 04 44 0f b7 70 08 41 89 d0 4c 8d 2c 38 41 83 e0 01 f6 c2 02 74 17 66 45 85 c0 74 11 f6 c2 04 b9 01 00 00 00 75 bb 83 ca 04 <66> 89 50 0a 66 45 85 c0 74 52 0f b6 48 0b 41 0f b7 f6 4d 89 e0 [242446.733527] RIP: report_bug+0x64/0x100 RSP: ffffc900066439c [242446.739935] CR2: ffffffffa06647e [242446.743763] ---[ end trace 0e90a20d0aa494f7 ]-- The root cause is that the qib/hfi1 post receive call to rvt_lkey_ok() doesn't interpret the new return value from rvt_lkey_ok() properly leading to an mr reference count underrun. Additionally, remove an unused argument in rvt_sge_adjacent() aw well as an unneeded incr local in rvt_post_one_wr(). Fixes: Commit 14fe13fcd3af ("IB/rdmavt: Compress adjacent SGEs in rvt_lkey_ok()") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/ruc.c | 6 ++++-- drivers/infiniband/hw/qib/qib_ruc.c | 6 ++++-- drivers/infiniband/sw/rdmavt/mr.c | 8 +++----- drivers/infiniband/sw/rdmavt/qp.c | 16 +++++++--------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index f13ddb273d77..4afa00f921f2 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -74,8 +74,10 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 28528459a052..e6a42a8972f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -58,8 +58,10 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) if (wqe->sg_list[i].length == 0) continue; /* Check LKEY */ - if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + NULL, &wqe->sg_list[i], + IB_ACCESS_LOCAL_WRITE); + if (unlikely(ret <= 0)) goto bad_lkey; qp->r_len += wqe->sg_list[i].length; j++; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ea95672d9675..1b3801f78e78 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -779,7 +779,6 @@ out: /** * rvt_sge_adjacent - is isge compressible - * @isge: outgoing internal SGE * @last_sge: last outgoing SGE written * @sge: SGE to check * @@ -787,8 +786,7 @@ out: * * Return: true if isge is adjacent to last sge */ -static inline bool rvt_sge_adjacent(struct rvt_sge *isge, - struct rvt_sge *last_sge, +static inline bool rvt_sge_adjacent(struct rvt_sge *last_sge, struct ib_sge *sge) { if (last_sge && sge->lkey == last_sge->mr->lkey && @@ -840,7 +838,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, if (pd->user) return -EINVAL; - if (rvt_sge_adjacent(isge, last_sge, sge)) + if (rvt_sge_adjacent(last_sge, sge)) return 0; rcu_read_lock(); mr = rcu_dereference(dev->dma_mr); @@ -857,7 +855,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - if (rvt_sge_adjacent(isge, last_sge, sge)) + if (rvt_sge_adjacent(last_sge, sge)) return 0; rcu_read_lock(); mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 740611e4692a..1878a97364aa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1613,7 +1613,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); u8 log_pmtu; - int ret, incr; + int ret; size_t cplen; bool reserved_op; int local_ops_delayed = 0; @@ -1695,14 +1695,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp, if (length == 0) continue; - incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, - &wr->sg_list[i], acc); - if (unlikely(incr < 0)) - goto bail_lkey_error; + ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge, + &wr->sg_list[i], acc); + if (unlikely(ret < 0)) + goto bail_inval_free; wqe->length += length; - if (incr) + if (ret) last_sge = &wqe->sg_list[j]; - j += incr; + j += ret; } wqe->wr.num_sge = j; } @@ -1755,8 +1755,6 @@ static int rvt_post_one_wr(struct rvt_qp *qp, return 0; -bail_lkey_error: - ret = incr; bail_inval_free: /* release mr holds */ while (j) { -- cgit v1.2.3 From 71d47008ca1b2ab10e0432e72e572c7ce5d8d63b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:49 -0700 Subject: IB/hfi1: Create workqueue for link events Currently, link down interrupts queue link entries on a workqueue intended for sending events only. Create a workqueue for queuing link events. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 20 ++++++++++---------- drivers/infiniband/hw/hfi1/driver.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 1 + drivers/infiniband/hw/hfi1/init.c | 26 ++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/pio.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 6 files changed, 40 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bd594e7b0078..8443d41c6e35 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -5545,7 +5545,7 @@ static void update_rcverr_timer(unsigned long opaque) set_link_down_reason( ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + queue_work(ppd->link_wq, &ppd->link_bounce_work); } dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt; @@ -6100,7 +6100,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) * will not happen. We have to do it here * before turning the DC off. */ - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } else { dd_dev_info(dd, "%s: QSFP module inserted\n", @@ -6135,7 +6135,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg) /* Schedule the QSFP work only if there is a cable attached. */ if (qsfp_mod_present(ppd)) - queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work); + queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work); } static int request_host_lcb_access(struct hfi1_devdata *dd) @@ -7738,12 +7738,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)HOST_REQ_DONE; } if (host_msg & BC_SMA_MSG) { - queue_work(ppd->hfi1_wq, &ppd->sma_message_work); + queue_work(ppd->link_wq, &ppd->sma_message_work); host_msg &= ~(u64)BC_SMA_MSG; } if (host_msg & LINKUP_ACHIEVED) { dd_dev_info(dd, "8051: Link up\n"); - queue_work(ppd->hfi1_wq, &ppd->link_up_work); + queue_work(ppd->link_wq, &ppd->link_up_work); host_msg &= ~(u64)LINKUP_ACHIEVED; } if (host_msg & EXT_DEVICE_CFG_REQ) { @@ -7751,7 +7751,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)EXT_DEVICE_CFG_REQ; } if (host_msg & VERIFY_CAP_FRAME) { - queue_work(ppd->hfi1_wq, &ppd->link_vc_work); + queue_work(ppd->link_wq, &ppd->link_vc_work); host_msg &= ~(u64)VERIFY_CAP_FRAME; } if (host_msg & LINK_GOING_DOWN) { @@ -7766,7 +7766,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) host_msg &= ~(u64)LINK_GOING_DOWN; } if (host_msg & LINK_WIDTH_DOWNGRADED) { - queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work); + queue_work(ppd->link_wq, &ppd->link_downgrade_work); host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED; } if (host_msg) { @@ -7809,7 +7809,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { - queue_work(ppd->hfi1_wq, &ppd->link_down_work); + queue_work(ppd->link_wq, &ppd->link_down_work); } } } @@ -8017,7 +8017,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n", __func__); set_link_down_reason(ppd, lcl_reason, 0, lcl_reason); - queue_work(ppd->hfi1_wq, &ppd->link_bounce_work); + queue_work(ppd->link_wq, &ppd->link_bounce_work); } } @@ -9685,7 +9685,7 @@ static void try_start_link(struct hfi1_pportdata *ppd) "QSFP not responding, waiting and retrying %d\n", (int)ppd->qsfp_retry_count); ppd->qsfp_retry_count++; - queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + queue_delayed_work(ppd->link_wq, &ppd->start_link_work, msecs_to_jiffies(QSFP_RETRY_WAIT)); return; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01235d4757a5..0b7ca0e05320 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -914,7 +914,7 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, return 0; } - queue_work(rcd->ppd->hfi1_wq, lsaw); + queue_work(rcd->ppd->link_wq, lsaw); return 1; } return 0; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2d32c5c314a3..ee6c389f9515 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -576,6 +576,7 @@ struct hfi1_pportdata { /* SendDMA related entries */ struct workqueue_struct *hfi1_wq; + struct workqueue_struct *link_wq; /* move out of interrupt context */ struct work_struct link_vc_work; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 7f5e4c7dca89..be027c9c880e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -660,6 +660,20 @@ static int create_workqueues(struct hfi1_devdata *dd) if (!ppd->hfi1_wq) goto wq_error; } + if (!ppd->link_wq) { + /* + * Make the link workqueue single-threaded to enforce + * serialization. + */ + ppd->link_wq = + alloc_workqueue( + "hfi_link_%d_%d", + WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, + 1, /* max_active */ + dd->unit, pidx); + if (!ppd->link_wq) + goto wq_error; + } } return 0; wq_error: @@ -670,6 +684,10 @@ wq_error: destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } return -ENOMEM; } @@ -954,6 +972,10 @@ static void shutdown_device(struct hfi1_devdata *dd) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } sdma_exit(dd); } @@ -1575,6 +1597,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) destroy_workqueue(ppd->hfi1_wq); ppd->hfi1_wq = NULL; } + if (ppd->link_wq) { + destroy_workqueue(ppd->link_wq); + ppd->link_wq = NULL; + } } if (!j) hfi1_device_remove(dd); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index adb6a4da6107..7108a4b5e94c 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1012,7 +1012,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sc->sw_index, sc->hw_context, (u32)reg); - queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 71b4258545c4..6781bcdb10b3 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -325,7 +325,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde, /* timed out - bounce the link */ dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n", __func__, sde->this_idx, (u32)reg); - queue_work(dd->pport->hfi1_wq, + queue_work(dd->pport->link_wq, &dd->pport->link_bounce_work); break; } -- cgit v1.2.3 From 626c077c025f9da6dce809b5a8300ed44ef02b6f Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:43:55 -0700 Subject: IB/hfi1: Prevent link down request double queuing When link interrupts occur, multiple link down requests could be queued up when only one is needed. This could get the hfi1 out of sync with its link partner during LNI. Only allow one link down request to be queued at any one time. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 +++- drivers/infiniband/hw/hfi1/hfi.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 8443d41c6e35..a3af46cbbf8d 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7045,6 +7045,7 @@ void handle_link_down(struct work_struct *work) /* Go offline first, then deal with reading/writing through 8051 */ was_up = !!(ppd->host_link_state & HLS_UP); set_link_state(ppd, HLS_DN_OFFLINE); + xchg(&ppd->is_link_down_queued, 0); if (was_up) { lcl_reason = 0; @@ -7805,10 +7806,11 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || - ppd->link_enabled == 0) { + ppd->link_enabled == 0 || ppd->is_link_down_queued) { dd_dev_info(dd, "%s: not queuing link down\n", __func__); } else { + xchg(&ppd->is_link_down_queued, 1); queue_work(ppd->link_wq, &ppd->link_down_work); } } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee6c389f9515..fb5f8394fbed 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -644,6 +644,7 @@ struct hfi1_pportdata { /* placeholders for IB MAD packet settings */ u8 overrun_threshold; u8 phy_error_threshold; + unsigned int is_link_down_queued; /* Used to override LED behavior for things like maintenance beaconing*/ /* -- cgit v1.2.3 From 913cc67159bc85a96d94df301ca39c1b2c540dca Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sat, 29 Jul 2017 08:44:01 -0700 Subject: IB/hfi1: Always perform offline transition Always initiate an offline transition request when a link down occurs. The firmware will use this request to confirm that the driver has seen the link down message. A host version is set to indicate this driver behavior to the firmware. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 66 +++++++++++++++++------------------ drivers/infiniband/hw/hfi1/chip.h | 5 +++ drivers/infiniband/hw/hfi1/firmware.c | 8 +++++ 3 files changed, 46 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index a3af46cbbf8d..101fbbbc2522 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8832,6 +8832,20 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, & REMOTE_DEVICE_REV_MASK; } +int write_host_interface_version(struct hfi1_devdata *dd, u8 version) +{ + u32 frame; + u32 mask; + + mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); + read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); + /* Clear, then set field */ + frame &= ~mask; + frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); + return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, + frame); +} + void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch) { @@ -10262,49 +10276,35 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) { struct hfi1_devdata *dd = ppd->dd; - u32 pstate, previous_state; + u32 previous_state; int ret; - int do_transition; - int do_wait; update_lcb_cache(dd); previous_state = ppd->host_link_state; ppd->host_link_state = HLS_GOING_OFFLINE; - pstate = read_physical_state(dd); - if (pstate == PLS_OFFLINE) { - do_transition = 0; /* in right state */ - do_wait = 0; /* ...no need to wait */ - } else if ((pstate & 0xf0) == PLS_OFFLINE) { - do_transition = 0; /* in an offline transient state */ - do_wait = 1; /* ...wait for it to settle */ - } else { - do_transition = 1; /* need to move to offline */ - do_wait = 1; /* ...will need to wait */ - } - if (do_transition) { - ret = set_physical_link_state(dd, - (rem_reason << 8) | PLS_OFFLINE); + /* start offline transition */ + ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE); - if (ret != HCMD_SUCCESS) { - dd_dev_err(dd, - "Failed to transition to Offline link state, return %d\n", - ret); - return -EINVAL; - } - if (ppd->offline_disabled_reason == - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) - ppd->offline_disabled_reason = - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to transition to Offline link state, return %d\n", + ret); + return -EINVAL; } + if (ppd->offline_disabled_reason == + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)) + ppd->offline_disabled_reason = + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); - if (do_wait) { - /* it can take a while for the link to go down */ - ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); - if (ret < 0) - return ret; - } + /* + * Wait for offline transition. It can take a while for + * the link to go down. + */ + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); + if (ret < 0) + return ret; /* * Now in charge of LCB - must be after the physical state is diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index bef6301d825d..6a0c691f1385 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -384,6 +384,7 @@ #define VERIFY_CAP_LOCAL_FABRIC 0x08 #define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 #define LOCAL_DEVICE_ID 0x0a +#define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c #define REMOTE_LNI_INFO 0x0d #define MISC_STATUS 0x0e @@ -506,6 +507,9 @@ #define DOWN_REMOTE_REASON_SHIFT 16 #define DOWN_REMOTE_REASON_MASK 0xff +#define HOST_INTERFACE_VERSION_SHIFT 16 +#define HOST_INTERFACE_VERSION_MASK 0xff + /* verify capability PHY power management bits */ #define PWRM_BER_CONTROL 0x1 #define PWRM_BANDWIDTH_CONTROL 0x2 @@ -704,6 +708,7 @@ int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result); /* chip.c */ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch); +int write_host_interface_version(struct hfi1_devdata *dd, u8 version); void read_guid(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 83efd6bec657..885714b93557 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -69,6 +69,7 @@ #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" +#define HOST_INTERFACE_VERSION 1 static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; @@ -1087,6 +1088,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd, dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", (int)ver_major, (int)ver_minor, (int)ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); + ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to set host interface version, return 0x%x\n", + ret); + return -EIO; + } return 0; } -- cgit v1.2.3 From d541e45500bd269060c26387902e1bec9783c07c Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:43 -0400 Subject: IB/core: Convert ah_attr from OPA to IB when copying to user OPA address handle atttibutes that have 32 bit LIDs would have to be converted to IB address handle attribute with the LID field programmed in the GID before copying to user space. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucm.c | 2 +- drivers/infiniband/core/ucma.c | 10 ++++--- drivers/infiniband/core/uverbs_marshall.c | 48 +++++++++++++++++++++++++++---- include/rdma/ib_marshall.h | 6 ++-- 4 files changed, 54 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 112099c86a19..f2a7f62c2834 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -618,7 +618,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, if (result) goto out; - ib_copy_qp_attr_to_user(&resp, &qp_attr); + ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 276f0ef835bd..eb85b546e223 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -248,14 +248,15 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, dst->qp_num = src->qp_num; } -static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, +static void ucma_copy_ud_event(struct ib_device *device, + struct rdma_ucm_ud_param *dst, struct rdma_ud_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; - ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); dst->qp_num = src->qp_num; dst->qkey = src->qkey; } @@ -335,7 +336,8 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, uevent->resp.event = event->event; uevent->resp.status = event->status; if (cm_id->qp_type == IB_QPT_UD) - ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); + ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud, + &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); @@ -1157,7 +1159,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, if (ret) goto out; - ib_copy_qp_attr_to_user(&resp, &qp_attr); + ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 94fd989c9060..bd0acf376af0 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -33,10 +33,47 @@ #include #include -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, - struct rdma_ah_attr *src) +#define OPA_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +static int rdma_ah_conv_opa_to_ib(struct ib_device *dev, + struct rdma_ah_attr *ib, + struct rdma_ah_attr *opa) { + struct ib_port_attr port_attr; + int ret = 0; + + /* Do structure copy and the over-write fields */ + *ib = *opa; + + ib->type = RDMA_AH_ATTR_TYPE_IB; + rdma_ah_set_grh(ib, NULL, 0, 0, 1, 0); + + if (ib_query_port(dev, opa->port_num, &port_attr)) { + /* Set to default subnet to indicate error */ + rdma_ah_set_subnet_prefix(ib, OPA_DEFAULT_GID_PREFIX); + ret = -EINVAL; + } else { + rdma_ah_set_subnet_prefix(ib, + cpu_to_be64(port_attr.subnet_prefix)); + } + rdma_ah_set_interface_id(ib, OPA_MAKE_ID(rdma_ah_get_dlid(opa))); + return ret; +} + +void ib_copy_ah_attr_to_user(struct ib_device *device, + struct ib_uverbs_ah_attr *dst, + struct rdma_ah_attr *ah_attr) +{ + struct rdma_ah_attr *src = ah_attr; + struct rdma_ah_attr conv_ah; + memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + + if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && + (rdma_ah_get_dlid(ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr))) + src = &conv_ah; + dst->dlid = rdma_ah_get_dlid(src); dst->sl = rdma_ah_get_sl(src); dst->src_path_bits = rdma_ah_get_path_bits(src); @@ -57,7 +94,8 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, +void ib_copy_qp_attr_to_user(struct ib_device *device, + struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) { dst->qp_state = src->qp_state; @@ -76,8 +114,8 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, dst->max_recv_sge = src->cap.max_recv_sge; dst->max_inline_data = src->cap.max_inline_data; - ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); - ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); + ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr); + ib_copy_ah_attr_to_user(device, &dst->alt_ah_attr, &src->alt_ah_attr); dst->pkey_index = src->pkey_index; dst->alt_pkey_index = src->alt_pkey_index; diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h index 68cef3bd50fb..8ebf84ae9ed1 100644 --- a/include/rdma/ib_marshall.h +++ b/include/rdma/ib_marshall.h @@ -38,10 +38,12 @@ #include #include -void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, +void ib_copy_qp_attr_to_user(struct ib_device *device, + struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); -void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, +void ib_copy_ah_attr_to_user(struct ib_device *device, + struct ib_uverbs_ah_attr *dst, struct rdma_ah_attr *src); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, -- cgit v1.2.3 From 4c4736905e456819223c6e879ca8fe7cce877939 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:44 -0400 Subject: IB/srpt: Increase lid and sm_lid to 32 bits srpt contains lid and sm_lid fields which are 16 bits in length, increase them to 32 bits. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index cc1183851af5..1b817e51b84b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -328,8 +328,8 @@ struct srpt_port { u8 port_guid[24]; u8 port_gid[64]; u8 port; - u16 sm_lid; - u16 lid; + u32 sm_lid; + u32 lid; union ib_gid gid; struct work_struct work; struct se_portal_group port_guid_tpg; -- cgit v1.2.3 From 7e93e2cb835c4301663c1ac5e0135d2e06e1c5e8 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:45 -0400 Subject: IB/IPoIB: Increase local_lid to 32 bits IPoIB contains local_lid field which is 16 bits in length, increase it to 32 bits. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ff50a7bd66d8..9e738104c2a1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -366,7 +366,7 @@ struct ipoib_dev_priv { u32 qkey; union ib_gid local_gid; - u16 local_lid; + u32 local_lid; unsigned int admin_mtu; unsigned int mcast_mtu; -- cgit v1.2.3 From 1cb2fc0db764dae2c484dac5c93824003fe571fb Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:46 -0400 Subject: IB/mad: Change slid in RMPP recv from 16 to 32 bits MAD RMPP contains slid field which is 16 bits in length, increase it to 32 bits. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad_rmpp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 0d3cca0a8890..e5cf09c66fe6 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -64,7 +64,7 @@ struct mad_rmpp_recv { __be64 tid; u32 src_qp; - u16 slid; + u32 slid; u8 mgmt_class; u8 class_version; u8 method; -- cgit v1.2.3 From 582faf3150f57b8364ac9d2aa731d7368ada7a4b Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:47 -0400 Subject: IB/core: Change port_attr.lid size from 16 to 32 bits lid field in struct ib_port_attr is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 5 ++++- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- include/rdma/ib_verbs.h | 2 +- include/rdma/opa_addr.h | 3 ++- 7 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 11ae67514e13..6b54280530c9 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -38,6 +38,7 @@ #include #include +#include #include #include "mad_priv.h" diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..eef2623406cc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,8 +275,11 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.lid = attr.lid; resp.sm_lid = attr.sm_lid; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) + resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); + else + resp.lid = (u16)attr.lid; resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index ea24230ea0d4..5a897b0106a9 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev, memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - guid_info_rec.lid = cpu_to_be16(attr.lid); + guid_info_rec.lid = cpu_to_be16((u16)attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 21d31cb1325f..00f057033cb9 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df3db71777a..617531f1bfc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = pattr.lid; + prev_lid = (u16)pattr.lid; err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b5732432bb29..4fa94e69b1fc 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,8 +549,8 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 lid; u16 sm_lid; + u32 lid; u8 lmc; u8 max_vl_num; u8 sm_sl; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index eace28f1555d..46d0567fffea 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -50,7 +50,8 @@ #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) - +#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ + ? 0 : x) /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that -- cgit v1.2.3 From db58540b021a17e0ede64f761b740556d77f1679 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:37:48 -0400 Subject: IB/core: Change port_attr.sm_lid from 16 to 32 bits sm_lid field in struct ib_port_attr is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 8 +++++--- include/rdma/ib_verbs.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index eef2623406cc..01e2ff023980 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,11 +275,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; - resp.sm_lid = attr.sm_lid; - if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); - else + resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); + } else { resp.lid = (u16)attr.lid; + resp.sm_lid = (u16)attr.sm_lid; + } resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4fa94e69b1fc..620535908118 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -549,7 +549,7 @@ struct ib_port_attr { u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; - u16 sm_lid; + u32 sm_lid; u32 lid; u8 lmc; u8 max_vl_num; -- cgit v1.2.3 From 7db20ecd1d9700e2c240dee505162eb56ab55b5b Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Thu, 8 Jun 2017 13:37:49 -0400 Subject: IB/core: Change wc.slid from 16 to 32 bits slid field in struct ib_wc is increased to 32 bits. This enables core components to use larger LIDs if needed. The user ABI is unchanged and return 16 bit values when queried. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Ira Weiny Signed-off-by: Don Hiatt Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 10 +++++++--- drivers/infiniband/hw/hfi1/mad.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 6 +++--- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_mad.c | 2 +- drivers/infiniband/sw/rdmavt/cq.c | 2 +- include/rdma/ib_verbs.h | 14 +++++++++++++- 10 files changed, 32 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2b4d613a3474..b39ee16aa479 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1703,7 +1703,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = cpu_to_be16(wc->slid); + req_msg->primary_local_lid = ib_slid_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } @@ -1713,7 +1713,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = cpu_to_be16(wc->slid); + req_msg->alt_local_lid = ib_slid_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 36a6f5c8914c..ff3c67a7aaad 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 01e2ff023980..eb0da3784bf4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1190,7 +1190,8 @@ out: return ret ? ret : in_len; } -static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, + struct ib_wc *wc) { struct ib_uverbs_wc tmp; @@ -1204,7 +1205,10 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) tmp.src_qp = wc->src_qp; tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; - tmp.slid = wc->slid; + if (rdma_cap_opa_ah(ib_dev, wc->port_num)) + tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); + else + tmp.slid = ib_slid_cpu16(wc->slid); tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; @@ -1248,7 +1252,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, if (!ret) break; - ret = copy_wc_to_user(data_ptr, &wc); + ret = copy_wc_to_user(ib_dev, data_ptr, &wc); if (ret) goto out_put; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 5977673a52d4..00ebc26cd187 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3958,7 +3958,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = in_wc->slid; + u16 slid = ib_slid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 00f057033cb9..04fb44e7699e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, @@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); } else { tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid); } ib_dma_sync_single_for_device(&dev->ib_dev, @@ -826,7 +826,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 95db929bdc34..cd2264ac88ae 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 9d83a53c0c67..e19ae0b9b439 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); - MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) @@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, op_modifier |= 0x4; - in_modifier |= in_wc->slid << 16; + in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 617531f1bfc6..a9caadab22cf 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev, u16 *out_mad_pkey_index) { int err; - u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; const struct ib_mad *in_mad = (const struct ib_mad *)in; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0ae2ff8cf81e..0335a3df74d5 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].slid = ib_slid_cpu16(entry->slid); wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 620535908118..7eaf7d2ab424 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -948,7 +948,7 @@ struct ib_wc { u32 src_qp; int wc_flags; u16 pkey_index; - u16 slid; + u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ @@ -3706,4 +3706,16 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, else return RDMA_AH_ATTR_TYPE_IB; } + +/* Return slid in 16bit CPU encoding */ +static inline u16 ib_slid_cpu16(u32 slid) +{ + return (u16)slid; +} + +/* Return slid in 16bit BE encoding */ +static inline u16 ib_slid_be16(u32 slid) +{ + return cpu_to_be16((u16)slid); +} #endif /* IB_VERBS_H */ -- cgit v1.2.3 From e92aa00a518971fca6b79aa87a1a9c5e5aa51f3b Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Thu, 8 Jun 2017 13:38:02 -0400 Subject: IB/CM: Add OPA Path record support to CM Add OPA path record support to the Connection Manager. Signed-off-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 50 +++++++++++++++++++++++++++++++++++++------- include/rdma/opa_addr.h | 18 ++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b39ee16aa479..885c429b4942 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1175,6 +1175,11 @@ static void cm_format_req(struct cm_req_msg *req_msg, { struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; + bool pri_ext = false; + + if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) + pri_ext = opa_is_extended_lid(pri_path->opa.dlid, + pri_path->opa.slid); cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); @@ -1202,18 +1207,24 @@ static void cm_format_req(struct cm_req_msg *req_msg, cm_req_set_srq(req_msg, param->srq); } + req_msg->primary_local_gid = pri_path->sgid; + req_msg->primary_remote_gid = pri_path->dgid; + if (pri_ext) { + req_msg->primary_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid)); + req_msg->primary_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid)); + } if (pri_path->hop_limit <= 1) { - req_msg->primary_local_lid = + req_msg->primary_local_lid = pri_ext ? 0 : htons(ntohl(sa_path_get_slid(pri_path))); - req_msg->primary_remote_lid = + req_msg->primary_remote_lid = pri_ext ? 0 : htons(ntohl(sa_path_get_dlid(pri_path))); } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; req_msg->primary_remote_lid = IB_LID_PERMISSIVE; } - req_msg->primary_local_gid = pri_path->sgid; - req_msg->primary_remote_gid = pri_path->dgid; cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); cm_req_set_primary_packet_rate(req_msg, pri_path->rate); req_msg->primary_traffic_class = pri_path->traffic_class; @@ -1225,17 +1236,29 @@ static void cm_format_req(struct cm_req_msg *req_msg, pri_path->packet_life_time)); if (alt_path) { + bool alt_ext = false; + + if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA) + alt_ext = opa_is_extended_lid(alt_path->opa.dlid, + alt_path->opa.slid); + + req_msg->alt_local_gid = alt_path->sgid; + req_msg->alt_remote_gid = alt_path->dgid; + if (alt_ext) { + req_msg->alt_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid)); + req_msg->alt_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid)); + } if (alt_path->hop_limit <= 1) { - req_msg->alt_local_lid = + req_msg->alt_local_lid = alt_ext ? 0 : htons(ntohl(sa_path_get_slid(alt_path))); - req_msg->alt_remote_lid = + req_msg->alt_remote_lid = alt_ext ? 0 : htons(ntohl(sa_path_get_dlid(alt_path))); } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; } - req_msg->alt_local_gid = alt_path->sgid; - req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, alt_path->flow_label); cm_req_set_alt_packet_rate(req_msg, alt_path->rate); @@ -2843,6 +2866,11 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, const void *private_data, u8 private_data_len) { + bool alt_ext = false; + + if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA) + alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, + alternate_path->opa.slid); cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); lap_msg->local_comm_id = cm_id_priv->id.local_id; @@ -2856,6 +2884,12 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg, htons(ntohl(sa_path_get_dlid(alternate_path))); lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; + if (alt_ext) { + lap_msg->alt_local_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid)); + lap_msg->alt_remote_gid.global.interface_id + = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid)); + } cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); lap_msg->alt_hop_limit = alternate_path->hop_limit; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 46d0567fffea..9b5e642cf550 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -77,4 +77,22 @@ static inline u32 opa_get_lid_from_gid(union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } + +/** + * opa_is_extended_lid: Returns true if dlid or slid are + * extended. + * + * @dlid: The DLID + * @slid: The SLID + */ +static inline bool opa_is_extended_lid(u32 dlid, u32 slid) +{ + if ((be32_to_cpu(dlid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (be32_to_cpu(slid) >= + be16_to_cpu(IB_MULTICAST_LID_BASE))) + return true; + else + return false; +} #endif /* OPA_ADDR_H */ -- cgit v1.2.3 From 6b3c0e6e6d5abfefb0112cd450e0aee97fcab7a8 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:38:03 -0400 Subject: IB/CM: Create appropriate path records when handling CM request When handling an incoming conection request, ib_cm creates either an IB or an OPA path record based on the gid field in the request. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 885c429b4942..4d870a0c2955 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1428,6 +1428,21 @@ static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid, (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn)))); } +static bool cm_req_has_alt_path(struct cm_req_msg *req_msg) +{ + return ((req_msg->alt_local_lid) || + (ib_is_opa_gid(&req_msg->alt_local_gid))); +} + +static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, + struct sa_path_rec *path, union ib_gid *gid) +{ + if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num)) + path->rec_type = SA_PATH_REC_TYPE_OPA; + else + path->rec_type = SA_PATH_REC_TYPE_IB; +} + static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, struct sa_path_rec *alt_path) @@ -1807,9 +1822,12 @@ static int cm_req_handler(struct cm_work *work) dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); } else { - work->path[0].rec_type = SA_PATH_REC_TYPE_IB; + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &req_msg->primary_local_gid); } - if (req_msg->alt_local_lid) + if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); @@ -1834,16 +1852,19 @@ static int cm_req_handler(struct cm_work *work) dev_net(gid_attr.ndev)); dev_put(gid_attr.ndev); } else { - work->path[0].rec_type = SA_PATH_REC_TYPE_IB; + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &req_msg->primary_local_gid); } - if (req_msg->alt_local_lid) + if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } - if (req_msg->alt_local_lid) { + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, cm_id_priv); if (ret) { @@ -2962,8 +2983,6 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { - memset(path, 0, sizeof *path); - path->rec_type = SA_PATH_REC_TYPE_IB; path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); @@ -2999,6 +3018,11 @@ static int cm_lap_handler(struct cm_work *work) return -EINVAL; param = &work->cm_event.param.lap_rcvd; + memset(&work->path[0], 0, sizeof(work->path[1])); + cm_path_set_rec_type(work->port->cm_dev->ib_device, + work->port->port_num, + &work->path[0], + &lap_msg->alt_local_gid); param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; -- cgit v1.2.3 From ac3a949fb2fff36bebdc4fab90567ed349ea7245 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Thu, 8 Jun 2017 13:38:04 -0400 Subject: IB/CM: Set appropriate slid and dlid when handling CM request If extended LIDs are being used, a connection request contains OPA GIDs in them. Extract the lids from the OPA gids and populate slid/dlid fields in the path records that are created when handling a connection request. Signed-off-by: Dasaratharaman Chandramouli Reviewed-by: Don Hiatt Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 67 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 4d870a0c2955..d5ca101057b7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1443,16 +1443,48 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num, path->rec_type = SA_PATH_REC_TYPE_IB; } +static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, + struct sa_path_rec *primary_path, + struct sa_path_rec *alt_path) +{ + u32 lid; + + if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(primary_path, + htonl(ntohs(req_msg->primary_local_lid))); + sa_path_set_slid(primary_path, + htonl(ntohs(req_msg->primary_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&req_msg->primary_local_gid); + sa_path_set_dlid(primary_path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid); + sa_path_set_slid(primary_path, cpu_to_be32(lid)); + } + + if (!cm_req_has_alt_path(req_msg)) + return; + + if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(alt_path, + htonl(ntohs(req_msg->alt_local_lid))); + sa_path_set_slid(alt_path, + htonl(ntohs(req_msg->alt_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&req_msg->alt_local_gid); + sa_path_set_dlid(alt_path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid); + sa_path_set_slid(alt_path, cpu_to_be32(lid)); + } +} + static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, struct sa_path_rec *alt_path) { primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; - sa_path_set_dlid(primary_path, - htonl(ntohs(req_msg->primary_local_lid))); - sa_path_set_slid(primary_path, - htonl(ntohs(req_msg->primary_remote_lid))); primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; @@ -1469,13 +1501,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time -= (primary_path->packet_life_time > 0); primary_path->service_id = req_msg->service_id; - if (req_msg->alt_local_lid) { + if (cm_req_has_alt_path(req_msg)) { alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; - sa_path_set_dlid(alt_path, - htonl(ntohs(req_msg->alt_local_lid))); - sa_path_set_slid(alt_path, - htonl(ntohs(req_msg->alt_remote_lid))); alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; @@ -1492,6 +1520,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time -= (alt_path->packet_life_time > 0); alt_path->service_id = req_msg->service_id; } + cm_format_path_lid_from_req(req_msg, primary_path, alt_path); } static u16 cm_get_bth_pkey(struct cm_work *work) @@ -2979,14 +3008,29 @@ out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); } EXPORT_SYMBOL(ib_send_cm_lap); +static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg, + struct sa_path_rec *path) +{ + u32 lid; + + if (path->rec_type != SA_PATH_REC_TYPE_OPA) { + sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); + sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); + } else { + lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid); + sa_path_set_dlid(path, cpu_to_be32(lid)); + + lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid); + sa_path_set_slid(path, cpu_to_be32(lid)); + } +} + static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct sa_path_rec *path, struct cm_lap_msg *lap_msg) { path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; - sa_path_set_dlid(path, htonl(ntohs(lap_msg->alt_local_lid))); - sa_path_set_slid(path, htonl(ntohs(lap_msg->alt_remote_lid))); path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); @@ -3000,6 +3044,7 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); path->packet_life_time -= (path->packet_life_time > 0); + cm_format_path_lid_from_lap(lap_msg, path); } static int cm_lap_handler(struct cm_work *work) -- cgit v1.2.3 From 40b24403f33ed8e9401b087e25f46d002fc8f396 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 13 Jul 2017 11:09:42 +0300 Subject: mlx5: support ->get_vector_affinity Simply refer to the generic affinity mask helper. Reviewed-by: Christoph Hellwig Acked-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a7f2e60085c4..a6d3bcfbf229 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3556,6 +3556,14 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, return netdev; } +const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev, + int comp_vector) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_get_vector_affinity(dev->mdev, comp_vector); +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { struct mlx5_ib_dev *dev; @@ -3686,6 +3694,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; + dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) dev->ib_dev.alloc_rdma_netdev = mlx5_ib_alloc_rdma_netdev; -- cgit v1.2.3 From 9047811b776ce09ba06623dd2a846cc501f0065b Mon Sep 17 00:00:00 2001 From: "Ismail, Mustafa" Date: Wed, 28 Jun 2017 09:02:45 -0500 Subject: RDMA/core: Add wait/retry version of ibnl_unicast Add a wait/retry version of ibnl_unicast, ibnl_unicast_wait, and modify ibnl_unicast to not wait/retry. This eliminates the undesirable wait for future users of ibnl_unicast. Change Portmapper calls originating from kernel to user-space to use ibnl_unicast_wait and take advantage of the wait/retry logic in netlink_unicast. Signed-off-by: Mustafa Ismail Signed-off-by: Chien Tin Tung Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/iwpm_msg.c | 6 +++--- drivers/infiniband/core/netlink.c | 12 +++++++++++- include/rdma/rdma_netlink.h | 10 ++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index a0e7c16d8bd8..add99b92afdf 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -174,7 +174,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) goto add_mapping_error; nlmsg_request->req_buffer = pm_msg; - ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; @@ -251,7 +251,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) goto query_mapping_error; nlmsg_request->req_buffer = pm_msg; - ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ err_str = "Unable to send a nlmsg"; @@ -312,7 +312,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) if (ret) goto remove_mapping_error; - ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 94931c474d41..0fc50e15ae22 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -232,11 +232,21 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, { int err; - err = netlink_unicast(nls, skb, pid, 0); + err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); return (err < 0) ? err : 0; } EXPORT_SYMBOL(ibnl_unicast); +int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, + __u32 pid) +{ + int err; + + err = netlink_unicast(nls, skb, pid, 0); + return (err < 0) ? err : 0; +} +EXPORT_SYMBOL(ibnl_unicast_wait); + int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags) { diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 348c102cb5f6..5b1466770917 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -63,6 +63,16 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid); +/** + * Send, with wait/1 retry, the supplied skb to a specific userspace PID. + * @skb: The netlink skb + * @nlh: Header of the netlink message to send + * @pid: Userspace netlink process ID + * Returns 0 on success or a negative error code. + */ +int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, + __u32 pid); + /** * Send the supplied skb to a netlink group. * @skb: The netlink skb -- cgit v1.2.3 From c9901724a2f14128ef6a57986babcbfbcf61a257 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2017 10:20:11 +0300 Subject: RDMA/netlink: Remove netlink clients infrastructure RDMA netlink has a complicated infrastructure for dynamically registering and de-registering netlink clients to the NETLINK_RDMA group. The complicated portion of this code is not widely used because 2 of the 3 current clients are statically compiled together with netlink.c. The infrastructure, therefore, is deemed overkill. Refactor the code to eliminate the dynamically added clients. Now all clients are pre-registered in a client array at compile time, and at run time they merely check-in with the infrastructure to pass their callback table for inclusion in the pre-sized client array. This also allows for future cleanups and removal of unneeded code in the iwcm* netlink handler. Signed-off-by: Leon Romanovsky Reviewed-by: Chien Tin Tung --- drivers/infiniband/core/cma.c | 6 +- drivers/infiniband/core/core_priv.h | 4 +- drivers/infiniband/core/device.c | 45 +++------ drivers/infiniband/core/iwcm.c | 10 +- drivers/infiniband/core/netlink.c | 185 +++++++++++++++++------------------- include/rdma/rdma_netlink.h | 13 +-- 6 files changed, 112 insertions(+), 151 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ca4135c596ba..2a16a559bdda 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4512,9 +4512,7 @@ static int __init cma_init(void) if (ret) goto err; - if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table), - cma_cb_table)) - pr_warn("RDMA CMA: failed to add netlink callback\n"); + rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table); cma_configfs_init(); return 0; @@ -4531,7 +4529,7 @@ err_wq: static void __exit cma_cleanup(void) { cma_configfs_exit(); - ibnl_remove_client(RDMA_NL_RDMA_CM); + rdma_nl_unregister(RDMA_NL_RDMA_CM); ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 11ae67514e13..e759c27113cd 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -179,8 +179,8 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); -int ibnl_init(void); -void ibnl_cleanup(void); +int rdma_nl_init(void); +void rdma_nl_exit(void); /** * Check if there are any listeners to the netlink group diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a5dfab6adf49..d0994cd30eae 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1086,29 +1086,15 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); -static struct ibnl_client_cbs ibnl_ls_cb_table[] = { +static const struct ibnl_client_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { - .dump = ib_nl_handle_resolve_resp, - .module = THIS_MODULE }, + .dump = ib_nl_handle_resolve_resp}, [RDMA_NL_LS_OP_SET_TIMEOUT] = { - .dump = ib_nl_handle_set_timeout, - .module = THIS_MODULE }, + .dump = ib_nl_handle_set_timeout}, [RDMA_NL_LS_OP_IP_RESOLVE] = { - .dump = ib_nl_handle_ip_res_resp, - .module = THIS_MODULE }, + .dump = ib_nl_handle_ip_res_resp}, }; -static int ib_add_ibnl_clients(void) -{ - return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table), - ibnl_ls_cb_table); -} - -static void ib_remove_ibnl_clients(void) -{ - ibnl_remove_client(RDMA_NL_LS); -} - static int __init ib_core_init(void) { int ret; @@ -1130,9 +1116,9 @@ static int __init ib_core_init(void) goto err_comp; } - ret = ibnl_init(); + ret = rdma_nl_init(); if (ret) { - pr_warn("Couldn't init IB netlink interface\n"); + pr_warn("Couldn't init IB netlink interface: err %d\n", ret); goto err_sysfs; } @@ -1154,24 +1140,17 @@ static int __init ib_core_init(void) goto err_mad; } - ret = ib_add_ibnl_clients(); - if (ret) { - pr_warn("Couldn't register ibnl clients\n"); - goto err_sa; - } - ret = register_lsm_notifier(&ibdev_lsm_nb); if (ret) { pr_warn("Couldn't register LSM notifier. ret %d\n", ret); - goto err_ibnl_clients; + goto err_sa; } + rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); ib_cache_setup(); return 0; -err_ibnl_clients: - ib_remove_ibnl_clients(); err_sa: ib_sa_cleanup(); err_mad: @@ -1179,7 +1158,7 @@ err_mad: err_addr: addr_cleanup(); err_ibnl: - ibnl_cleanup(); + rdma_nl_exit(); err_sysfs: class_unregister(&ib_class); err_comp: @@ -1191,13 +1170,13 @@ err: static void __exit ib_core_cleanup(void) { - unregister_lsm_notifier(&ibdev_lsm_nb); ib_cache_cleanup(); - ib_remove_ibnl_clients(); + rdma_nl_unregister(RDMA_NL_LS); + unregister_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); ib_mad_cleanup(); addr_cleanup(); - ibnl_cleanup(); + rdma_nl_exit(); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 31661b5c1743..8599271d8be6 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1175,12 +1175,8 @@ static int __init iw_cm_init(void) ret = iwpm_init(RDMA_NL_IWCM); if (ret) pr_err("iw_cm: couldn't init iwpm\n"); - - ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table), - iwcm_nl_cb_table); - if (ret) - pr_err("iw_cm: couldn't register netlink callbacks\n"); - + else + rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) return -ENOMEM; @@ -1200,7 +1196,7 @@ static void __exit iw_cm_cleanup(void) { unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); - ibnl_remove_client(RDMA_NL_IWCM); + rdma_nl_unregister(RDMA_NL_IWCM); iwpm_exit(RDMA_NL_IWCM); } diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 0fc50e15ae22..06f7ba31fbdd 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -39,16 +39,13 @@ #include #include "core_priv.h" -struct ibnl_client { - struct list_head list; - int index; - int nops; - const struct ibnl_client_cbs *cb_table; -}; +#include "core_priv.h" -static DEFINE_MUTEX(ibnl_mutex); +static DEFINE_MUTEX(rdma_nl_mutex); static struct sock *nls; -static LIST_HEAD(client_list); +static struct { + const struct ibnl_client_cbs *cb_table; +} rdma_nl_types[RDMA_NL_NUM_CLIENTS]; int ibnl_chk_listeners(unsigned int group) { @@ -57,58 +54,74 @@ int ibnl_chk_listeners(unsigned int group) return 0; } -int ibnl_add_client(int index, int nops, - const struct ibnl_client_cbs cb_table[]) +static bool is_nl_msg_valid(unsigned int type, unsigned int op) { - struct ibnl_client *cur; - struct ibnl_client *nl_client; + static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS - 1] = { + RDMA_NL_RDMA_CM_NUM_OPS, + RDMA_NL_IWPM_NUM_OPS, + 0, + RDMA_NL_LS_NUM_OPS, + 0 }; - nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL); - if (!nl_client) - return -ENOMEM; + /* + * This BUILD_BUG_ON is intended to catch addition of new + * RDMA netlink protocol without updating the array above. + */ + BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6); - nl_client->index = index; - nl_client->nops = nops; - nl_client->cb_table = cb_table; + if (type > RDMA_NL_NUM_CLIENTS - 1) + return false; - mutex_lock(&ibnl_mutex); + return (op < max_num_ops[type - 1]) ? true : false; +} - list_for_each_entry(cur, &client_list, list) { - if (cur->index == index) { - pr_warn("Client for %d already exists\n", index); - mutex_unlock(&ibnl_mutex); - kfree(nl_client); - return -EINVAL; - } - } +static bool is_nl_valid(unsigned int type, unsigned int op) +{ + if (!is_nl_msg_valid(type, op) || + !rdma_nl_types[type].cb_table || + !rdma_nl_types[type].cb_table[op].dump) + return false; + return true; +} - list_add_tail(&nl_client->list, &client_list); +void rdma_nl_register(unsigned int index, + const struct ibnl_client_cbs cb_table[]) +{ + mutex_lock(&rdma_nl_mutex); + if (!is_nl_msg_valid(index, 0)) { + /* + * All clients are not interesting in success/failure of + * this call. They want to see the print to error log and + * continue their initialization. Print warning for them, + * because it is programmer's error to be here. + */ + mutex_unlock(&rdma_nl_mutex); + WARN(true, + "The not-valid %u index was supplied to RDMA netlink\n", + index); + return; + } - mutex_unlock(&ibnl_mutex); + if (rdma_nl_types[index].cb_table) { + mutex_unlock(&rdma_nl_mutex); + WARN(true, + "The %u index is already registered in RDMA netlink\n", + index); + return; + } - return 0; + rdma_nl_types[index].cb_table = cb_table; + mutex_unlock(&rdma_nl_mutex); } -EXPORT_SYMBOL(ibnl_add_client); +EXPORT_SYMBOL(rdma_nl_register); -int ibnl_remove_client(int index) +void rdma_nl_unregister(unsigned int index) { - struct ibnl_client *cur, *next; - - mutex_lock(&ibnl_mutex); - list_for_each_entry_safe(cur, next, &client_list, list) { - if (cur->index == index) { - list_del(&(cur->list)); - mutex_unlock(&ibnl_mutex); - kfree(cur); - return 0; - } - } - pr_warn("Can't remove callback for client idx %d. Not found\n", index); - mutex_unlock(&ibnl_mutex); - - return -EINVAL; + mutex_lock(&rdma_nl_mutex); + rdma_nl_types[index].cb_table = NULL; + mutex_unlock(&rdma_nl_mutex); } -EXPORT_SYMBOL(ibnl_remove_client); +EXPORT_SYMBOL(rdma_nl_unregister); void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int len, int client, int op, int flags) @@ -149,45 +162,31 @@ EXPORT_SYMBOL(ibnl_put_attr); static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct ibnl_client *client; int type = nlh->nlmsg_type; - int index = RDMA_NL_GET_CLIENT(type); + unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); + struct netlink_callback cb = {}; + struct netlink_dump_control c = {}; - list_for_each_entry(client, &client_list, list) { - if (client->index == index) { - if (op >= client->nops || !client->cb_table[op].dump) - return -EINVAL; - - /* - * For response or local service set_timeout request, - * there is no need to use netlink_dump_start. - */ - if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || - (index == RDMA_NL_LS && - op == RDMA_NL_LS_OP_SET_TIMEOUT)) { - struct netlink_callback cb = { - .skb = skb, - .nlh = nlh, - .dump = client->cb_table[op].dump, - .module = client->cb_table[op].module, - }; - - return cb.dump(skb, &cb); - } - - { - struct netlink_dump_control c = { - .dump = client->cb_table[op].dump, - .module = client->cb_table[op].module, - }; - return netlink_dump_start(nls, skb, nlh, &c); - } - } + if (!is_nl_valid(index, op)) + return -EINVAL; + + /* + * For response or local service set_timeout request, + * there is no need to use netlink_dump_start. + */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || + (index == RDMA_NL_LS && op == RDMA_NL_LS_OP_SET_TIMEOUT)) { + cb.skb = skb; + cb.nlh = nlh; + cb.dump = rdma_nl_types[index].cb_table[op].dump; + cb.module = rdma_nl_types[index].cb_table[op].module; + return cb.dump(skb, &cb); } - pr_info("Index %d wasn't found in client list\n", index); - return -EINVAL; + c.dump = rdma_nl_types[index].cb_table[op].dump; + c.module = rdma_nl_types[index].cb_table[op].module; + return netlink_dump_start(nls, skb, nlh, &c); } static void ibnl_rcv_reply_skb(struct sk_buff *skb) @@ -221,10 +220,10 @@ static void ibnl_rcv_reply_skb(struct sk_buff *skb) static void ibnl_rcv(struct sk_buff *skb) { - mutex_lock(&ibnl_mutex); + mutex_lock(&rdma_nl_mutex); ibnl_rcv_reply_skb(skb); netlink_rcv_skb(skb, &ibnl_rcv_msg); - mutex_unlock(&ibnl_mutex); + mutex_unlock(&rdma_nl_mutex); } int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -254,32 +253,26 @@ int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, } EXPORT_SYMBOL(ibnl_multicast); -int __init ibnl_init(void) +int __init rdma_nl_init(void) { struct netlink_kernel_cfg cfg = { .input = ibnl_rcv, }; nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); - if (!nls) { - pr_warn("Failed to create netlink socket\n"); + if (!nls) return -ENOMEM; - } nls->sk_sndtimeo = 10 * HZ; return 0; } -void ibnl_cleanup(void) +void rdma_nl_exit(void) { - struct ibnl_client *cur, *next; + int idx; - mutex_lock(&ibnl_mutex); - list_for_each_entry_safe(cur, next, &client_list, list) { - list_del(&(cur->list)); - kfree(cur); - } - mutex_unlock(&ibnl_mutex); + for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) + rdma_nl_unregister(idx); netlink_kernel_release(nls); } diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 5b1466770917..aadf0ab963b2 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -11,23 +11,18 @@ struct ibnl_client_cbs { }; /** - * Add a a client to the list of IB netlink exporters. + * Register client in RDMA netlink. * @index: Index of the added client - * @nops: Number of supported ops by the added client. * @cb_table: A table for op->callback - * - * Returns 0 on success or a negative error code. */ -int ibnl_add_client(int index, int nops, - const struct ibnl_client_cbs cb_table[]); +void rdma_nl_register(unsigned int index, + const struct ibnl_client_cbs cb_table[]); /** * Remove a client from IB netlink. * @index: Index of the removed IB client. - * - * Returns 0 on success or a negative error code. */ -int ibnl_remove_client(int index); +void rdma_nl_unregister(unsigned int index); /** * Put a new message in a supplied skb. -- cgit v1.2.3 From 64401b69b29164c5731018cc44fc9b144ac9c5ae Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 30 May 2017 11:29:56 +0300 Subject: RDMA/netlink: Remove redundant owner option for netlink callbacks Owner field is not needed to be set because netlink is part of ib_core which will be unloaded last after all other modules are unloaded. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 3 +-- drivers/infiniband/core/netlink.c | 2 -- include/rdma/rdma_netlink.h | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2a16a559bdda..0c85f140e616 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4459,8 +4459,7 @@ out: } static const struct ibnl_client_cbs cma_cb_table[] = { - [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats, - .module = THIS_MODULE }, + [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, }; static int cma_init_net(struct net *net) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 06f7ba31fbdd..cd9b7e7b7d2c 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -180,12 +180,10 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, cb.skb = skb; cb.nlh = nlh; cb.dump = rdma_nl_types[index].cb_table[op].dump; - cb.module = rdma_nl_types[index].cb_table[op].module; return cb.dump(skb, &cb); } c.dump = rdma_nl_types[index].cb_table[op].dump; - c.module = rdma_nl_types[index].cb_table[op].module; return netlink_dump_start(nls, skb, nlh, &c); } diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index aadf0ab963b2..c124d8e43fc8 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -7,7 +7,6 @@ struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); - struct module *module; }; /** -- cgit v1.2.3 From 3c3e75d5ff75f9a076cac254fd32476ca80fdffc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 8 Jun 2017 09:05:12 +0300 Subject: RDMA/netlink: Avoid double pass for RDMA netlink messages The standard netlink_rcv_skb function skips messages without NLM_F_REQUEST flag in it, while SA netlink client issues them. In commit bc10ed7d3d19 ("IB/core: Add rdma netlink helper functions") the local function was introduced to allow such messages. This led to double pass for every incoming message. In this patch, we unify that local implementation and netlink_rcv_skb functions, so there will be no need for double pass anymore. As a outcome, this combined function gained more strict check for NLM_F_REQUEST flag and it is now allowed for SA pathquery client only. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/netlink.c | 62 +++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index cd9b7e7b7d2c..826fbd612c7d 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -159,8 +159,8 @@ nla_put_failure: } EXPORT_SYMBOL(ibnl_put_attr); -static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { int type = nlh->nlmsg_type; unsigned int index = RDMA_NL_GET_CLIENT(type); @@ -187,40 +187,66 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, return netlink_dump_start(nls, skb, nlh, &c); } -static void ibnl_rcv_reply_skb(struct sk_buff *skb) +/* + * This function is similar to netlink_rcv_skb with one exception: + * It calls to the callback for the netlink messages without NLM_F_REQUEST + * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed + * for that consumer only. + */ +static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, + struct nlmsghdr *, + struct netlink_ext_ack *)) { + struct netlink_ext_ack extack = {}; struct nlmsghdr *nlh; - int msglen; + int err; - /* - * Process responses until there is no more message or the first - * request. Generally speaking, it is not recommended to mix responses - * with requests. - */ while (skb->len >= nlmsg_total_size(0)) { + int msglen; + nlh = nlmsg_hdr(skb); + err = 0; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) - return; + return 0; - /* Handle response only */ - if (nlh->nlmsg_flags & NLM_F_REQUEST) - return; + /* + * Generally speaking, the only requests are handled + * by the kernel, but RDMA_NL_LS is different, because it + * runs backward netlink scheme. Kernel initiates messages + * and waits for reply with data to keep pathrecord cache + * in sync. + */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST) && + (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS)) + goto ack; + + /* Skip control messages */ + if (nlh->nlmsg_type < NLMSG_MIN_TYPE) + goto ack; - ibnl_rcv_msg(skb, nlh, NULL); + err = cb(skb, nlh, &extack); + if (err == -EINTR) + goto skip; +ack: + if (nlh->nlmsg_flags & NLM_F_ACK || err) + netlink_ack(skb, nlh, err, &extack); + +skip: msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } + + return 0; } -static void ibnl_rcv(struct sk_buff *skb) +static void rdma_nl_rcv(struct sk_buff *skb) { mutex_lock(&rdma_nl_mutex); - ibnl_rcv_reply_skb(skb); - netlink_rcv_skb(skb, &ibnl_rcv_msg); + rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg); mutex_unlock(&rdma_nl_mutex); } @@ -254,7 +280,7 @@ EXPORT_SYMBOL(ibnl_multicast); int __init rdma_nl_init(void) { struct netlink_kernel_cfg cfg = { - .input = ibnl_rcv, + .input = rdma_nl_rcv, }; nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); -- cgit v1.2.3 From 93fa50760b99aa12950421d0f3d2631694ca3e74 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 1 Jun 2017 12:42:36 +0300 Subject: RDMA/iwcm: Remove useless check of netlink client validity RDMA netlink implementation guarantees that supplied client number is in allowed range. Signed-off-by: Leon Romanovsky Reviewed-by: Chien Tin Tung --- drivers/infiniband/core/iwpm_util.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index f13870e69ccd..32ca2aaa4e3b 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -54,8 +54,6 @@ static struct iwpm_admin_data iwpm_admin; int iwpm_init(u8 nl_client) { int ret = 0; - if (iwpm_valid_client(nl_client)) - return -EINVAL; mutex_lock(&iwpm_admin_lock); if (atomic_read(&iwpm_admin.refcount) == 0) { iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE * @@ -383,15 +381,11 @@ int iwpm_get_nlmsg_seq(void) int iwpm_valid_client(u8 nl_client) { - if (nl_client >= RDMA_NL_NUM_CLIENTS) - return 0; return iwpm_admin.client_list[nl_client]; } void iwpm_set_valid(u8 nl_client, int valid) { - if (nl_client >= RDMA_NL_NUM_CLIENTS) - return; iwpm_admin.client_list[nl_client] = valid; } -- cgit v1.2.3 From 5d7ee40907507e8439044f415f3e1de216a26dea Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 1 Jun 2017 11:59:44 +0300 Subject: RDMA/iwcm: Remove extra EXPORT_SYMBOLS The iwcm exports functions which are not used outside of ib_core. This patch simply removes these EXPORT_SYMBOLS. Signed-off-by: Leon Romanovsky Reviewed-by: Chien Tin Tung --- drivers/infiniband/core/iwpm_msg.c | 12 ------------ drivers/infiniband/core/iwpm_util.c | 5 ----- 2 files changed, 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index add99b92afdf..45de263305f5 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -42,7 +42,6 @@ int iwpm_valid_pid(void) { return iwpm_user_pid > 0; } -EXPORT_SYMBOL(iwpm_valid_pid); /* * iwpm_register_pid - Send a netlink query to user space @@ -122,7 +121,6 @@ pid_query_error: iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; } -EXPORT_SYMBOL(iwpm_register_pid); /* * iwpm_add_mapping - Send a netlink add mapping message @@ -191,7 +189,6 @@ add_mapping_error: iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; } -EXPORT_SYMBOL(iwpm_add_mapping); /* * iwpm_add_and_query_mapping - Send a netlink add and query @@ -267,7 +264,6 @@ query_mapping_error: iwpm_free_nlmsg_request(&nlmsg_request->kref); return ret; } -EXPORT_SYMBOL(iwpm_add_and_query_mapping); /* * iwpm_remove_mapping - Send a netlink remove mapping message @@ -328,7 +324,6 @@ remove_mapping_error: dev_kfree_skb_any(skb); return ret; } -EXPORT_SYMBOL(iwpm_remove_mapping); /* netlink attribute policy for the received response to register pid request */ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { @@ -397,7 +392,6 @@ register_pid_response_exit: up(&nlmsg_request->sem); return 0; } -EXPORT_SYMBOL(iwpm_register_pid_cb); /* netlink attribute policy for the received response to add mapping request */ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { @@ -466,7 +460,6 @@ add_mapping_response_exit: up(&nlmsg_request->sem); return 0; } -EXPORT_SYMBOL(iwpm_add_mapping_cb); /* netlink attribute policy for the response to add and query mapping request * and response with remote address info */ @@ -558,7 +551,6 @@ query_mapping_response_exit: up(&nlmsg_request->sem); return 0; } -EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); /* * iwpm_remote_info_cb - Process a port mapper message, containing @@ -627,7 +619,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb) "remote_info: Mapped remote sockaddr:"); return ret; } -EXPORT_SYMBOL(iwpm_remote_info_cb); /* netlink attribute policy for the received request for mapping info */ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { @@ -677,7 +668,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid); return ret; } -EXPORT_SYMBOL(iwpm_mapping_info_cb); /* netlink attribute policy for the received mapping info ack */ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { @@ -707,7 +697,6 @@ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); return 0; } -EXPORT_SYMBOL(iwpm_ack_mapping_info_cb); /* netlink attribute policy for the received port mapper error message */ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { @@ -751,4 +740,3 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) up(&nlmsg_request->sem); return 0; } -EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 32ca2aaa4e3b..c46442ac71a2 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -81,7 +81,6 @@ init_exit: } return ret; } -EXPORT_SYMBOL(iwpm_init); static void free_hash_bucket(void); static void free_reminfo_bucket(void); @@ -107,7 +106,6 @@ int iwpm_exit(u8 nl_client) iwpm_set_registration(nl_client, IWPM_REG_UNDEF); return 0; } -EXPORT_SYMBOL(iwpm_exit); static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *, struct sockaddr_storage *); @@ -146,7 +144,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } -EXPORT_SYMBOL(iwpm_create_mapinfo); int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, struct sockaddr_storage *mapped_local_addr) @@ -182,7 +179,6 @@ remove_mapinfo_exit: spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); return ret; } -EXPORT_SYMBOL(iwpm_remove_mapinfo); static void free_hash_bucket(void) { @@ -295,7 +291,6 @@ get_remote_info_exit: spin_unlock_irqrestore(&iwpm_reminfo_lock, flags); return ret; } -EXPORT_SYMBOL(iwpm_get_remote_info); struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) -- cgit v1.2.3 From e3a2b93dddad315f01a4b67faee738954c084072 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 12 Jun 2017 16:00:19 +0300 Subject: RDMA/netlink: Add flag to consolidate common handling Add ability to provide flags to control RDMA netlink callbacks and convert addr.c and sa_query.c to be first users of such infrastructure. It allows to move their CAP_NET_ADMIN checks into netlink core. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/addr.c | 3 +-- drivers/infiniband/core/device.c | 12 +++++++++--- drivers/infiniband/core/netlink.c | 4 ++++ drivers/infiniband/core/sa_query.c | 6 ++---- include/rdma/rdma_netlink.h | 6 ++++++ 5 files changed, 22 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 01236cef7bfb..9f3339861ec5 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -134,8 +134,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || - !(NETLINK_CB(skb).sk) || - !netlink_capable(skb, CAP_NET_ADMIN)) + !(NETLINK_CB(skb).sk)) return -EPERM; if (ib_nl_is_good_ip_resp(nlh)) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d0994cd30eae..7ae29cc49a5e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1088,11 +1088,17 @@ EXPORT_SYMBOL(ib_get_net_dev_by_params); static const struct ibnl_client_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { - .dump = ib_nl_handle_resolve_resp}, + .dump = ib_nl_handle_resolve_resp, + .flags = RDMA_NL_ADMIN_PERM, + }, [RDMA_NL_LS_OP_SET_TIMEOUT] = { - .dump = ib_nl_handle_set_timeout}, + .dump = ib_nl_handle_set_timeout, + .flags = RDMA_NL_ADMIN_PERM, + }, [RDMA_NL_LS_OP_IP_RESOLVE] = { - .dump = ib_nl_handle_ip_res_resp}, + .dump = ib_nl_handle_ip_res_resp, + .flags = RDMA_NL_ADMIN_PERM, + }, }; static int __init ib_core_init(void) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 826fbd612c7d..c5ee62a24960 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -171,6 +171,10 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (!is_nl_valid(index, op)) return -EINVAL; + if ((rdma_nl_types[index].cb_table[op].flags & RDMA_NL_ADMIN_PERM) && + !netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + /* * For response or local service set_timeout request, * there is no need to use netlink_dump_start. diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 70fa4cabe48e..b499f4422f41 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1033,8 +1033,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, int ret; if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || - !(NETLINK_CB(skb).sk) || - !netlink_capable(skb, CAP_NET_ADMIN)) + !(NETLINK_CB(skb).sk)) return -EPERM; ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), @@ -1109,8 +1108,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || - !(NETLINK_CB(skb).sk) || - !netlink_capable(skb, CAP_NET_ADMIN)) + !(NETLINK_CB(skb).sk)) return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c124d8e43fc8..6ea36ec45401 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -7,6 +7,12 @@ struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); + u8 flags; +}; + +enum rdma_nl_flags { + /* Require CAP_NET_ADMIN */ + RDMA_NL_ADMIN_PERM = 1 << 0, }; /** -- cgit v1.2.3 From 1a1c116f3dcf1658ee1e395bd13a556187d547f2 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 16:38:04 +0300 Subject: RDMA/netlink: Simplify the put_msg and put_attr Reuse standard macros to cancel the netlink message in case of error. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index c5ee62a24960..e2395a1d9f45 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -126,36 +126,21 @@ EXPORT_SYMBOL(rdma_nl_unregister); void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, int len, int client, int op, int flags) { - unsigned char *prev_tail; - - prev_tail = skb_tail_pointer(skb); - *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), - len, flags); + *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags); if (!*nlh) - goto out_nlmsg_trim; - (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; + return NULL; return nlmsg_data(*nlh); - -out_nlmsg_trim: - nlmsg_trim(skb, prev_tail); - return NULL; } EXPORT_SYMBOL(ibnl_put_msg); int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, int len, void *data, int type) { - unsigned char *prev_tail; - - prev_tail = skb_tail_pointer(skb); - if (nla_put(skb, type, len, data)) - goto nla_put_failure; - nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; + if (nla_put(skb, type, len, data)) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } return 0; - -nla_put_failure: - nlmsg_trim(skb, prev_tail - nlh->nlmsg_len); - return -EMSGSIZE; } EXPORT_SYMBOL(ibnl_put_attr); -- cgit v1.2.3 From f00e64637061876ec7b6383b0bd80197c51e7312 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:35:20 +0300 Subject: RDMA/netlink: Rename and remove redundant parameter from ibnl_unicast* Netlink message header is not needed for unicast reply, hence remove it. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/iwpm_msg.c | 6 +++--- drivers/infiniband/core/iwpm_util.c | 4 ++-- drivers/infiniband/core/netlink.c | 10 ++++------ include/rdma/rdma_netlink.h | 8 ++------ 4 files changed, 11 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 45de263305f5..ca3c160bb9da 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -172,7 +172,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) goto add_mapping_error; nlmsg_request->req_buffer = pm_msg; - ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); + ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; @@ -248,7 +248,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) goto query_mapping_error; nlmsg_request->req_buffer = pm_msg; - ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); + ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ err_str = "Unable to send a nlmsg"; @@ -308,7 +308,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) if (ret) goto remove_mapping_error; - ret = ibnl_unicast_wait(skb, nlh, iwpm_user_pid); + ret = rdma_nl_unicast_wait(skb, iwpm_user_pid); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNDEFINED; diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index c46442ac71a2..c81c55942626 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -597,7 +597,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); if (ret) goto mapinfo_num_error; - ret = ibnl_unicast(skb, nlh, iwpm_pid); + ret = rdma_nl_unicast(skb, iwpm_pid); if (ret) { skb = NULL; err_str = "Unable to send a nlmsg"; @@ -626,7 +626,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) return -ENOMEM; } nlh->nlmsg_type = NLMSG_DONE; - ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid); + ret = rdma_nl_unicast(skb, iwpm_pid); if (ret) pr_warn("%s Unable to send a nlmsg\n", __func__); return ret; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index e2395a1d9f45..b95a70013f19 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -239,25 +239,23 @@ static void rdma_nl_rcv(struct sk_buff *skb) mutex_unlock(&rdma_nl_mutex); } -int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid) +int rdma_nl_unicast(struct sk_buff *skb, u32 pid) { int err; err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); return (err < 0) ? err : 0; } -EXPORT_SYMBOL(ibnl_unicast); +EXPORT_SYMBOL(rdma_nl_unicast); -int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid) +int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid) { int err; err = netlink_unicast(nls, skb, pid, 0); return (err < 0) ? err : 0; } -EXPORT_SYMBOL(ibnl_unicast_wait); +EXPORT_SYMBOL(rdma_nl_unicast_wait); int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags) diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 6ea36ec45401..e7b0779385e9 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -56,22 +56,18 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, /** * Send the supplied skb to a specific userspace PID. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid); +int rdma_nl_unicast(struct sk_buff *skb, u32 pid); /** * Send, with wait/1 retry, the supplied skb to a specific userspace PID. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @pid: Userspace netlink process ID * Returns 0 on success or a negative error code. */ -int ibnl_unicast_wait(struct sk_buff *skb, struct nlmsghdr *nlh, - __u32 pid); +int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. -- cgit v1.2.3 From 4d7f693af0c9d0d6940ff36f5adca1adfa0e7e6e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:44:32 +0300 Subject: RDMA/netlink: Rename and remove redundant parameter from ibnl_multicast The pointer to netlink header was not used in the ibnl_multicast function, so let's remove it and simplify the function signature. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/addr.c | 2 +- drivers/infiniband/core/iwpm_msg.c | 2 +- drivers/infiniband/core/netlink.c | 5 ++--- drivers/infiniband/core/sa_query.c | 2 +- include/rdma/rdma_netlink.h | 4 +--- 5 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 9f3339861ec5..30cf764824ec 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -184,7 +184,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL); + rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index ca3c160bb9da..30825bb9b8e9 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -103,7 +103,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); - ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); + ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL); if (ret) { skb = NULL; /* skb is freed in the netlink send-op handling */ iwpm_user_pid = IWPM_PID_UNAVAILABLE; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index b95a70013f19..5c627d1fbaa9 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -257,12 +257,11 @@ int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid) } EXPORT_SYMBOL(rdma_nl_unicast_wait); -int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, - unsigned int group, gfp_t flags) +int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags) { return nlmsg_multicast(nls, skb, 0, group, flags); } -EXPORT_SYMBOL(ibnl_multicast); +EXPORT_SYMBOL(rdma_nl_multicast); int __init rdma_nl_init(void) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b499f4422f41..977f64d0e983 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -861,7 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask) /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask); + ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); if (!ret) ret = len; else diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index e7b0779385e9..16a94c425938 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -72,12 +72,10 @@ int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); /** * Send the supplied skb to a netlink group. * @skb: The netlink skb - * @nlh: Header of the netlink message to send * @group: Netlink group ID * @flags: allocation flags * Returns 0 on success or a negative error code. */ -int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, - unsigned int group, gfp_t flags); +int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); #endif /* _RDMA_NETLINK_H */ -- cgit v1.2.3 From ff61c425c1c563f1d688d59caf3b18a395cbf9c4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 15:51:16 +0300 Subject: RDMA/netlink: Simplify and rename ibnl_chk_listeners Make ibnl_chk_listeners function to be one line by removing unneeded comparison. Rename that function to be complaint to other functions in RDMA netlink. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/addr.c | 2 +- drivers/infiniband/core/netlink.c | 7 +++---- drivers/infiniband/core/sa_query.c | 2 +- include/rdma/rdma_netlink.h | 6 ++++++ 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 30cf764824ec..7310ece99cd9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -325,7 +325,7 @@ static void queue_req(struct addr_req *req) static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { - if (ibnl_chk_listeners(RDMA_NL_GROUP_LS)) + if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) return -EADDRNOTAVAIL; /* We fill in what we can, the response will fill the rest */ diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 5c627d1fbaa9..514959ccaf2d 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -47,12 +47,11 @@ static struct { const struct ibnl_client_cbs *cb_table; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; -int ibnl_chk_listeners(unsigned int group) +int rdma_nl_chk_listeners(unsigned int group) { - if (netlink_has_listeners(nls, group) == 0) - return -1; - return 0; + return (netlink_has_listeners(nls, group)) ? 0 : -1; } +EXPORT_SYMBOL(rdma_nl_chk_listeners); static bool is_nl_msg_valid(unsigned int type, unsigned int op) { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 977f64d0e983..2cc85c2b74b7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1418,7 +1418,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && (!(query->flags & IB_SA_QUERY_OPA))) { - if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) { + if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { if (!ib_nl_make_request(query, gfp_mask)) return id; } diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 16a94c425938..348e0bbe0fc9 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -78,4 +78,10 @@ int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid); */ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags); +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int rdma_nl_chk_listeners(unsigned int group); #endif /* _RDMA_NETLINK_H */ -- cgit v1.2.3 From 3250b4dbd87aa08c21891cabfc6f6b48b36fd7e5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Jun 2017 18:23:45 +0300 Subject: RDMA/netlink: Rename netlink callback struct The RDMA netlink client infrastructure was removed and made obsolete. The old infrastructure defined struct ibnl_client_cbs. Now that all uses of this have been updated to the new infrastructure, rename the struct to be compliant with the current stack naming standards: struct rdma_nl_cbs. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/cma.c | 2 +- drivers/infiniband/core/device.c | 2 +- drivers/infiniband/core/iwcm.c | 2 +- drivers/infiniband/core/netlink.c | 4 ++-- include/rdma/rdma_netlink.h | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 0c85f140e616..d8edd8b11561 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4458,7 +4458,7 @@ out: return skb->len; } -static const struct ibnl_client_cbs cma_cb_table[] = { +static const struct rdma_nl_cbs cma_cb_table[] = { [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats}, }; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 7ae29cc49a5e..33a39518848c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1086,7 +1086,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, } EXPORT_SYMBOL(ib_get_net_dev_by_params); -static const struct ibnl_client_cbs ibnl_ls_cb_table[] = { +static const struct rdma_nl_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { .dump = ib_nl_handle_resolve_resp, .flags = RDMA_NL_ADMIN_PERM, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 8599271d8be6..452a3115e3e6 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -80,7 +80,7 @@ const char *__attribute_const__ iwcm_reject_msg(int reason) } EXPORT_SYMBOL(iwcm_reject_msg); -static struct ibnl_client_cbs iwcm_nl_cb_table[] = { +static struct rdma_nl_cbs iwcm_nl_cb_table[] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 514959ccaf2d..a7082adae16b 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -44,7 +44,7 @@ static DEFINE_MUTEX(rdma_nl_mutex); static struct sock *nls; static struct { - const struct ibnl_client_cbs *cb_table; + const struct rdma_nl_cbs *cb_table; } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; int rdma_nl_chk_listeners(unsigned int group) @@ -84,7 +84,7 @@ static bool is_nl_valid(unsigned int type, unsigned int op) } void rdma_nl_register(unsigned int index, - const struct ibnl_client_cbs cb_table[]) + const struct rdma_nl_cbs cb_table[]) { mutex_lock(&rdma_nl_mutex); if (!is_nl_msg_valid(index, 0)) { diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 348e0bbe0fc9..92f8832297ab 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -5,7 +5,7 @@ #include #include -struct ibnl_client_cbs { +struct rdma_nl_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); u8 flags; }; @@ -21,7 +21,7 @@ enum rdma_nl_flags { * @cb_table: A table for op->callback */ void rdma_nl_register(unsigned int index, - const struct ibnl_client_cbs cb_table[]); + const struct rdma_nl_cbs cb_table[]); /** * Remove a client from IB netlink. -- cgit v1.2.3 From 8030c8357a94ce6397dd8df6296925f0f4b1f9b7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 19 Jun 2017 14:04:56 +0300 Subject: RDMA/core: Add iterator over ib_devices The coming nldev needs to iterate over all IB devices in the system and in order to not expose the ib_devices list outside the devices.c, it is necessary to provide function iterator. Current version is written explicitly for nldev callback to avoid over-engineering at this stage, but it can be easily extended for other types. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/core_priv.h | 8 ++++++++ drivers/infiniband/core/device.c | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index e759c27113cd..0c175590cf92 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -102,6 +102,14 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, roce_netdev_callback cb, void *cookie); +typedef int (*nldev_callback)(struct ib_device *device, + struct sk_buff *skb, + struct netlink_callback *cb, + unsigned int idx); + +int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, + struct netlink_callback *cb); + enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 33a39518848c..8828f26250a8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -892,6 +892,31 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, up_read(&lists_rwsem); } +/** + * ib_enum_all_devs - enumerate all ib_devices + * @cb: Callback to call for each found ib_device + * + * Enumerates all ib_devices and calls callback() on each device. + */ +int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct ib_device *dev; + unsigned int idx = 0; + int ret = 0; + + down_read(&lists_rwsem); + list_for_each_entry(dev, &device_list, core_list) { + ret = nldev_cb(dev, skb, cb, idx); + if (ret) + break; + idx++; + } + + up_read(&lists_rwsem); + return ret; +} + /** * ib_query_pkey - Get P_Key table entry * @device:Device to query -- cgit v1.2.3 From ecc82c53f9a4ce08ba7df626a4262c86841ced8f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 14:39:59 +0300 Subject: RDMA/core: Add and expose static device index This patch adds static device index in similar fashion to already available in netdev world (struct net->ifindex). In downstream patches, the RDMA nelink will use this idx-to-ib_device conversion, so as part of this commit, we are exposing the translation function to be visible for IB/core users. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/core_priv.h | 5 +++++ drivers/infiniband/core/device.c | 37 ++++++++++++++++++++++++++++++++++++- include/rdma/ib_verbs.h | 2 ++ 3 files changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 0c175590cf92..cbdcc81e1df8 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -309,4 +309,9 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map, return 0; } #endif + +struct ib_device *__ib_device_get_by_index(u32 ifindex); +/* RDMA device netlink */ +void nldev_init(void); +void nldev_exit(void); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8828f26250a8..deae8b940994 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -134,6 +134,17 @@ static int ib_device_check_mandatory(struct ib_device *device) return 0; } +struct ib_device *__ib_device_get_by_index(u32 index) +{ + struct ib_device *device; + + list_for_each_entry(device, &device_list, core_list) + if (device->index == index) + return device; + + return NULL; +} + static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; @@ -145,7 +156,6 @@ static struct ib_device *__ib_device_get_by_name(const char *name) return NULL; } - static int alloc_name(char *name) { unsigned long *inuse; @@ -394,6 +404,30 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event, return NOTIFY_OK; } +/** + * __dev_new_index - allocate an device index + * + * Returns a suitable unique value for a new device interface + * number. It assumes that there are less than 2^32-1 ib devices + * will be present in the system. + */ +static u32 __dev_new_index(void) +{ + /* + * The device index to allow stable naming. + * Similar to struct net -> ifindex. + */ + static u32 index; + + for (;;) { + if (!(++index)) + index = 1; + + if (!__ib_device_get_by_index(index)) + return index; + } +} + /** * ib_register_device - Register an IB device with IB core * @device:Device to register @@ -492,6 +526,7 @@ int ib_register_device(struct ib_device *device, if (client->add && !add_client_context(device, client)) client->add(device); + device->index = __dev_new_index(); down_write(&lists_rwsem); list_add_tail(&device->core_list, &device_list); up_write(&lists_rwsem); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1082b4c81b2c..3391df5fdc9c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2298,6 +2298,8 @@ struct ib_device { struct rdmacg_device cg_device; #endif + u32 index; + /** * The following mandatory functions are used only at device * registration. Keep functions such as these at the end of this -- cgit v1.2.3 From 1830ba21b9a475cfc6159e6cfe532c75fe7682a4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Jun 2017 12:46:33 +0300 Subject: RDMA/netlink: Add and implement doit netlink callback The .doit callback is used by netlink core to differentiate between get and set operations. Common convention is to use that call for command operations like (SET, ADD, e.t.c.) and/or access without NLF_M_DUMP flag. This commit adds proper declaration and implementation to RDMA netlink. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 19 ++++++++++++++----- include/rdma/rdma_netlink.h | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index a7082adae16b..484d6a8a2811 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -76,9 +76,13 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) static bool is_nl_valid(unsigned int type, unsigned int op) { - if (!is_nl_msg_valid(type, op) || - !rdma_nl_types[type].cb_table || - !rdma_nl_types[type].cb_table[op].dump) + const struct rdma_nl_cbs *cb_table; + + if (!is_nl_msg_valid(type, op)) + return false; + + cb_table = rdma_nl_types[type].cb_table; + if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) return false; return true; } @@ -151,6 +155,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int op = RDMA_NL_GET_OP(type); struct netlink_callback cb = {}; struct netlink_dump_control c = {}; + int ret; if (!is_nl_valid(index, op)) return -EINVAL; @@ -169,10 +174,14 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, cb.nlh = nlh; cb.dump = rdma_nl_types[index].cb_table[op].dump; return cb.dump(skb, &cb); + } else { + c.dump = rdma_nl_types[index].cb_table[op].dump; + return netlink_dump_start(nls, skb, nlh, &c); } + if (rdma_nl_types[index].cb_table[op].doit) + ret = rdma_nl_types[index].cb_table[op].doit(skb, nlh, extack); + return ret; - c.dump = rdma_nl_types[index].cb_table[op].dump; - return netlink_dump_start(nls, skb, nlh, &c); } /* diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 92f8832297ab..e25bf1988846 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include struct rdma_nl_cbs { + int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); u8 flags; }; -- cgit v1.2.3 From c729943a77c108253c46b2d50c8a15a888facf4c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Jun 2017 13:14:13 +0300 Subject: RDMA/netlink: Reduce indirection access to cb_table Introduce intermediate variable to store access to fields of cb_table. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 484d6a8a2811..e36c39e3cc2b 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -155,12 +155,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int op = RDMA_NL_GET_OP(type); struct netlink_callback cb = {}; struct netlink_dump_control c = {}; + const struct rdma_nl_cbs *cb_table; int ret; if (!is_nl_valid(index, op)) return -EINVAL; - if ((rdma_nl_types[index].cb_table[op].flags & RDMA_NL_ADMIN_PERM) && + cb_table = rdma_nl_types[type].cb_table; + + if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; @@ -172,14 +175,14 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, (index == RDMA_NL_LS && op == RDMA_NL_LS_OP_SET_TIMEOUT)) { cb.skb = skb; cb.nlh = nlh; - cb.dump = rdma_nl_types[index].cb_table[op].dump; + cb.dump = cb_table[op].dump; return cb.dump(skb, &cb); } else { - c.dump = rdma_nl_types[index].cb_table[op].dump; + c.dump = cb_table[op].dump; return netlink_dump_start(nls, skb, nlh, &c); } - if (rdma_nl_types[index].cb_table[op].doit) - ret = rdma_nl_types[index].cb_table[op].doit(skb, nlh, extack); + if (cb_table[op].doit) + ret = cb_table[op].doit(skb, nlh, extack); return ret; } -- cgit v1.2.3 From 647c75ac59a48a54dafd4475d14a645a0025a4f4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Jun 2017 14:20:39 +0300 Subject: RDMA/netlink: Convert LS to doit callback RDMA_NL_LS protocol is actually does not dump anything, but sets data and it should be handled by doit callback. This patch actually converts RDMA_NL_LS to doit callback, while preserving IWCM and RDMA_CM flows through netlink_dump_start(). Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/addr.c | 5 ++--- drivers/infiniband/core/core_priv.h | 9 ++++++--- drivers/infiniband/core/device.c | 6 +++--- drivers/infiniband/core/netlink.c | 28 ++++++++++------------------ drivers/infiniband/core/sa_query.c | 8 ++++---- 5 files changed, 25 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 7310ece99cd9..16b3bdffeb96 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -129,10 +129,9 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) } int ib_nl_handle_ip_res_resp(struct sk_buff *skb, - struct netlink_callback *cb) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; - if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cbdcc81e1df8..57beb1cceda3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -198,11 +198,14 @@ void rdma_nl_exit(void); int ibnl_chk_listeners(unsigned int group); int ib_nl_handle_resolve_resp(struct sk_buff *skb, - struct netlink_callback *cb); + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); int ib_nl_handle_set_timeout(struct sk_buff *skb, - struct netlink_callback *cb); + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); int ib_nl_handle_ip_res_resp(struct sk_buff *skb, - struct netlink_callback *cb); + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack); int ib_get_cached_subnet_prefix(struct ib_device *device, u8 port_num, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index deae8b940994..5272c3806c39 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1148,15 +1148,15 @@ EXPORT_SYMBOL(ib_get_net_dev_by_params); static const struct rdma_nl_cbs ibnl_ls_cb_table[] = { [RDMA_NL_LS_OP_RESOLVE] = { - .dump = ib_nl_handle_resolve_resp, + .doit = ib_nl_handle_resolve_resp, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NL_LS_OP_SET_TIMEOUT] = { - .dump = ib_nl_handle_set_timeout, + .doit = ib_nl_handle_set_timeout, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NL_LS_OP_IP_RESOLVE] = { - .dump = ib_nl_handle_ip_res_resp, + .doit = ib_nl_handle_ip_res_resp, .flags = RDMA_NL_ADMIN_PERM, }, }; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index e36c39e3cc2b..145ad5343780 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -153,38 +153,30 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int type = nlh->nlmsg_type; unsigned int index = RDMA_NL_GET_CLIENT(type); unsigned int op = RDMA_NL_GET_OP(type); - struct netlink_callback cb = {}; - struct netlink_dump_control c = {}; const struct rdma_nl_cbs *cb_table; - int ret; if (!is_nl_valid(index, op)) return -EINVAL; - cb_table = rdma_nl_types[type].cb_table; + cb_table = rdma_nl_types[index].cb_table; if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - /* - * For response or local service set_timeout request, - * there is no need to use netlink_dump_start. - */ - if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || - (index == RDMA_NL_LS && op == RDMA_NL_LS_OP_SET_TIMEOUT)) { - cb.skb = skb; - cb.nlh = nlh; - cb.dump = cb_table[op].dump; - return cb.dump(skb, &cb); - } else { - c.dump = cb_table[op].dump; + /* FIXME: Convert IWCM to properly handle doit callbacks */ + if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM || + index == RDMA_NL_IWCM) { + struct netlink_dump_control c = { + .dump = cb_table[op].dump, + }; return netlink_dump_start(nls, skb, nlh, &c); } + if (cb_table[op].doit) - ret = cb_table[op].doit(skb, nlh, extack); - return ret; + return cb_table[op].doit(skb, nlh, extack); + return 0; } /* diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 2cc85c2b74b7..da29e2863c84 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1021,9 +1021,9 @@ static void ib_nl_request_timeout(struct work_struct *work) } int ib_nl_handle_set_timeout(struct sk_buff *skb, - struct netlink_callback *cb) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; int timeout, delta, abs_delta; const struct nlattr *attr; unsigned long flags; @@ -1097,9 +1097,9 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) } int ib_nl_handle_resolve_resp(struct sk_buff *skb, - struct netlink_callback *cb) + struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; unsigned long flags; struct ib_sa_query *query; struct ib_mad_send_buf *send_buf; -- cgit v1.2.3 From 8bc67414f28c995ccfa29a12984b5dae188b3df8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 18 Jun 2017 16:37:27 +0300 Subject: RDMA/netlink: Update copyright Add Mellanox to the copyright header. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 145ad5343780..cd692bd73793 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved. * Copyright (c) 2010 Voltaire Inc. All rights reserved. * * This software is available to you under a choice of one of two -- cgit v1.2.3 From 1a6e7c31d71db34d1b9bc3acc87eaea6c2ecc997 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 07:55:53 +0300 Subject: RDMA/netlink: Add netlink device definitions to UAPI Introduce new defines to rdma_netlink.h, so the RDMA configuration tool will be able to communicate with RDMA subsystem by using the shared defines. The addition of new client (NLDEV) revealed the fact that we exposed by mistake the RDMA_NL_I40IW define which is not backed by any RDMA netlink by now and it won't be exposed in the future too. So this patch reuses the value and deletes the old defines. The NLDEV operates with objects. The struct ib_device has two straightforward objects: device itself and ports of that device. This brings us to propose the following commands to work on those objects: * RDMA_NLDEV_CMD_{GET,SET,NEW,DEL} - works on ib_device itself * RDMA_NLDEV_CMD_PORT_{GET,SET,NEW,DEL} - works on ports of specific ib_device Those commands receive/return the device index (RDMA_NLDEV_ATTR_DEV_INDEX) and port index (RDMA_NLDEV_ATTR_PORT_INDEX). For device object accesses, the RDMA_NLDEV_ATTR_PORT_INDEX will return the maximum number of ports for specific ib_device and for port access the actual port index. The port index starts from 1 to follow RDMA/core internal semantics and the sysfs exposed knobs. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/netlink.c | 2 +- include/uapi/rdma/rdma_netlink.h | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index cd692bd73793..27352a352770 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -61,7 +61,7 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) RDMA_NL_IWPM_NUM_OPS, 0, RDMA_NL_LS_NUM_OPS, - 0 }; + RDMA_NLDEV_NUM_OPS }; /* * This BUILD_BUG_ON is intended to catch addition of new diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 02fe8390c18f..a44229fa5eca 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -8,7 +8,7 @@ enum { RDMA_NL_IWCM, RDMA_NL_RSVD, RDMA_NL_LS, /* RDMA Local Services */ - RDMA_NL_I40IW, + RDMA_NL_NLDEV, /* RDMA device interface */ RDMA_NL_NUM_CLIENTS }; @@ -222,4 +222,41 @@ struct rdma_nla_ls_gid { __u8 gid[16]; }; +enum rdma_nldev_command { + RDMA_NLDEV_CMD_UNSPEC, + + RDMA_NLDEV_CMD_GET, /* can dump */ + RDMA_NLDEV_CMD_SET, + RDMA_NLDEV_CMD_NEW, + RDMA_NLDEV_CMD_DEL, + + RDMA_NLDEV_CMD_PORT_GET, /* can dump */ + RDMA_NLDEV_CMD_PORT_SET, + RDMA_NLDEV_CMD_PORT_NEW, + RDMA_NLDEV_CMD_PORT_DEL, + + RDMA_NLDEV_NUM_OPS +}; + +enum rdma_nldev_attr { + /* don't change the order or add anything between, this is ABI! */ + RDMA_NLDEV_ATTR_UNSPEC, + + /* Identifier for ib_device */ + RDMA_NLDEV_ATTR_DEV_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_DEV_NAME, /* string */ + /* + * Device index together with port index are identifiers + * for port/link properties. + * + * For RDMA_NLDEV_CMD_GET commamnd, port index will return number + * of available ports in ib_device, while for port specific operations, + * it will be real port index as it appears in sysfs. Port index follows + * sysfs notation and starts from 1 for the first port. + */ + RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + + RDMA_NLDEV_ATTR_MAX +}; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 6c80b41abe22ae3c0d98f39a88f4b8fb501910d3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 09:14:15 +0300 Subject: RDMA/netlink: Add nldev initialization flows Add nldev init and exit flows to the RDMA/core. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/Makefile | 3 ++- drivers/infiniband/core/device.c | 2 ++ drivers/infiniband/core/nldev.c | 45 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/core/nldev.c (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index e3cdafff8ece..920609a0872e 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -11,7 +11,8 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ multicast.o mad.o smi.o agent.o mad_rmpp.o \ - security.o + security.o nldev.o + ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5272c3806c39..66b109bc6753 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1212,6 +1212,7 @@ static int __init ib_core_init(void) goto err_sa; } + nldev_init(); rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table); ib_cache_setup(); @@ -1237,6 +1238,7 @@ err: static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); + nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); unregister_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c new file mode 100644 index 000000000000..1d1e4f214874 --- /dev/null +++ b/drivers/infiniband/core/nldev.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "core_priv.h" + +void __init nldev_init(void) +{ + rdma_nl_register(RDMA_NL_NLDEV, NULL); +} + +void __exit nldev_exit(void) +{ + rdma_nl_unregister(RDMA_NL_NLDEV); +} -- cgit v1.2.3 From b4c598a67ea19c5784e542c03dd912a0ce36109a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 09:59:14 +0300 Subject: RDMA/netlink: Implement nldev device dumpit calback This patch adds the ability to return all available devices together with their properties. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 64 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 1d1e4f214874..4b8b0f5d5344 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -30,13 +30,75 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include "core_priv.h" +static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { + [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, + .len = IB_DEVICE_NAME_MAX - 1}, + [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, +}; + +static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) +{ + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) + return -EMSGSIZE; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) + return -EMSGSIZE; + return 0; +} + +static int _nldev_get_dumpit(struct ib_device *device, + struct sk_buff *skb, + struct netlink_callback *cb, + unsigned int idx) +{ + int start = cb->args[0]; + struct nlmsghdr *nlh; + + if (idx < start) + return 0; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), + 0, NLM_F_MULTI); + + if (fill_dev_info(skb, device)) { + nlmsg_cancel(skb, nlh); + goto out; + } + + nlmsg_end(skb, nlh); + + idx++; + +out: cb->args[0] = idx; + return skb->len; +} + +static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + /* + * There is no need to take lock, because + * we are relying on ib_core's lists_rwsem + */ + return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); +} + +static const struct rdma_nl_cbs nldev_cb_table[] = { + [RDMA_NLDEV_CMD_GET] = { + .dump = nldev_get_dumpit, + }, +}; + void __init nldev_init(void) { - rdma_nl_register(RDMA_NL_NLDEV, NULL); + rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } void __exit nldev_exit(void) -- cgit v1.2.3 From e5c9469efcb18a6b7aed5e6f32e478b0298ad968 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 15 Jun 2017 20:33:08 +0300 Subject: RDMA/netlink: Add nldev device doit implementation Provide ability to query specific device. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 4b8b0f5d5344..666940f2b49a 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -53,6 +53,45 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return 0; } +static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + struct sk_buff *msg; + u32 index; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + + device = __ib_device_get_by_index(index); + if (!device) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), + 0, 0); + + err = fill_dev_info(msg, device); + if (err) { + nlmsg_free(msg); + return err; + } + + nlmsg_end(msg, nlh); + + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); +} + static int _nldev_get_dumpit(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb, @@ -92,6 +131,7 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) static const struct rdma_nl_cbs nldev_cb_table[] = { [RDMA_NLDEV_CMD_GET] = { + .doit = nldev_get_doit, .dump = nldev_get_dumpit, }, }; -- cgit v1.2.3 From 7d02f605f0dce0ef1b76aeffe2d36794738f24a0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 11:30:33 +0300 Subject: RDMA/netlink: Add nldev port dumpit implementation This patch implements the query interface to get all ports data for the specific device. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 71 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 666940f2b49a..1318e246196a 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -53,6 +53,18 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return 0; } +static int fill_port_info(struct sk_buff *msg, + struct ib_device *device, u32 port) +{ + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) + return -EMSGSIZE; + if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) + return -EMSGSIZE; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) + return -EMSGSIZE; + return 0; +} + static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -129,11 +141,70 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); } +static int nldev_port_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + int start = cb->args[0]; + struct nlmsghdr *nlh; + u32 idx = 0; + u32 ifindex; + int err; + u32 p; + + err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, NULL); + if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) + return -EINVAL; + + ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = __ib_device_get_by_index(ifindex); + if (!device) + return -EINVAL; + + for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { + /* + * The dumpit function returns all information from specific + * index. This specific index is taken from the netlink + * messages request sent by user and it is available + * in cb->args[0]. + * + * Usually, the user doesn't fill this field and it causes + * to return everything. + * + */ + if (idx < start) { + idx++; + continue; + } + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, + RDMA_NLDEV_CMD_PORT_GET), + 0, NLM_F_MULTI); + + if (fill_port_info(skb, device, p)) { + nlmsg_cancel(skb, nlh); + goto out; + } + idx++; + nlmsg_end(skb, nlh); + } + +out: cb->args[0] = idx; + return skb->len; +} + static const struct rdma_nl_cbs nldev_cb_table[] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, }, + [RDMA_NLDEV_CMD_PORT_GET] = { + .dump = nldev_port_get_dumpit, + }, }; void __init nldev_init(void) -- cgit v1.2.3 From c3f66f7b0052ea854744372fdaae7817f5358e4f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 22 Jun 2017 16:10:38 +0300 Subject: RDMA/netlink: Implement nldev port doit callback Provide ability to get specific to device and port information. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 1318e246196a..db9d9ffc1415 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -141,6 +141,49 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); } +static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; + struct ib_device *device; + struct sk_buff *msg; + u32 index; + u32 port; + int err; + + err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, + nldev_policy, extack); + if (err || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) + return -EINVAL; + + index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); + device = __ib_device_get_by_index(index); + if (!device) + return -EINVAL; + + port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); + if (!rdma_is_port_valid(device, port)) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), + 0, 0); + + err = fill_port_info(msg, device, port); + if (err) { + nlmsg_free(msg); + return err; + } + + nlmsg_end(msg, nlh); + + return rdma_nl_unicast(msg, NETLINK_CB(skb).portid); +} + static int nldev_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { @@ -203,6 +246,7 @@ static const struct rdma_nl_cbs nldev_cb_table[] = { .dump = nldev_get_dumpit, }, [RDMA_NLDEV_CMD_PORT_GET] = { + .doit = nldev_port_get_doit, .dump = nldev_port_get_dumpit, }, }; -- cgit v1.2.3 From ac50525374315b9b609747f83b07f8dccb06b722 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 20 Jun 2017 14:47:08 +0300 Subject: RDMA/netlink: Expose device and port capability masks The port capability mask is exposed to user space via sysfs interface, while device capabilities are available for verbs only. This patch provides those capabilities through netlink interface. Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise --- drivers/infiniband/core/nldev.c | 19 +++++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 24 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index db9d9ffc1415..94c1e49074f5 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -50,18 +50,37 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) return -EMSGSIZE; + + BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + device->attrs.device_cap_flags, 0)) + return -EMSGSIZE; + return 0; } static int fill_port_info(struct sk_buff *msg, struct ib_device *device, u32 port) { + struct ib_port_attr attr; + int ret; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) return -EMSGSIZE; + + ret = ib_query_port(device, port, &attr); + if (ret) + return ret; + + BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64)); + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, + (u64)attr.port_cap_flags, 0)) + return -EMSGSIZE; + return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index a44229fa5eca..90de11db6580 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -257,6 +257,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_PORT_INDEX, /* u32 */ + /* + * Device and port capabilities + */ + RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 9abb0d1bbd9529c574eacd8586e2bf68d17966cd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:49:53 +0300 Subject: RDMA: Simplify get firmware interface There is a need to forward FW version to user space application through RDMA netlink. In order to make it safe, there is need to declare nla_policy and limit the size of FW string. The new define IB_FW_VERSION_NAME_MAX will limit the size of FW version string. That define was chosen to be equal to ETHTOOL_FWVERS_LEN, because many drivers anyway are limited by that value indirectly. The introduction of this define allows us to remove the string size from get_fw_str function signature. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 4 ++-- drivers/infiniband/core/sysfs.c | 4 ++-- drivers/infiniband/hw/cxgb3/iwch_provider.c | 5 ++--- drivers/infiniband/hw/cxgb4/provider.c | 5 ++--- drivers/infiniband/hw/hfi1/verbs.c | 5 ++--- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 7 +++---- drivers/infiniband/hw/mlx4/main.c | 5 ++--- drivers/infiniband/hw/mlx5/main.c | 8 ++++---- drivers/infiniband/hw/mthca/mthca_provider.c | 5 ++--- drivers/infiniband/hw/nes/nes_verbs.c | 5 ++--- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 5 ++--- drivers/infiniband/hw/qedr/main.c | 5 ++--- drivers/infiniband/hw/usnic/usnic_ib_main.c | 6 ++---- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 5 ++--- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 +-- include/rdma/ib_verbs.h | 6 ++++-- 16 files changed, 36 insertions(+), 47 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 66b109bc6753..fbc92c649be8 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,10 +336,10 @@ static int read_port_immutable(struct ib_device *device) return 0; } -void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) +void ib_get_device_fw_str(struct ib_device *dev, char *str) { if (dev->get_dev_fw_str) - dev->get_dev_fw_str(dev, str, str_len); + dev->get_dev_fw_str(dev, str); else str[0] = '\0'; } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 7ebe1ef23652..abc5ab581f82 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1210,8 +1210,8 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct ib_device *dev = container_of(device, struct ib_device, dev); - ib_get_device_fw_str(dev, buf, PAGE_SIZE); - strlcat(buf, "\n", PAGE_SIZE); + ib_get_device_fw_str(dev, buf); + strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); return strlen(buf); } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 0cd0c1fa27d4..099e76f3758a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1336,8 +1336,7 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) { struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); struct ethtool_drvinfo info; @@ -1345,7 +1344,7 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str, pr_debug("%s dev 0x%p\n", __func__, iwch_dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, str_len, "%s", info.fw_version); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } int iwch_register_device(struct iwch_dev *dev) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0771e9a4d061..346e8334279a 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -517,14 +517,13 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *dev, char *str) { struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev); pr_debug("%s dev 0x%p\n", __func__, dev); - snprintf(str, str_len, "%u.%u.%u.%u", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u", FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers), FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers), FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers), diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc51bf247006..c88c03c11555 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1561,14 +1561,13 @@ static void init_ibport(struct hfi1_pportdata *ppd) RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); } -static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); struct hfi1_ibdev *dev = dev_from_rdi(rdi); u32 ver = dd_from_dev(dev)->dc8051_ver; - snprintf(str, str_len, "%u.%u.%u", dc8051_ver_maj(ver), + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver), dc8051_ver_min(ver), dc8051_ver_patch(ver)); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02d871db7ca5..1aa411034a27 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2584,13 +2584,12 @@ static const char * const i40iw_hw_stat_names[] = { "iwRdmaInv" }; -static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str) { u32 firmware_version = I40IW_FW_VERSION; - snprintf(str, str_len, "%u.%u", firmware_version, - (firmware_version & 0x000000ff)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version, + (firmware_version & 0x000000ff)); } /** diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1f25a37eb056..c636842c5be0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2587,12 +2587,11 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_fw_ver_str(struct ib_device *device, char *str, - size_t str_len) +static void get_fw_ver_str(struct ib_device *device, char *str) { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d", (int) (dev->dev->caps.fw_ver >> 32), (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, (int) dev->dev->caps.fw_ver & 0xffff); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9279631d8da0..0a5a4e3fa66d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3285,13 +3285,13 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *ibdev, char *str) { struct mlx5_ib_dev *dev = container_of(ibdev, struct mlx5_ib_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), - fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", + fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), + fw_rev_sub(dev->mdev)); } static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c197cd9b193f..eae9bffd45d4 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1178,12 +1178,11 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *device, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *device, char *str) { struct mthca_dev *dev = container_of(device, struct mthca_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d", (int) (dev->fw_ver >> 32), (int) (dev->fw_ver >> 16) & 0xffff, (int) dev->fw_ver & 0xffff); diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 25dcd7573df9..c2943e39d2f9 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -3672,15 +3672,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *dev, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *dev, char *str) { struct nes_ib_device *nesibdev = container_of(dev, struct nes_ib_device, ibdev); struct nes_vnic *nesvnic = nesibdev->nesvnic; nes_debug(NES_DBG_INIT, "\n"); - snprintf(str, str_len, "%u.%u", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", (nesvnic->nesdev->nesadapter->firmware_version >> 16), (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 757c65816295..fbfbd9e96147 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -107,12 +107,11 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void get_dev_fw_str(struct ib_device *device, char *str, - size_t str_len) +static void get_dev_fw_str(struct ib_device *device, char *str) { struct ocrdma_dev *dev = get_ocrdma_dev(device); - snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]); } static int ocrdma_register_device(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 199b6edbef92..97d033f51dc9 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -68,13 +68,12 @@ static enum rdma_link_layer qedr_link_layer(struct ib_device *device, return IB_LINK_LAYER_ETHERNET; } -static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str, - size_t str_len) +static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) { struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, str_len, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index e69c8e476a2b..e86700f994cb 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -333,9 +333,7 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } -static void usnic_get_dev_fw_str(struct ib_device *device, - char *str, - size_t str_len) +static void usnic_get_dev_fw_str(struct ib_device *device, char *str) { struct usnic_ib_dev *us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev); @@ -345,7 +343,7 @@ static void usnic_get_dev_fw_str(struct ib_device *device, us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); mutex_unlock(&us_ibdev->usdev_lock); - snprintf(str, str_len, "%s", info.fw_version); + snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } /* Start of PF discovery section */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e76565280afa..7f29e4db28a1 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -102,12 +102,11 @@ static struct device_attribute *pvrdma_class_attributes[] = { &dev_attr_board_id }; -static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str, - size_t str_len) +static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) { struct pvrdma_dev *dev = container_of(device, struct pvrdma_dev, ib_dev); - snprintf(str, str_len, "%d.%d.%d\n", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n", (int) (dev->dsr->caps.fw_ver >> 32), (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff, (int) dev->dsr->caps.fw_ver & 0xffff); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 7871379342f4..98e30b41e436 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -62,8 +62,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev, { struct ipoib_dev_priv *priv = ipoib_priv(netdev); - ib_get_device_fw_str(priv->ca, drvinfo->fw_version, - sizeof(drvinfo->fw_version)); + ib_get_device_fw_str(priv->ca, drvinfo->fw_version); strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3391df5fdc9c..e0e87a1f66fb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -64,6 +64,8 @@ #include #include +#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN + extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; @@ -2307,7 +2309,7 @@ struct ib_device { * in fast paths. */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); - void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); + void (*get_dev_fw_str)(struct ib_device *, char *str); }; struct ib_client { @@ -2343,7 +2345,7 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); -void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); +void ib_get_device_fw_str(struct ib_device *device, char *str); int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, -- cgit v1.2.3 From 8621a7e3c1c22e18385c9ced1647363884ea2aa1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 27 Jun 2017 16:58:59 +0300 Subject: RDMA/netlink: Export FW version Add FW version to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 9 +++++++++ include/uapi/rdma/rdma_netlink.h | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 94c1e49074f5..cdc970ca5a1b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -40,10 +40,14 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, .len = IB_DEVICE_NAME_MAX - 1}, [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, + .len = IB_FW_VERSION_NAME_MAX - 1}, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { + char fw[IB_FW_VERSION_NAME_MAX]; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) @@ -56,6 +60,11 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) device->attrs.device_cap_flags, 0)) return -EMSGSIZE; + ib_get_device_fw_str(device, fw); + /* Device without FW has strlen(fw) */ + if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) + return -EMSGSIZE; + return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 90de11db6580..5159858730b0 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -262,6 +262,10 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_CAP_FLAGS, /* u64 */ + /* + * FW version + */ + RDMA_NLDEV_ATTR_FW_VERSION, /* string */ RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 1aaff896ca6b968a639e3e1e72ba6146ba332501 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 14:01:37 +0300 Subject: RDMA/netlink: Export node_guid and sys_image_guid Add Node GUID and system image GUID to the device properties exported by RDMA netlink, to be used by RDMAtool. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 8 ++++++++ include/uapi/rdma/rdma_netlink.h | 13 +++++++++++++ 2 files changed, 21 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index cdc970ca5a1b..f932c2c3fad0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -42,6 +42,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, .len = IB_FW_VERSION_NAME_MAX - 1}, + [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -65,6 +67,12 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, + be64_to_cpu(device->node_guid), 0)) + return -EMSGSIZE; + if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, + be64_to_cpu(device->attrs.sys_image_guid), 0)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 5159858730b0..fe3a7429e7a1 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -266,6 +266,19 @@ enum rdma_nldev_attr { * FW version */ RDMA_NLDEV_ATTR_FW_VERSION, /* string */ + + /* + * Node GUID (in host byte order) associated with the RDMA device. + */ + RDMA_NLDEV_ATTR_NODE_GUID, /* u64 */ + + /* + * System image GUID (in host byte order) associated with + * this RDMA device and other devices which are part of a + * single system. + */ + RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 12026fbba6af2fc53c3c6cf88bdfc6561986ba82 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:05:14 +0300 Subject: RDMA/netlink: Advertise IB subnet prefix Add IB subnet prefix to the port properties exported by RDMA netlink. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 5 +++++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f932c2c3fad0..7af71d5e52c8 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -44,6 +44,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { .len = IB_FW_VERSION_NAME_MAX - 1}, [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -97,6 +98,10 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, (u64)attr.port_cap_flags, 0)) return -EMSGSIZE; + if (rdma_protocol_ib(device, port) && + nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, + attr.subnet_prefix, 0)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index fe3a7429e7a1..481003182a35 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -279,6 +279,11 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, /* u64 */ + /* + * Subnet prefix (in host byte order) + */ + RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 80a06dd36f79de7007f21f5cbe42181a4e5c7d6d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:38:36 +0300 Subject: RDMA/netink: Export lids and sm_lids According to the IB specification, the LID and SM_LID are 16-bit wide, but to support OmniPath users, export it as 32-bit value from the beginning. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 9 ++++++++- include/uapi/rdma/rdma_netlink.h | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 7af71d5e52c8..16f1d28bea69 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -45,6 +45,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, + [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -102,7 +104,12 @@ static int fill_port_info(struct sk_buff *msg, nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, attr.subnet_prefix, 0)) return -EMSGSIZE; - + if (rdma_protocol_ib(device, port)) { + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) + return -EMSGSIZE; + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) + return -EMSGSIZE; + } return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 481003182a35..7d5caaf54126 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -284,6 +284,14 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_SUBNET_PREFIX, /* u64 */ + /* + * Local Identifier (LID), + * According to IB specification, It is 16-bit address assigned + * by the Subnet Manager. Extended to be 32-bit for OmniPath users. + */ + RDMA_NLDEV_ATTR_LID, /* u32 */ + RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 34840fea112d36507c19dc6052b8c6d88bdd9c16 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 28 Jun 2017 15:49:30 +0300 Subject: RDMA/netlink: Export LID mask control (LMC) Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 3 +++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 16f1d28bea69..11546f87c5dc 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -47,6 +47,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, + [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -109,6 +110,8 @@ static int fill_port_info(struct sk_buff *msg, return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) + return -EMSGSIZE; } return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 7d5caaf54126..035706e6b016 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -292,6 +292,11 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_LID, /* u32 */ RDMA_NLDEV_ATTR_SM_LID, /* u32 */ + /* + * LID mask control (LMC) + */ + RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 5654e49db0b2d87c12b6e120b6a830abe3d3921b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 13:12:45 +0300 Subject: RDMA/netlink: Provide port state and physical link state Add port state and physical link state to the users of RDMA netlink. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 6 ++++++ include/uapi/rdma/rdma_netlink.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 11546f87c5dc..32ccb2b88933 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -48,6 +48,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -113,6 +115,10 @@ static int fill_port_info(struct sk_buff *msg, if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) return -EMSGSIZE; } + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) + return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 035706e6b016..c488c3cf361b 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -297,6 +297,9 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_LMC, /* u8 */ + RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ + RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 1bb77b8c1d57149ed0aa6825255ead80ae584034 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 29 Jun 2017 16:01:29 +0300 Subject: RDMA/netlink: Export node_type Add ability to get node_type for RDAM netlink users. Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 3 +++ include/uapi/rdma/rdma_netlink.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 32ccb2b88933..474022274e09 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -50,6 +50,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, }; static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) @@ -79,6 +80,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, be64_to_cpu(device->attrs.sys_image_guid), 0)) return -EMSGSIZE; + if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) + return -EMSGSIZE; return 0; } diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index c488c3cf361b..861440a87e7c 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -300,6 +300,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_PORT_STATE, /* u8 */ RDMA_NLDEV_ATTR_PORT_PHYS_STATE, /* u8 */ + RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_MAX }; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 8ccf098104dcc6fb9036d760f79016d5227a4437 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 14 Aug 2017 11:15:26 -0400 Subject: Revert "RDMA/hns: fix build regression" This reverts commit ecd840ff9b793ac60e3e6658414525535349a17b. --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index aeef2adf1a45..e493a61e14e1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -33,7 +33,6 @@ #ifndef _HNS_ROCE_DEVICE_H #define _HNS_ROCE_DEVICE_H -#include #include #define DRV_NAME "hns_roce" -- cgit v1.2.3 From 3e5f0881f17525e3b49835947a5e0cf2d681b1e2 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 25 Jul 2017 17:29:06 +0300 Subject: IB/hns: Avoid compile test under non 64bit environments The hns driver uses __raw_writeq which is only defined in 64BIT environments. Trying to compile the driver in a 32BIT environment results in errors. Only COMPILE_TEST when 64BIT is defined. Fixes: 7d1b6a678e0b ("IB/hns: Support compile test for hns RoCE driver") Signed-off-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index cbe6b51394fd..61c93bbd230d 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,7 +1,7 @@ config INFINIBAND_HNS tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON - depends on (ARM64 || COMPILE_TEST) && HNS && HNS_DSAF && HNS_ENET + depends on (ARM64 || (COMPILE_TEST && 64BIT)) && HNS && HNS_DSAF && HNS_ENET ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hi1610 and more further ICT SoC. -- cgit v1.2.3 From 836daee2df01b79e9cf54b80b985aea6cff05f57 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sat, 10 Jun 2017 11:19:20 +0200 Subject: cxgb4: Remove some dead code This 'BUG_ON(!ep)' can never trigger because we have: if (!ep) return 0; just a few lines above. So it can be removed safely. Signed-off-by: Christophe JAILLET Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index e49b34c3b136..ceaa2fa54d32 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2871,7 +2871,6 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - BUG_ON(!ep); /* The cm_id may be null if we failed to connect */ mutex_lock(&ep->com.mutex); -- cgit v1.2.3 From b059e2108d7ac2d63001735f11d79ee670d9e3fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 2 Jul 2017 11:20:50 +0300 Subject: RDMA/core: make ib_device.add method optional ib_clients can indeed fill .add to NULL, but then they will not see any device removal notifications. The reason is that that ib_register_client and ib_register_device checked existence of .add before adding the creating a corresponding client_data and adding it to the list. Simple condition reverse fixes the issue. Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a5dfab6adf49..9b7e110e7d8e 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -489,7 +489,7 @@ int ib_register_device(struct ib_device *device, device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) - if (client->add && !add_client_context(device, client)) + if (!add_client_context(device, client) && client->add) client->add(device); down_write(&lists_rwsem); @@ -577,7 +577,7 @@ int ib_register_client(struct ib_client *client) mutex_lock(&device_mutex); list_for_each_entry(device, &device_list, core_list) - if (client->add && !add_client_context(device, client)) + if (!add_client_context(device, client) && client->add) client->add(device); down_write(&lists_rwsem); -- cgit v1.2.3 From cb93e597779e565dfeda34aede83c2565ddadc97 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 15 Aug 2017 22:20:37 +0300 Subject: cm: Don't allocate ib_cm workqueue with WQ_MEM_RECLAIM create_workqueue always creates the workqueue with WQ_MEM_RECLAIM and silences a flush dependency warn for WQ_LEGACY. Instead, we want to keep the warn in case the allocator tries to flush the cm workqueue because its very likely that cm work execution will yield memory allocations (for example cm connection requests). Reported-by: Steve Wise Reviewed-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2b4d613a3474..838e3507eadc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4201,7 +4201,7 @@ static int __init ib_cm_init(void) goto error1; } - cm.wq = create_workqueue("ib_cm"); + cm.wq = alloc_workqueue("ib_cm", 0, 1); if (!cm.wq) { ret = -ENOMEM; goto error2; -- cgit v1.2.3 From 75215e5bb22c9c502e47398bdc35c247b1469809 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 15 Aug 2017 22:20:38 +0300 Subject: iwcm: Don't allocate iwcm workqueue with WQ_MEM_RECLAIM Its very likely that iwcm work execution will yield memory allocations (for example cm connection request). Reported-by: Steve Wise Reviewed-by: Steve Wise Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 31661b5c1743..ff6d7bc44c1f 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1181,7 +1181,7 @@ static int __init iw_cm_init(void) if (ret) pr_err("iw_cm: couldn't register netlink callbacks\n"); - iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); if (!iwcm_wq) return -ENOMEM; -- cgit v1.2.3 From 055bb279065f00205281956d2a01777d17d23c7c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Jul 2017 10:23:47 +0100 Subject: IB/qib: fix spelling mistake: "failng" -> "failing" Trivial fix to spelling mistake in qib_dev_err error message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6c16ba1107ba..cce4ab468b29 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -399,7 +399,7 @@ static int loadtime_init(struct qib_devdata *dd) if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { qib_dev_err(dd, - "Driver only handles version %d, chip swversion is %d (%llx), failng\n", + "Driver only handles version %d, chip swversion is %d (%llx), failing\n", QIB_CHIP_SWVERSION, (int)(dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & -- cgit v1.2.3 From a63aa5dbed4eda4bec97556f4f0d70b4247fc9d1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 13 Jul 2017 23:13:38 +0100 Subject: IB/hfi1: fix spelling mistake in variable name continious Trivial fix to spelling mistake, rename variable 'continious' to the correct spelling 'continuous' Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 94b54850ec75..ebbc52d668e7 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7330,7 +7330,7 @@ void handle_verify_cap(struct work_struct *work) struct hfi1_devdata *dd = ppd->dd; u64 reg; u8 power_management; - u8 continious; + u8 continuous; u8 vcu; u8 vau; u8 z; @@ -7349,7 +7349,7 @@ void handle_verify_cap(struct work_struct *work) lcb_shutdown(dd, 0); adjust_lcb_for_fpga_serdes(dd); - read_vc_remote_phy(dd, &power_management, &continious); + read_vc_remote_phy(dd, &power_management, &continuous); read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf, &partner_supported_crc); read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths); @@ -7363,7 +7363,7 @@ void handle_verify_cap(struct work_struct *work) get_link_widths(dd, &active_tx, &active_rx); dd_dev_info(dd, "Peer PHY: power management 0x%x, continuous updates 0x%x\n", - (int)power_management, (int)continious); + (int)power_management, (int)continuous); dd_dev_info(dd, "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n", (int)vau, (int)z, (int)vcu, (int)vl15buf, -- cgit v1.2.3 From 24bb4d86887fd7730737b790bdb1b4f415b4b241 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 14 Jul 2017 08:30:10 +0100 Subject: RDMA/bnxt_re: fix spelling mistake: "Deallocte" -> "Deallocate" Trivial fix to spelling mistake in dev_err error message Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index f0e01b3ac711..36a88e32b4f2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3410,7 +3410,7 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) &rdev->qplib_res.dpi_tbl, &uctx->dpi); if (rc) - dev_err(rdev_to_dev(rdev), "Deallocte HW DPI failed!"); + dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!"); /* Don't fail, continue*/ uctx->dpi.dbr = NULL; } -- cgit v1.2.3 From 92d50fc1602ecef44babe411c475344e55e1cdd9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 19 Jul 2017 15:01:06 +0200 Subject: PCI/IB: add support for pci driver attribute groups Some drivers (specifically the nes IB driver), want to create a lot of sysfs driver attributes. Instead of open-coding the creation and removal of these files (and getting it wrong btw), it's a better idea to let the driver core handle all of this logic for us. So add a new field to the pci driver structure, **groups, that allows pci drivers to specify an attribute group list it wishes to have created when it is registered with the driver core. Big bonus is now the driver doesn't race with userspace when the sysfs files are created vs. when the kobject is announced, so any script/tool that actually wanted to use these files will not have to poll waiting for them to show up. Cc: Faisal Latif Cc: Doug Ledford Cc: Sean Hefty Cc: Hal Rosenstock Cc: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 67 ++++++++++++++--------------------------- drivers/pci/pci-driver.c | 1 + include/linux/pci.h | 1 + 3 files changed, 25 insertions(+), 44 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index a30aa6527f7e..84162e9ae5c0 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -808,13 +808,6 @@ static void nes_remove(struct pci_dev *pcidev) } -static struct pci_driver nes_pci_driver = { - .name = DRV_NAME, - .id_table = nes_pci_table, - .probe = nes_probe, - .remove = nes_remove, -}; - static ssize_t adapter_show(struct device_driver *ddp, char *buf) { unsigned int devfn = 0xffffffff; @@ -1156,35 +1149,29 @@ static DRIVER_ATTR_RW(idx_addr); static DRIVER_ATTR_RW(idx_data); static DRIVER_ATTR_RW(wqm_quanta); -static int nes_create_driver_sysfs(struct pci_driver *drv) -{ - int error; - error = driver_create_file(&drv->driver, &driver_attr_adapter); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data); - error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd); - error |= driver_create_file(&drv->driver, &driver_attr_flash_data); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); - error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); - error |= driver_create_file(&drv->driver, &driver_attr_idx_data); - error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); - return error; -} +static struct attribute *nes_attrs[] = { + &driver_attr_adapter.attr, + &driver_attr_eeprom_cmd.attr, + &driver_attr_eeprom_data.attr, + &driver_attr_flash_cmd.attr, + &driver_attr_flash_data.attr, + &driver_attr_nonidx_addr.attr, + &driver_attr_nonidx_data.attr, + &driver_attr_idx_addr.attr, + &driver_attr_idx_data.attr, + &driver_attr_wqm_quanta.attr, + NULL, +}; +ATTRIBUTE_GROUPS(nes); + +static struct pci_driver nes_pci_driver = { + .name = DRV_NAME, + .id_table = nes_pci_table, + .probe = nes_probe, + .remove = nes_remove, + .groups = nes_groups, +}; -static void nes_remove_driver_sysfs(struct pci_driver *drv) -{ - driver_remove_file(&drv->driver, &driver_attr_adapter); - driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd); - driver_remove_file(&drv->driver, &driver_attr_eeprom_data); - driver_remove_file(&drv->driver, &driver_attr_flash_cmd); - driver_remove_file(&drv->driver, &driver_attr_flash_data); - driver_remove_file(&drv->driver, &driver_attr_nonidx_addr); - driver_remove_file(&drv->driver, &driver_attr_nonidx_data); - driver_remove_file(&drv->driver, &driver_attr_idx_addr); - driver_remove_file(&drv->driver, &driver_attr_idx_data); - driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); -} /** * nes_init_module - module initialization entry point @@ -1192,20 +1179,13 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) static int __init nes_init_module(void) { int retval; - int retval1; retval = nes_cm_start(); if (retval) { printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n"); return retval; } - retval = pci_register_driver(&nes_pci_driver); - if (retval >= 0) { - retval1 = nes_create_driver_sysfs(&nes_pci_driver); - if (retval1 < 0) - printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n"); - } - return retval; + return pci_register_driver(&nes_pci_driver); } @@ -1215,7 +1195,6 @@ static int __init nes_init_module(void) static void __exit nes_exit_module(void) { nes_cm_stop(); - nes_remove_driver_sysfs(&nes_pci_driver); pci_unregister_driver(&nes_pci_driver); } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d51e8738f9c2..4450feaf5c00 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1307,6 +1307,7 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.bus = &pci_bus_type; drv->driver.owner = owner; drv->driver.mod_name = mod_name; + drv->driver.groups = drv->groups; spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..c421af60f2df 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -729,6 +729,7 @@ struct pci_driver { void (*shutdown) (struct pci_dev *dev); int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ const struct pci_error_handlers *err_handler; + const struct attribute_group **groups; struct device_driver driver; struct pci_dynids dynids; }; -- cgit v1.2.3 From e0604df111e57773f45c50d143fedb0374bfe916 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:44 +0530 Subject: infiniband: mthca: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 13067 805 4 13876 3634 infiniband/hw/mthca/mthca_main.o File size After adding 'const': text data bss dec hex filename 13419 453 4 13876 3634 infiniband/hw/mthca/mthca_main.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index c309e5c96383..5695cda52a07 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1162,7 +1162,7 @@ static void mthca_remove_one(struct pci_dev *pdev) mutex_unlock(&mthca_device_mutex); } -static struct pci_device_id mthca_pci_table[] = { +static const struct pci_device_id mthca_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), .driver_data = TAVOR }, { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR), -- cgit v1.2.3 From edb566913b9591910a0d1dafd53e5d4de3212b55 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:45 +0530 Subject: infiniband: nes: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 10429 780 33 11242 2bea drivers/infiniband/hw/nes/nes.o File size After adding 'const': text data bss dec hex filename 10541 668 33 11242 2bea drivers/infiniband/hw/nes/nes.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 84162e9ae5c0..b00eac1b899f 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -102,7 +102,7 @@ static unsigned int ee_flsh_adapter; static unsigned int sysfs_nonidx_addr; static unsigned int sysfs_idx_addr; -static struct pci_device_id nes_pci_table[] = { +static const struct pci_device_id nes_pci_table[] = { { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, {0} -- cgit v1.2.3 From 7806def081b2185a488fb31bd1deb4f5a3fe4765 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Sun, 16 Jul 2017 12:00:46 +0530 Subject: infiniband: pvrdma: constify pci_device_id. pci_device_id are not supposed to change at runtime. All functions working with pci_device_id provided by work with const pci_device_id. So mark the non-const structs as const. File size before: text data bss dec hex filename 10774 1872 8 12654 316e infiniband/hw/vmw_pvrdma/pvrdma_main.o File size After adding 'const': text data bss dec hex filename 10838 1808 8 12654 316e infiniband/hw/vmw_pvrdma/pvrdma_main.o Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 34ebc7615411..2b8353070925 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -1078,7 +1078,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } -static struct pci_device_id pvrdma_pci_table[] = { +static const struct pci_device_id pvrdma_pci_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), }, { 0 }, }; -- cgit v1.2.3 From 02654b5ae1a45c0e31060816231086685cfcd841 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 16 Jul 2017 13:09:23 +0200 Subject: i40iw: Simplify code Axe a few lines of code and re-use existing error handling path to avoid code duplication. Signed-off-by: Christophe JAILLET Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_pble.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index c87ba1617087..540aab5e502d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -269,10 +269,8 @@ static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev, status = i40iw_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, I40IW_SD_TYPE_PAGED, I40IW_HMC_DIRECT_BP_SIZE); - if (status) { - i40iw_free_vmalloc_mem(dev->hw, chunk); - return status; - } + if (status) + goto error; if (!dev->is_pf) { status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE, fpm_to_idx(pble_rsrc, @@ -280,8 +278,7 @@ static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev, (info->pages << PBLE_512_SHIFT)); if (status) { i40iw_pr_err("allocate PBLEs in the PF. Error %i\n", status); - i40iw_free_vmalloc_mem(dev->hw, chunk); - return status; + goto error; } } addr = chunk->vaddr; -- cgit v1.2.3 From 83fb1c89e7ee5bb16397b294ccfbd65a9a22e402 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 19 Jul 2017 13:55:26 -0500 Subject: i40iw: Fixes for static checker warnings Remove NULL check for cm_node->listener in i40iw_accept as listener is always present at this point. Remove the check for cm_node->accept_pend and related code in i40iw_cm_event_connected as the cm_node in this context is only pertinent to active node and cm_node->accept_pend is always 0. This fixes the following smatch warnings, drivers/infiniband/hw/i40iw/i40iw_cm.c:3691 i40iw_accept() error: we previously assumed 'cm_node->listener' could be null drivers/infiniband/hw/i40iw/i40iw_cm.c:4061 i40iw_cm_event_connected() error: we previously assumed 'cm_node->listener' could be null Reported-by: Dan Carpenter Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5a2fa743676c..a2b135039e5a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3687,8 +3687,6 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->accelerated = 1; if (cm_node->accept_pend) { - if (!cm_node->listener) - i40iw_pr_err("cm_node->listener NULL for passive node\n"); atomic_dec(&cm_node->listener->pend_accepts_cnt); cm_node->accept_pend = 0; } @@ -4056,12 +4054,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event) i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); cm_node->accelerated = 1; - if (cm_node->accept_pend) { - if (!cm_node->listener) - i40iw_pr_err("listener is null for passive node\n"); - atomic_dec(&cm_node->listener->pend_accepts_cnt); - cm_node->accept_pend = 0; - } + return; error: -- cgit v1.2.3 From 8b62cbd13a245f59282ba27431adf4caa674dd0b Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Fri, 21 Jul 2017 22:14:09 +0300 Subject: IB/rxe: Convert pr_info to pr_warn This message is warning so let's print it accordingly. Signed-off-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_av.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 5bddf469361b..f8172eb993cc 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -49,7 +49,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) u8 sgid_index = rdma_ah_read_grh(attr)->sgid_index; if (sgid_index > port->attr.gid_tbl_len) { - pr_info("invalid sgid index = %d\n", sgid_index); + pr_warn("invalid sgid index = %d\n", sgid_index); return -EINVAL; } } -- cgit v1.2.3 From 660b1de13ce8a56cf8c1700774a962e36222c0f9 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Fri, 21 Jul 2017 22:20:50 +0300 Subject: IB/rxe: Remove unneeded check Port validation is performed in ib_core, no need to duplicate it here. Signed-off-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_av.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index f8172eb993cc..1cc9e2e1365d 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -38,11 +38,6 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) { struct rxe_port *port; - if (rdma_ah_get_port_num(attr) != 1) { - pr_info("invalid port_num = %d\n", rdma_ah_get_port_num(attr)); - return -EINVAL; - } - port = &rxe->port; if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { -- cgit v1.2.3 From 5a5a3d0cfe6b3c99585d7763cd966ec1654bc4e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 21 Jul 2017 23:19:33 +0100 Subject: i40iw: fix spelling mistake: "allloc_buf" -> "alloc_buf" Trivial fix to spelling mistake in i40iw_debug message and also split up a couple of lines that are too long and cause checkpatch warnings Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 71050c5d29a0..40c3137a7325 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -949,14 +949,16 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, ret = i40iw_puda_qp_create(rsrc); } if (ret) { - i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__); + i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", + __func__); goto error; } rsrc->completion = PUDA_QP_CREATED; ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size); if (ret) { - i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__); + i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error alloc_buf\n", + __func__); goto error; } -- cgit v1.2.3 From 699a2d5b1b880b4e4e1c7d55fa25659322cf5b51 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Tue, 1 Aug 2017 10:58:35 +0530 Subject: RDMA/uverbs: Initialize cq_context appropriately Initializing cq_context with ev_queue in create_cq(), leads to NULL pointer dereference in ib_uverbs_comp_handler(), if application doesnot use completion channel. This patch fixes the cq_context initialization. Fixes: 1e7710f3f65 ("IB/core: Change completion channel to use the reworked") Signed-off-by: Potnuri Bharat Teja Reviewed-by: Matan Barak Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..50a6c64f0388 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1015,7 +1015,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = &ev_file->ev_queue; + cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; -- cgit v1.2.3 From 238f43a79799c76a266e89eb123376d36f24c3e5 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 2 Aug 2017 15:31:29 +0530 Subject: IB/qib: add const to bin_attribute structures Add const to bin_attribute structures as they are only passed to the functions sysfs_{remove/create}_bin_file. The arguments passed are of type const, so declare the structures to be const. Done using Coccinelle. @m disable optional_qualifier@ identifier s; position p; @@ static struct bin_attribute s@p={...}; @okay1@ position p; identifier m.s; @@ ( sysfs_create_bin_file(...,&s@p,...) | sysfs_remove_bin_file(...,&s@p,...) ) @bad@ position p!={m.p,okay1.p}; identifier m.s; @@ s@p @change depends on !bad disable optional_qualifier@ identifier m.s; @@ static +const struct bin_attribute s={...}; Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index fe4cf5e4acec..ca2638d8f35e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -247,7 +247,7 @@ static struct kobj_type qib_port_cc_ktype = { .release = qib_port_release, }; -static struct bin_attribute cc_table_bin_attr = { +static const struct bin_attribute cc_table_bin_attr = { .attr = {.name = "cc_table_bin", .mode = 0444}, .read = read_cc_table_bin, .size = PAGE_SIZE, @@ -286,7 +286,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute cc_setting_bin_attr = { +static const struct bin_attribute cc_setting_bin_attr = { .attr = {.name = "cc_settings_bin", .mode = 0444}, .read = read_cc_setting_bin, .size = PAGE_SIZE, -- cgit v1.2.3 From 9d6fd7aca149e2c95915447f76b125dc2123db45 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Wed, 2 Aug 2017 15:31:30 +0530 Subject: IB/hfi1: add const to bin_attribute structures Add const to bin_attribute structures as they are only passed to the functions sysfs_{remove/create}_bin_file. The arguments passed are of type const, so declare the structures to be const. Done using Coccinelle. @m disable optional_qualifier@ identifier s; position p; @@ static struct bin_attribute s@p={...}; @okay1@ position p; identifier m.s; @@ ( sysfs_create_bin_file(...,&s@p,...) | sysfs_remove_bin_file(...,&s@p,...) ) @bad@ position p!={m.p,okay1.p}; identifier m.s; @@ s@p @change depends on !bad disable optional_qualifier@ identifier m.s; @@ static +const struct bin_attribute s={...}; Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 2f3bbcac1e34..6d2702ef34ac 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -95,7 +95,7 @@ static void port_release(struct kobject *kobj) /* nothing to do since memory is freed by hfi1_free_devdata() */ } -static struct bin_attribute cc_table_bin_attr = { +static const struct bin_attribute cc_table_bin_attr = { .attr = {.name = "cc_table_bin", .mode = 0444}, .read = read_cc_table_bin, .size = PAGE_SIZE, @@ -137,7 +137,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute cc_setting_bin_attr = { +static const struct bin_attribute cc_setting_bin_attr = { .attr = {.name = "cc_settings_bin", .mode = 0444}, .read = read_cc_setting_bin, .size = PAGE_SIZE, -- cgit v1.2.3 From 62ede7779904bc75bdd84f1ff0016113956ce3b4 Mon Sep 17 00:00:00 2001 From: "Hiatt, Don" Date: Mon, 14 Aug 2017 14:17:43 -0400 Subject: Add OPA extended LID support This patch series primarily increases sizes of variables that hold lid values from 16 to 32 bits. Additionally, it adds a check in the IB mad stack to verify a properly formatted MAD when OPA extended LIDs are used. Signed-off-by: Don Hiatt Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/user_mad.c | 2 +- drivers/infiniband/core/uverbs_cmd.c | 11 ++++++----- drivers/infiniband/hw/hfi1/mad.c | 2 +- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 8 ++++---- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 4 ++-- drivers/infiniband/hw/mthca/mthca_mad.c | 4 ++-- drivers/infiniband/sw/rdmavt/cq.c | 2 +- include/rdma/ib_verbs.h | 26 ++++++++++++++++++++------ 11 files changed, 41 insertions(+), 26 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 7a389697e2ec..fa3b0a428195 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1770,7 +1770,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { - req_msg->primary_local_lid = ib_slid_be16(wc->slid); + req_msg->primary_local_lid = ib_lid_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } @@ -1780,7 +1780,7 @@ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { - req_msg->alt_local_lid = ib_slid_be16(wc->slid); + req_msg->alt_local_lid = ib_lid_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index ff3c67a7aaad..c1696e6084b2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -229,7 +229,7 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); - packet->mad.hdr.lid = ib_slid_be16(mad_recv_wc->wc->slid); + packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 39a0f1dc84e4..a21881e22bad 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -275,12 +275,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; + if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { - resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); + resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); } else { - resp.lid = (u16)attr.lid; - resp.sm_lid = (u16)attr.sm_lid; + resp.lid = ib_lid_cpu16(attr.lid); + resp.sm_lid = ib_lid_cpu16(attr.sm_lid); } resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; @@ -1206,9 +1207,9 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest, tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; if (rdma_cap_opa_ah(ib_dev, wc->port_num)) - tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); + tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid); else - tmp.slid = ib_slid_cpu16(wc->slid); + tmp.slid = ib_lid_cpu16(wc->slid); tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 8daa3a5f7e95..11be4d19e607 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4216,7 +4216,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = ib_slid_cpu16(in_wc->slid); + u16 slid = ib_lid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 5a897b0106a9..0e4f60cfc59d 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -528,7 +528,7 @@ static int set_guid_rec(struct ib_device *ibdev, memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); - guid_info_rec.lid = cpu_to_be16((u16)attr.lid); + guid_info_rec.lid = ib_lid_be16(attr.lid); guid_info_rec.block_num = index; memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 04fb44e7699e..0793a21d76f4 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, op_modifier |= 0x4; - in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; + in_modifier |= ib_lid_cpu16(in_wc->slid) << 16; } err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, @@ -625,7 +625,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); } else { tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = ib_slid_be16(wc->slid); + tun_mad->hdr.slid_mac_47_32 = ib_lid_be16(wc->slid); } ib_dma_sync_single_for_device(&dev->ib_dev, @@ -826,7 +826,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } } - slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); @@ -860,7 +860,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = (u16)pattr.lid; + prev_lid = ib_lid_cpu16(pattr.lid); err = mlx4_MAD_IFC(to_mdev(ibdev), (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index cd2264ac88ae..18cfe5bf0fa3 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -78,7 +78,7 @@ static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 slid; int err; - slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index e19ae0b9b439..d0f062fc2a4b 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1921,7 +1921,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET); - MTHCA_PUT(inbox, ib_slid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, ib_lid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET); MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) @@ -1929,7 +1929,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, op_modifier |= 0x4; - in_modifier |= ib_slid_cpu16(in_wc->slid) << 16; + in_modifier |= ib_lid_cpu16(in_wc->slid) << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index a9caadab22cf..093f7755c843 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -205,7 +205,7 @@ int mthca_process_mad(struct ib_device *ibdev, u16 *out_mad_pkey_index) { int err; - u16 slid = in_wc ? ib_slid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; const struct ib_mad *in_mad = (const struct ib_mad *)in; @@ -256,7 +256,7 @@ int mthca_process_mad(struct ib_device *ibdev, in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) - prev_lid = (u16)pattr.lid; + prev_lid = ib_lid_cpu16(pattr.lid); err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0335a3df74d5..97d71e49c092 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) wc->uqueue[head].src_qp = entry->src_qp; wc->uqueue[head].wc_flags = entry->wc_flags; wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = ib_slid_cpu16(entry->slid); + wc->uqueue[head].slid = ib_lid_cpu16(entry->slid); wc->uqueue[head].sl = entry->sl; wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; wc->uqueue[head].port_num = entry->port_num; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4db4ad56ace6..70a183179224 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3724,16 +3724,30 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_IB; } -/* Return slid in 16bit CPU encoding */ -static inline u16 ib_slid_cpu16(u32 slid) +/** + * ib_lid_cpu16 - Return lid in 16bit CPU encoding. + * In the current implementation the only way to get + * get the 32bit lid is from other sources for OPA. + * For IB, lids will always be 16bits so cast the + * value accordingly. + * + * @lid: A 32bit LID + */ +static inline u16 ib_lid_cpu16(u32 lid) { - return (u16)slid; + WARN_ON_ONCE(lid & 0xFFFF0000); + return (u16)lid; } -/* Return slid in 16bit BE encoding */ -static inline u16 ib_slid_be16(u32 slid) +/** + * ib_lid_be16 - Return lid in 16bit BE encoding. + * + * @lid: A 32bit LID + */ +static inline __be16 ib_lid_be16(u32 lid) { - return cpu_to_be16((u16)slid); + WARN_ON_ONCE(lid & 0xFFFF0000); + return cpu_to_be16((u16)lid); } /** -- cgit v1.2.3 From 6a5df91baf2528e584bf4493c30bbafe2db74c9e Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 2 Aug 2017 01:46:18 -0700 Subject: RDMA/bnxt_re: Allocate multiple notification queues Enables multiple Interrupt vectors. Driver is requesting the max MSIX vectors based on the number of online cpus and creates upto 9 MSIx vectors (1 for control path and 8 for data path). A tasklet is created for each of these vectors. NQs are assigned to CQs in round robin fashion. This patch also adds IRQ affinity hint for the MSIX vector of each NQ. Signed-off-by: Ray Jui Signed-off-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 5 +- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 17 +++-- drivers/infiniband/hw/bnxt_re/main.c | 106 +++++++++++++++++++------------ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 21 +++++- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 4 +- 5 files changed, 104 insertions(+), 49 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 85527532c49d..b3ad37fec578 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -85,7 +85,7 @@ struct bnxt_re_sqp_entries { }; #define BNXT_RE_MIN_MSIX 2 -#define BNXT_RE_MAX_MSIX 16 +#define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 @@ -116,7 +116,7 @@ struct bnxt_re_dev { struct bnxt_qplib_rcfw rcfw; /* NQ */ - struct bnxt_qplib_nq nq; + struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; /* Device Resources */ struct bnxt_qplib_dev_attr dev_attr; @@ -140,6 +140,7 @@ struct bnxt_re_dev { struct bnxt_re_qp *qp1_sqp; struct bnxt_re_ah *sqp_ah; struct bnxt_re_sqp_entries sqp_tbl[1024]; + atomic_t nq_alloc_cnt; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 58461ee3773d..24e2785774b8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2290,6 +2290,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq) struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq); struct bnxt_re_dev *rdev = cq->rdev; int rc; + struct bnxt_qplib_nq *nq = cq->qplib_cq.nq; rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2304,7 +2305,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq) kfree(cq); } atomic_dec(&rdev->cq_count); - rdev->nq.budget--; + nq->budget--; return 0; } @@ -2318,6 +2319,8 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, struct bnxt_re_cq *cq = NULL; int rc, entries; int cqe = attr->cqe; + struct bnxt_qplib_nq *nq = NULL; + unsigned int nq_alloc_cnt; /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { @@ -2369,9 +2372,15 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, cq->qplib_cq.sghead = NULL; cq->qplib_cq.nmap = 0; } + /* + * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a + * used for getting the NQ index. + */ + nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt); + nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)]; cq->qplib_cq.max_wqe = entries; - cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id; - cq->qplib_cq.nq = &rdev->nq; + cq->qplib_cq.cnq_hw_ring_id = nq->ring_id; + cq->qplib_cq.nq = nq; rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { @@ -2381,7 +2390,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, cq->ib_cq.cqe = entries; cq->cq_period = cq->qplib_cq.period; - rdev->nq.budget++; + nq->budget++; atomic_inc(&rdev->cq_count); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 2f71123e0a9f..5b78d8fc28dc 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -161,7 +161,7 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) { - int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got; + int rc = 0, num_msix_want = BNXT_RE_MAX_MSIX, num_msix_got; struct bnxt_en_dev *en_dev; if (!rdev) @@ -169,6 +169,8 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) en_dev = rdev->en_dev; + num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus()); + rtnl_lock(); num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, rdev->msix_entries, @@ -651,8 +653,12 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { - if (rdev->nq.hwq.max_elements) - bnxt_qplib_disable_nq(&rdev->nq); + int i; + + if (rdev->nq[0].hwq.max_elements) { + for (i = 1; i < rdev->num_msix; i++) + bnxt_qplib_disable_nq(&rdev->nq[i - 1]); + } if (rdev->qplib_res.rcfw) bnxt_qplib_cleanup_res(&rdev->qplib_res); @@ -660,31 +666,41 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) static int bnxt_re_init_res(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc = 0, i; bnxt_qplib_init_res(&rdev->qplib_res); - if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0) - return -EINVAL; + for (i = 1; i < rdev->num_msix ; i++) { + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], + i - 1, rdev->msix_entries[i].vector, + rdev->msix_entries[i].db_offset, + &bnxt_re_cqn_handler, NULL); - rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq, - rdev->msix_entries[BNXT_RE_NQ_IDX].vector, - rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset, - &bnxt_re_cqn_handler, - NULL); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to enable NQ with rc = 0x%x", rc); + goto fail; + } + } + return 0; +fail: + return rc; +} - if (rc) - dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc); +static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait) +{ + int i; - return rc; + for (i = 0; i < rdev->num_msix - 1; i++) { + bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait); + bnxt_qplib_free_nq(&rdev->nq[i]); + } } static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) { - if (rdev->nq.hwq.max_elements) { - bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait); - bnxt_qplib_free_nq(&rdev->nq); - } + bnxt_re_free_nq_res(rdev, lock_wait); + if (rdev->qplib_res.dpi_tbl.max) { bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &rdev->qplib_res.dpi_tbl, @@ -698,7 +714,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc = 0, i; /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; @@ -715,30 +731,42 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) &rdev->dpi_privileged, rdev); if (rc) - goto fail; + goto dealloc_res; - rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + - BNXT_RE_MAX_SRQC_COUNT + 2; - rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate NQ memory: %#x", rc); - goto fail; - } - rc = bnxt_re_net_ring_alloc - (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr, - rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count, - HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1, - rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx, - &rdev->nq.ring_id); - if (rc) { - dev_err(rdev_to_dev(rdev), - "Failed to allocate NQ ring: %#x", rc); - goto free_nq; + for (i = 0; i < rdev->num_msix - 1; i++) { + rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT + + BNXT_RE_MAX_SRQC_COUNT + 2; + rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]); + if (rc) { + dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x", + i, rc); + goto dealloc_dpi; + } + rc = bnxt_re_net_ring_alloc + (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, + rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count, + HWRM_RING_ALLOC_CMPL, + BNXT_QPLIB_NQE_MAX_CNT - 1, + rdev->msix_entries[i + 1].ring_idx, + &rdev->nq[i].ring_id); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to allocate NQ fw id with rc = 0x%x", + rc); + goto free_nq; + } } return 0; free_nq: - bnxt_qplib_free_nq(&rdev->nq); + for (i = 0; i < rdev->num_msix - 1; i++) + bnxt_qplib_free_nq(&rdev->nq[i]); +dealloc_dpi: + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, + &rdev->qplib_res.dpi_tbl, + &rdev->dpi_privileged); +dealloc_res: + bnxt_qplib_free_res(&rdev->qplib_res); + fail: rdev->qplib_res.rcfw = NULL; return rc; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 31e15f31bba5..e8afc47f8949 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -365,6 +365,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) tasklet_kill(&nq->worker); if (nq->requested) { + irq_set_affinity_hint(nq->vector, NULL); free_irq(nq->vector, nq); nq->requested = false; } @@ -378,7 +379,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) } int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, - int msix_vector, int bar_reg_offset, + int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *), int (*srqn_handler)(struct bnxt_qplib_nq *nq, @@ -402,13 +403,25 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, goto fail; nq->requested = false; - rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq); + memset(nq->name, 0, 32); + sprintf(nq->name, "bnxt_qplib_nq-%d", nq_idx); + rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq); if (rc) { dev_err(&nq->pdev->dev, "Failed to request IRQ for NQ: %#x", rc); bnxt_qplib_disable_nq(nq); goto fail; } + + cpumask_clear(&nq->mask); + cpumask_set_cpu(nq_idx, &nq->mask); + rc = irq_set_affinity_hint(nq->vector, &nq->mask); + if (rc) { + dev_warn(&nq->pdev->dev, + "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", + nq->vector, nq_idx); + } + nq->requested = true; nq->bar_reg = NQ_CONS_PCI_BAR_REGION; nq->bar_reg_off = bar_reg_offset; @@ -432,8 +445,10 @@ fail: void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq) { - if (nq->hwq.max_elements) + if (nq->hwq.max_elements) { bnxt_qplib_free_hwq(nq->pdev, &nq->hwq); + nq->hwq.max_elements = 0; + } } int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 23a26d5cbe18..8ead70ca1c1d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -407,6 +407,7 @@ struct bnxt_qplib_nq { struct pci_dev *pdev; int vector; + cpumask_t mask; int budget; bool requested; struct tasklet_struct worker; @@ -425,6 +426,7 @@ struct bnxt_qplib_nq { void *srq, u8 event); struct workqueue_struct *cqn_wq; + char name[32]; }; struct bnxt_qplib_nq_work { @@ -435,7 +437,7 @@ struct bnxt_qplib_nq_work { void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq); int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, - int msix_vector, int bar_reg_offset, + int nq_idx, int msix_vector, int bar_reg_offset, int (*cqn_handler)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq), int (*srqn_handler)(struct bnxt_qplib_nq *nq, -- cgit v1.2.3 From 225937d6ccff3ba49b7065672ce35f21465aaeb9 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 2 Aug 2017 01:46:19 -0700 Subject: RDMA/bnxt_re: Implement the alloc/get_hw_stats callback Expose HW counters using the get_hw_stats callback Signed-off-by: Somnath Kotur Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/Makefile | 2 +- drivers/infiniband/hw/bnxt_re/hw_counters.c | 114 ++++++++++++++++++++++++++++ drivers/infiniband/hw/bnxt_re/hw_counters.h | 62 +++++++++++++++ drivers/infiniband/hw/bnxt_re/main.c | 4 + 4 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/hw/bnxt_re/hw_counters.c create mode 100644 drivers/infiniband/hw/bnxt_re/hw_counters.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile index 036f84efbc73..afbaa0e20670 100644 --- a/drivers/infiniband/hw/bnxt_re/Makefile +++ b/drivers/infiniband/hw/bnxt_re/Makefile @@ -3,4 +3,4 @@ ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o bnxt_re-y := main.o ib_verbs.o \ qplib_res.o qplib_rcfw.o \ - qplib_sp.o qplib_fp.o + qplib_sp.o qplib_fp.o hw_counters.o diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c new file mode 100644 index 000000000000..7b28219eba46 --- /dev/null +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -0,0 +1,114 @@ +/* + * Broadcom NetXtreme-E RoCE driver. + * + * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Statistics + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "bnxt_ulp.h" +#include "roce_hsi.h" +#include "qplib_res.h" +#include "qplib_sp.h" +#include "qplib_fp.h" +#include "qplib_rcfw.h" +#include "bnxt_re.h" +#include "hw_counters.h" + +static const char * const bnxt_re_stat_name[] = { + [BNXT_RE_ACTIVE_QP] = "active_qps", + [BNXT_RE_ACTIVE_SRQ] = "active_srqs", + [BNXT_RE_ACTIVE_CQ] = "active_cqs", + [BNXT_RE_ACTIVE_MR] = "active_mrs", + [BNXT_RE_ACTIVE_MW] = "active_mws", + [BNXT_RE_RX_PKTS] = "rx_pkts", + [BNXT_RE_RX_BYTES] = "rx_bytes", + [BNXT_RE_TX_PKTS] = "tx_pkts", + [BNXT_RE_TX_BYTES] = "tx_bytes", + [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors" +}; + +int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index) +{ + struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); + struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; + + if (!port || !stats) + return -EINVAL; + + stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count); + stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count); + stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); + stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); + stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); + if (bnxt_re_stats) { + stats->value[BNXT_RE_RECOVERABLE_ERRORS] = + le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); + stats->value[BNXT_RE_RX_PKTS] = + le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); + stats->value[BNXT_RE_RX_BYTES] = + le64_to_cpu(bnxt_re_stats->rx_ucast_bytes); + stats->value[BNXT_RE_TX_PKTS] = + le64_to_cpu(bnxt_re_stats->tx_ucast_pkts); + stats->value[BNXT_RE_TX_BYTES] = + le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); + } + return ARRAY_SIZE(bnxt_re_stat_name); +} + +struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num) +{ + BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS); + /* We support only per port stats */ + if (!port_num) + return NULL; + + return rdma_alloc_hw_stats_struct(bnxt_re_stat_name, + ARRAY_SIZE(bnxt_re_stat_name), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h new file mode 100644 index 000000000000..be0dc0093b58 --- /dev/null +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -0,0 +1,62 @@ +/* + * Broadcom NetXtreme-E RoCE driver. + * + * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term + * Broadcom refers to Broadcom Limited and/or its subsidiaries. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Description: Statistics (header) + * + */ + +#ifndef __BNXT_RE_HW_STATS_H__ +#define __BNXT_RE_HW_STATS_H__ + +enum bnxt_re_hw_stats { + BNXT_RE_ACTIVE_QP, + BNXT_RE_ACTIVE_SRQ, + BNXT_RE_ACTIVE_CQ, + BNXT_RE_ACTIVE_MR, + BNXT_RE_ACTIVE_MW, + BNXT_RE_RX_PKTS, + BNXT_RE_RX_BYTES, + BNXT_RE_TX_PKTS, + BNXT_RE_TX_BYTES, + BNXT_RE_RECOVERABLE_ERRORS, + BNXT_RE_NUM_COUNTERS +}; + +struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev, + u8 port_num); +int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, + struct rdma_hw_stats *stats, + u8 port, int index); +#endif /* __BNXT_RE_HW_STATS_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 5b78d8fc28dc..82d1cbc27aee 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -64,6 +64,8 @@ #include "ib_verbs.h" #include #include "bnxt.h" +#include "hw_counters.h" + static char version[] = BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n"; @@ -513,6 +515,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; ibdev->mmap = bnxt_re_mmap; + ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; + ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; return ib_register_device(ibdev, NULL); } -- cgit v1.2.3 From 11880a5512ac51b9326507df9acc42a8bd13e2b7 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 22 Aug 2017 13:46:56 +0200 Subject: IB/mthca: Replace PCI pool old API The PCI pool API is deprecated. This commit replaces the PCI pool old API by the appropriate function with the DMA pool API. Signed-off-by: Romain Perier Acked-by: Peter Senna Tschudin Tested-by: Peter Senna Tschudin Acked-by: Doug Ledford Tested-by: Doug Ledford Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_av.c | 10 +++++----- drivers/infiniband/hw/mthca/mthca_cmd.c | 8 ++++---- drivers/infiniband/hw/mthca/mthca_dev.h | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 2aec9908c40a..e7f6223e9c60 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -186,7 +186,7 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { - ah->av = pci_pool_zalloc(dev->av_table.pool, + ah->av = dma_pool_zalloc(dev->av_table.pool, GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -250,7 +250,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) break; case MTHCA_AH_PCI_POOL: - pci_pool_free(dev->av_table.pool, ah->av, ah->avdma); + dma_pool_free(dev->av_table.pool, ah->av, ah->avdma); break; case MTHCA_AH_KMALLOC: @@ -340,7 +340,7 @@ int mthca_init_av_table(struct mthca_dev *dev) if (err) return err; - dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev, + dev->av_table.pool = dma_pool_create("mthca_av", &dev->pdev->dev, MTHCA_AV_SIZE, MTHCA_AV_SIZE, 0); if (!dev->av_table.pool) @@ -360,7 +360,7 @@ int mthca_init_av_table(struct mthca_dev *dev) return 0; out_free_pool: - pci_pool_destroy(dev->av_table.pool); + dma_pool_destroy(dev->av_table.pool); out_free_alloc: mthca_alloc_cleanup(&dev->av_table.alloc); @@ -374,6 +374,6 @@ void mthca_cleanup_av_table(struct mthca_dev *dev) if (dev->av_table.av_map) iounmap(dev->av_table.av_map); - pci_pool_destroy(dev->av_table.pool); + dma_pool_destroy(dev->av_table.pool); mthca_alloc_cleanup(&dev->av_table.alloc); } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index d0f062fc2a4b..a1900d2a371b 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -538,7 +538,7 @@ int mthca_cmd_init(struct mthca_dev *dev) return -ENOMEM; } - dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + dev->cmd.pool = dma_pool_create("mthca_cmd", &dev->pdev->dev, MTHCA_MAILBOX_SIZE, MTHCA_MAILBOX_SIZE, 0); if (!dev->cmd.pool) { @@ -551,7 +551,7 @@ int mthca_cmd_init(struct mthca_dev *dev) void mthca_cmd_cleanup(struct mthca_dev *dev) { - pci_pool_destroy(dev->cmd.pool); + dma_pool_destroy(dev->cmd.pool); iounmap(dev->hcr); if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS) iounmap(dev->cmd.dbell_map); @@ -621,7 +621,7 @@ struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + mailbox->buf = dma_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); @@ -635,7 +635,7 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) if (!mailbox) return; - pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ec7da9a474cd..5508afbf1c67 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -118,7 +118,7 @@ enum { }; struct mthca_cmd { - struct pci_pool *pool; + struct dma_pool *pool; struct mutex hcr_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -263,7 +263,7 @@ struct mthca_qp_table { }; struct mthca_av_table { - struct pci_pool *pool; + struct dma_pool *pool; int num_ddr_avs; u64 ddr_av_base; void __iomem *av_map; -- cgit v1.2.3 From e093111ddb6c786e32b882108c1c08ef83d781f4 Mon Sep 17 00:00:00 2001 From: "Amrani, Ram" Date: Tue, 27 Jun 2017 17:04:42 +0300 Subject: IB/core: Fix input len in multiple user verbs Most user verbs pass user data to the kernel with the inclusion of the ib_uverbs_cmd_hdr structure. This is problematic because the vendor has no ideas if the verb was called by a legacy verb or an extended verb. Also, the incosistency between the verbs is confusing. Fixes: 565197dd8fb1 ("IB/core: Extend ib_uverbs_create_cq") Signed-off-by: Ram Amrani Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 70 ++++++++++++++++------------ drivers/infiniband/hw/mlx5/cq.c | 6 +-- drivers/infiniband/hw/mlx5/main.c | 11 ++--- drivers/infiniband/hw/mthca/mthca_provider.c | 2 +- 4 files changed, 46 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index a21881e22bad..7ea5a3bb5a04 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -91,9 +91,10 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, goto err; } - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); if (ret) @@ -319,9 +320,10 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); uobj = uobj_alloc(uobj_get_type(pd), file->ucontext); if (IS_ERR(uobj)) @@ -488,9 +490,10 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); mutex_lock(&file->device->xrcd_tree_mutex); @@ -652,9 +655,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; @@ -746,7 +750,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned long) cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; @@ -1086,7 +1091,8 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; @@ -1167,9 +1173,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext); if (!cq) @@ -1742,9 +1749,10 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd, out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); obj = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp), file->ucontext); @@ -2055,7 +2063,8 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, return -EOPNOTSUPP; INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, - in_len - sizeof(cmd.base), out_len); + in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr), + out_len); ret = modify_qp(file, &cmd, &udata); if (ret) @@ -2561,7 +2570,8 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), - in_len - sizeof(cmd), out_len - sizeof(resp)); + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); uobj = uobj_alloc(uobj_get_type(ah), file->ucontext); if (IS_ERR(uobj)) @@ -3625,10 +3635,10 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) @@ -3652,10 +3662,10 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - INIT_UDATA(&udata, buf + sizeof cmd, - (unsigned long) cmd.response + sizeof resp, - in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), - out_len - sizeof resp); + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), + out_len - sizeof(resp)); ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a384d72ea3cd..c155df465c44 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -751,10 +751,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, void *cqc; int err; - ucmdlen = - (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < - sizeof(ucmd)) ? (sizeof(ucmd) - - sizeof(ucmd.reserved)) : sizeof(ucmd); + ucmdlen = udata->inlen < sizeof(ucmd) ? + (sizeof(ucmd) - sizeof(ucmd.reserved)) : sizeof(ucmd); if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cde7d1ce4a3c..e2fffdc43c86 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1273,7 +1273,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct mlx5_bfreg_info *bfregi; int ver; int err; - size_t reqlen; size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, max_cqe_version); bool lib_uar_4k; @@ -1281,18 +1280,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (!dev->ib_active) return ERR_PTR(-EAGAIN); - if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) - return ERR_PTR(-EINVAL); - - reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); - if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) ver = 0; - else if (reqlen >= min_req_v2) + else if (udata->inlen >= min_req_v2) ver = 2; else return ERR_PTR(-EINVAL); - err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); + err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index eae9bffd45d4..6fee7795d1c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -914,7 +914,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int err = 0; int write_mtt_size; - if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) { + if (udata->inlen < sizeof ucmd) { if (!to_mucontext(pd->uobject->context)->reg_mr_warned) { mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n", current->comm); -- cgit v1.2.3 From ecdb19f4b513033e6f2c4326cd5b81e04393e5e1 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Fri, 4 Aug 2017 13:52:13 -0700 Subject: IB/hfi1: Revert egress pkey check enforcement Current code has some serious flaws. Disarm the flag pending an appropriate patch. Fixes: 53526500f301 ("IB/hfi1: Permanently enable P_Key checking in HFI") Cc: stable@vger.kernel.org Reviewed-by: Mike Marciniszyn Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index be027c9c880e..da7cd5bd27ca 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -519,7 +519,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY; ppd->part_enforce |= HFI1_PART_ENFORCE_IN; - ppd->part_enforce |= HFI1_PART_ENFORCE_OUT; if (loopback) { hfi1_early_err(&pdev->dev, -- cgit v1.2.3 From 16570d3da0938e0c46c31e5f97c9c8452025d2e7 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Fri, 4 Aug 2017 13:52:20 -0700 Subject: IB/hfi1: Remove pmtu from the QP structure The pmtu field doens't have be stored in the QP structure as it can easily be calculated when needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 3 +-- include/rdma/rdmavt_qp.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 1878a97364aa..eb0c3d60c584 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1243,7 +1243,6 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MTU) { qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); - qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); qp->log_pmtu = ilog2(qp->pmtu); } @@ -1366,7 +1365,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->qp_state = qp->state; attr->cur_qp_state = attr->qp_state; - attr->path_mtu = qp->path_mtu; + attr->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); attr->path_mig_state = qp->s_mig_state; attr->qkey = qp->qkey; attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 07e2fffa6de6..8fbafb0ce674 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -277,7 +277,6 @@ struct rvt_qp { unsigned long timeout_jiffies; /* computed from timeout */ - enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; -- cgit v1.2.3 From 02a222c7f6c8d17b5fb6803ca453fcd9d5a3853d Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Fri, 4 Aug 2017 13:52:26 -0700 Subject: IB/hfi1: Remove lstate from hfi1_pportdata Do not track logical state separately from host_link_state. Deduce logical state from host_link_state when required. Transitions in set_link_state and goto_offline already make sure host_link_state reflects hardware's logical state properly. Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 52 +++++++++++++++++++-------------------- drivers/infiniband/hw/hfi1/chip.h | 3 +-- drivers/infiniband/hw/hfi1/hfi.h | 17 ------------- drivers/infiniband/hw/hfi1/mad.c | 2 +- 4 files changed, 28 insertions(+), 46 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index fdeec8abb6f1..2320b8fe9dbc 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1065,6 +1065,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, static int read_idle_sma(struct hfi1_devdata *dd, u64 *data); static int thermal_init(struct hfi1_devdata *dd); +static void update_statusp(struct hfi1_pportdata *ppd, u32 state); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, @@ -10261,8 +10262,10 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); - /* call again to adjust ppd->statusp, if needed */ - get_logical_state(ppd); + /* adjust ppd->statusp, if needed */ + update_statusp(ppd, IB_PORT_DOWN); + + dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n"); } /* @@ -10458,11 +10461,11 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd) } /* - * driver_logical_state - convert the driver's notion of a port's + * driver_lstate - convert the driver's notion of a port's * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1 * (converted to a u32) to indicate error. */ -u32 driver_logical_state(struct hfi1_pportdata *ppd) +u32 driver_lstate(struct hfi1_pportdata *ppd) { if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN)) return IB_PORT_DOWN; @@ -12651,20 +12654,8 @@ const char *opa_pstate_name(u32 pstate) return "unknown"; } -/* - * Read the hardware link state and set the driver's cached value of it. - * Return the (new) current value. - */ -u32 get_logical_state(struct hfi1_pportdata *ppd) +static void update_statusp(struct hfi1_pportdata *ppd, u32 state) { - u32 new_state; - - new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd)); - if (new_state != ppd->lstate) { - dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", - opa_lstate_name(new_state), new_state); - ppd->lstate = new_state; - } /* * Set port status flags in the page mapped into userspace * memory. Do it here to ensure a reliable state - this is @@ -12674,7 +12665,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd) * function. */ if (ppd->statusp) { - switch (ppd->lstate) { + switch (state) { case IB_PORT_DOWN: case IB_PORT_INIT: *ppd->statusp &= ~(HFI1_STATUS_IB_CONF | @@ -12688,10 +12679,9 @@ u32 get_logical_state(struct hfi1_pportdata *ppd) break; } } - return ppd->lstate; } -/** +/* * wait_logical_linkstate - wait for an IB link state change to occur * @ppd: port device * @state: the state to wait for @@ -12705,18 +12695,29 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs) { unsigned long timeout; + u32 new_state; timeout = jiffies + msecs_to_jiffies(msecs); while (1) { - if (get_logical_state(ppd) == state) - return 0; - if (time_after(jiffies, timeout)) + new_state = chip_to_opa_lstate(ppd->dd, + read_logical_state(ppd->dd)); + if (new_state == state) break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for link state 0x%x\n", + state); + return -ETIMEDOUT; + } msleep(20); } - dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state); - return -ETIMEDOUT; + update_statusp(ppd, state); + dd_dev_info(ppd->dd, + "logical state changed to %s (0x%x)\n", + opa_lstate_name(state), + state); + return 0; } /* @@ -14855,7 +14856,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, * Set the initial values to reasonable default, will be set * for real when link is up. */ - ppd->lstate = IB_PORT_DOWN; ppd->overrun_threshold = 0x4; ppd->phy_error_threshold = 0xf; ppd->port_crc_mode_enabled = link_crc_mask; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 6a0c691f1385..f7083025fb04 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -748,12 +748,11 @@ int is_ax(struct hfi1_devdata *dd); int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); -u32 get_logical_state(struct hfi1_pportdata *ppd); void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); u32 driver_physical_state(struct hfi1_pportdata *ppd); -u32 driver_logical_state(struct hfi1_pportdata *ppd); +u32 driver_lstate(struct hfi1_pportdata *ppd); int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok); int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fb5f8394fbed..e66e8f96ceab 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -591,8 +591,6 @@ struct hfi1_pportdata { struct mutex hls_lock; u32 host_link_state; - u32 lstate; /* logical link state */ - /* these are the "32 bit" regs */ u32 ibmtu; /* The MTU programmed for this unit */ @@ -1296,21 +1294,6 @@ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) int hfi1_reset_device(int); -/* return the driver's idea of the logical OPA port state */ -static inline u32 driver_lstate(struct hfi1_pportdata *ppd) -{ - /* - * The driver does some processing from the time the logical - * link state is at INIT to the time the SM can be notified - * as such. Return IB_PORT_DOWN until the software state - * is ready. - */ - if (ppd->lstate == IB_PORT_INIT && !(ppd->host_link_state & HLS_UP)) - return IB_PORT_DOWN; - else - return ppd->lstate; -} - /* return the driver's idea of the physical OPA port state */ static inline u32 driver_pstate(struct hfi1_pportdata *ppd) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 11be4d19e607..cd1f6f841f34 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1117,7 +1117,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, u32 logical_new, u32 physical_new) { u32 physical_old = driver_physical_state(ppd); - u32 logical_old = driver_logical_state(ppd); + u32 logical_old = driver_lstate(ppd); int ret, logical_allowed, physical_allowed; ret = logical_transition_allowed(logical_old, logical_new); -- cgit v1.2.3 From 64a296f579303322ebec9edae09cf87240b1ad78 Mon Sep 17 00:00:00 2001 From: Bartlomiej Dudek Date: Fri, 4 Aug 2017 13:52:32 -0700 Subject: IB/hfi1: Use host_link_state to read state when DC is shut down When DC is shut down (by e.g. disconnecting the cable), the driver should use host_link_state to get port's current physical state. This is due to the fact that physical state is read from DC's CSRs and when DC is shut down and state is changed, its registers are not impacted. Reviewed-by: Jakub Byczkowski Signed-off-by: Bartlomiej Dudek Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index e66e8f96ceab..728ed457e447 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1297,6 +1297,13 @@ int hfi1_reset_device(int); /* return the driver's idea of the physical OPA port state */ static inline u32 driver_pstate(struct hfi1_pportdata *ppd) { + /* + * When DC is shut down and state is changed, its CSRs are not + * impacted, therefore host_link_state should be used to get + * current physical state. + */ + if (ppd->dd->dc_shutdown) + return driver_physical_state(ppd); /* * The driver does some processing from the time the physical * link state is at LINKUP to the time the SM can be notified -- cgit v1.2.3 From f2a3bc00a03c2cc9caa40c8867de973fd4e48c6a Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 4 Aug 2017 13:52:38 -0700 Subject: IB/hfi1: Protect context array set/clear with spinlock The rcd array can be accessed from user context or during interrupts. Protecting this with a mutex isn't a good idea because the mutex should not be used from an IRQ. Protect the allocation and freeing of rcd array elements with a spinlock. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 10 +- drivers/infiniband/hw/hfi1/file_ops.c | 171 ++++++++++++---------------- drivers/infiniband/hw/hfi1/hfi.h | 8 +- drivers/infiniband/hw/hfi1/init.c | 199 +++++++++++++++++++++++---------- drivers/infiniband/hw/hfi1/vnic_main.c | 22 +--- 5 files changed, 229 insertions(+), 181 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2320b8fe9dbc..2cdf09dfcb55 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15054,10 +15054,16 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; - ret = hfi1_create_ctxts(dd); + ret = hfi1_create_kctxts(dd); if (ret) goto bail_cleanup; + /* + * Initialize aspm, to be done after gen3 transition and setting up + * contexts and before enabling interrupts + */ + aspm_init(dd); + dd->rcvhdrsize = DEFAULT_RCVHDRSIZE; /* * rcd[0] is guaranteed to be valid by this point. Also, all @@ -15076,7 +15082,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, goto bail_cleanup; } - /* use contexts created by hfi1_create_ctxts */ + /* use contexts created by hfi1_create_kctxts */ ret = set_up_interrupts(dd); if (ret) goto bail_cleanup; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index a0c13fa5babb..7361366d80e4 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -79,8 +79,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); static u64 kvirt_to_phys(void *addr); static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); -static int init_subctxts(struct hfi1_ctxtdata *uctxt, - const struct hfi1_user_info *uinfo); +static void init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo); static int init_user_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); @@ -758,7 +758,6 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) goto done; hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); - mutex_lock(&hfi1_mutex); flush_wc(); /* drain user sdma queue */ @@ -778,6 +777,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; + mutex_lock(&hfi1_mutex); __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); fdata->uctxt = NULL; hfi1_rcd_put(uctxt); /* fdata reference */ @@ -844,6 +844,38 @@ static u64 kvirt_to_phys(void *addr) return paddr; } +static int complete_subctxt(struct hfi1_filedata *fd) +{ + int ret; + + /* + * sub-context info can only be set up after the base context + * has been completed. + */ + ret = wait_event_interruptible( + fd->uctxt->wait, + !test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags)); + + if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) + ret = -ENOMEM; + + /* The only thing a sub context needs is the user_xxx stuff */ + if (!ret) { + fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); + ret = init_user_ctxt(fd, fd->uctxt); + } + + if (ret) { + hfi1_rcd_put(fd->uctxt); + fd->uctxt = NULL; + mutex_lock(&hfi1_mutex); + __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); + mutex_unlock(&hfi1_mutex); + } + + return ret; +} + static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) { int ret; @@ -854,24 +886,25 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) if (swmajor != HFI1_USER_SWMAJOR) return -ENODEV; + if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS) + return -EINVAL; + swminor = uinfo->userversion & 0xffff; + /* + * Acquire the mutex to protect against multiple creations of what + * could be a shared base context. + */ mutex_lock(&hfi1_mutex); /* - * Get a sub context if necessary. + * Get a sub context if available (fd->uctxt will be set). * ret < 0 error, 0 no context, 1 sub-context found */ - ret = 0; - if (uinfo->subctxt_cnt) { - ret = find_sub_ctxt(fd, uinfo); - if (ret > 0) - fd->rec_cpu_num = - hfi1_get_proc_affinity(fd->uctxt->numa_id); - } + ret = find_sub_ctxt(fd, uinfo); /* - * Allocate a base context if context sharing is not required or we - * couldn't find a sub context. + * Allocate a base context if context sharing is not required or a + * sub context wasn't found. */ if (!ret) ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); @@ -879,31 +912,10 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) mutex_unlock(&hfi1_mutex); /* Depending on the context type, do the appropriate init */ - if (ret > 0) { - /* - * sub-context info can only be set up after the base - * context has been completed. - */ - ret = wait_event_interruptible(fd->uctxt->wait, !test_bit( - HFI1_CTXT_BASE_UNINIT, - &fd->uctxt->event_flags)); - if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) - ret = -ENOMEM; - - /* The only thing a sub context needs is the user_xxx stuff */ - if (!ret) - ret = init_user_ctxt(fd, fd->uctxt); - - if (ret) - clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - - } else if (!ret) { + switch (ret) { + case 0: ret = setup_base_ctxt(fd, uctxt); if (uctxt->subctxt_cnt) { - /* If there is an error, set the failed bit. */ - if (ret) - set_bit(HFI1_CTXT_BASE_FAILED, - &uctxt->event_flags); /* * Base context is done, notify anybody using a * sub-context that is waiting for this completion @@ -911,14 +923,12 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); wake_up(&uctxt->wait); } - if (ret) - deallocate_ctxt(uctxt); - } - - /* If an error occurred, clear the reference */ - if (ret && fd->uctxt) { - hfi1_rcd_put(fd->uctxt); - fd->uctxt = NULL; + break; + case 1: + ret = complete_subctxt(fd); + break; + default: + break; } return ret; @@ -926,7 +936,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) /* * The hfi1_mutex must be held when this function is called. It is - * necessary to ensure serialized access to the bitmask in_use_ctxts. + * necessary to ensure serialized creation of shared contexts. */ static int find_sub_ctxt(struct hfi1_filedata *fd, const struct hfi1_user_info *uinfo) @@ -935,6 +945,9 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd = fd->dd; u16 subctxt; + if (!uinfo->subctxt_cnt) + return 0; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { struct hfi1_ctxtdata *uctxt = dd->rcd[i]; @@ -983,7 +996,6 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_ctxtdata **cd) { struct hfi1_ctxtdata *uctxt; - u16 ctxt; int ret, numa; if (dd->flags & HFI1_FROZEN) { @@ -997,22 +1009,9 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, return -EIO; } - /* - * This check is sort of redundant to the next EBUSY error. It would - * also indicate an inconsistancy in the driver if this value was - * zero, but there were still contexts available. - */ if (!dd->freectxts) return -EBUSY; - for (ctxt = dd->first_dyn_alloc_ctxt; - ctxt < dd->num_rcv_contexts; ctxt++) - if (!dd->rcd[ctxt]) - break; - - if (ctxt == dd->num_rcv_contexts) - return -EBUSY; - /* * If we don't have a NUMA node requested, preference is towards * device NUMA node. @@ -1022,11 +1021,10 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, numa = cpu_to_node(fd->rec_cpu_num); else numa = numa_node_id(); - uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa); - if (!uctxt) { - dd_dev_err(dd, - "Unable to allocate ctxtdata memory, failing open\n"); - return -ENOMEM; + ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt); + if (ret < 0) { + dd_dev_err(dd, "user ctxtdata allocation failed\n"); + return ret; } hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)", uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num, @@ -1035,8 +1033,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, /* * Allocate and enable a PIO send context. */ - uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, - uctxt->dd->node); + uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node); if (!uctxt->sc) { ret = -ENOMEM; goto ctxdata_free; @@ -1048,20 +1045,13 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, goto ctxdata_free; /* - * Setup sub context resources if the user-level has requested + * Setup sub context information if the user-level has requested * sub contexts. * This has to be done here so the rest of the sub-contexts find the - * proper master. + * proper base context. */ - if (uinfo->subctxt_cnt) { - ret = init_subctxts(uctxt, uinfo); - /* - * On error, we don't need to disable and de-allocate the - * send context because it will be done during file close - */ - if (ret) - goto ctxdata_free; - } + if (uinfo->subctxt_cnt) + init_subctxts(uctxt, uinfo); uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); @@ -1081,9 +1071,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, return 0; ctxdata_free: - *cd = NULL; - dd->rcd[ctxt] = NULL; - hfi1_rcd_put(uctxt); + hfi1_free_ctxt(dd, uctxt); return ret; } @@ -1093,28 +1081,17 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) hfi1_stats.sps_ctxts--; if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts) aspm_enable_all(uctxt->dd); - - /* _rcd_put() should be done after releasing mutex */ - uctxt->dd->rcd[uctxt->ctxt] = NULL; mutex_unlock(&hfi1_mutex); - hfi1_rcd_put(uctxt); /* dd reference */ + + hfi1_free_ctxt(uctxt->dd, uctxt); } -static int init_subctxts(struct hfi1_ctxtdata *uctxt, - const struct hfi1_user_info *uinfo) +static void init_subctxts(struct hfi1_ctxtdata *uctxt, + const struct hfi1_user_info *uinfo) { - u16 num_subctxts; - - num_subctxts = uinfo->subctxt_cnt; - if (num_subctxts > HFI1_MAX_SHARED_CTXTS) - return -EINVAL; - uctxt->subctxt_cnt = uinfo->subctxt_cnt; uctxt->subctxt_id = uinfo->subctxt_id; - uctxt->redirect_seq_cnt = 1; set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); - - return 0; } static int setup_subctxt(struct hfi1_ctxtdata *uctxt) @@ -1302,8 +1279,8 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, return 0; setup_failed: - /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */ - hfi1_free_ctxtdata(dd, uctxt); + set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); + deallocate_ctxt(uctxt); return ret; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 728ed457e447..bb003ff1df96 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -273,7 +273,6 @@ struct hfi1_ctxtdata { u16 poll_type; /* receive packet sequence counter */ u8 seq_cnt; - u8 redirect_seq_cnt; /* ctxt rcvhdrq head offset */ u32 head; /* QPs waiting for context processing */ @@ -1263,9 +1262,10 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd); int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); -int hfi1_create_ctxts(struct hfi1_devdata *dd); -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, - int numa); +int hfi1_create_kctxts(struct hfi1_devdata *dd); +int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, + struct hfi1_ctxtdata **rcd); +void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index da7cd5bd27ca..23f0bbc9c436 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -126,71 +126,67 @@ static struct idr hfi1_unit_table; u32 hfi1_cpulist_count; unsigned long *hfi1_cpulist; -/* - * Common code for creating the receive context array. - */ -int hfi1_create_ctxts(struct hfi1_devdata *dd) +static int hfi1_create_kctxt(struct hfi1_devdata *dd, + struct hfi1_pportdata *ppd) { - u16 i; + struct hfi1_ctxtdata *rcd; int ret; /* Control context has to be always 0 */ BUILD_BUG_ON(HFI1_CTRL_CTXT != 0); + ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd); + if (ret < 0) { + dd_dev_err(dd, "Kernel receive context allocation failed\n"); + return ret; + } + + /* + * Set up the kernel context flags here and now because they use + * default values for all receive side memories. User contexts will + * be handled as they are created. + */ + rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | + HFI1_CAP_KGET(NODROP_RHQ_FULL) | + HFI1_CAP_KGET(NODROP_EGR_FULL) | + HFI1_CAP_KGET(DMA_RTAIL); + + /* Control context must use DMA_RTAIL */ + if (rcd->ctxt == HFI1_CTRL_CTXT) + rcd->flags |= HFI1_CAP_DMA_RTAIL; + rcd->seq_cnt = 1; + + rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); + if (!rcd->sc) { + dd_dev_err(dd, "Kernel send context allocation failed\n"); + return -ENOMEM; + } + hfi1_init_ctxt(rcd->sc); + + return 0; +} + +/* + * Create the receive context array and one or more kernel contexts + */ +int hfi1_create_kctxts(struct hfi1_devdata *dd) +{ + u16 i; + int ret; + dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd), GFP_KERNEL, dd->node); if (!dd->rcd) - goto nomem; + return -ENOMEM; - /* create one or more kernel contexts */ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { - struct hfi1_pportdata *ppd; - struct hfi1_ctxtdata *rcd; - - ppd = dd->pport + (i % dd->num_pports); - - /* dd->rcd[i] gets assigned inside the callee */ - rcd = hfi1_create_ctxtdata(ppd, i, dd->node); - if (!rcd) { - dd_dev_err(dd, - "Unable to allocate kernel receive context, failing\n"); - goto nomem; - } - /* - * Set up the kernel context flags here and now because they - * use default values for all receive side memories. User - * contexts will be handled as they are created. - */ - rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | - HFI1_CAP_KGET(NODROP_RHQ_FULL) | - HFI1_CAP_KGET(NODROP_EGR_FULL) | - HFI1_CAP_KGET(DMA_RTAIL); - - /* Control context must use DMA_RTAIL */ - if (rcd->ctxt == HFI1_CTRL_CTXT) - rcd->flags |= HFI1_CAP_DMA_RTAIL; - rcd->seq_cnt = 1; - - rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node); - if (!rcd->sc) { - dd_dev_err(dd, - "Unable to allocate kernel send context, failing\n"); - goto nomem; - } - - hfi1_init_ctxt(rcd->sc); + ret = hfi1_create_kctxt(dd, dd->pport); + if (ret) + goto bail; } - /* - * Initialize aspm, to be done after gen3 transition and setting up - * contexts and before enabling interrupts - */ - aspm_init(dd); - return 0; -nomem: - ret = -ENOMEM; - +bail: for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) hfi1_rcd_put(dd->rcd[i]); @@ -208,6 +204,11 @@ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) kref_init(&rcd->kref); } +/** + * hfi1_rcd_free - When reference is zero clean up. + * @kref: pointer to an initialized rcd data structure + * + */ static void hfi1_rcd_free(struct kref *kref) { struct hfi1_ctxtdata *rcd = @@ -217,6 +218,12 @@ static void hfi1_rcd_free(struct kref *kref) kfree(rcd); } +/** + * hfi1_rcd_put - decrement reference for rcd + * @rcd: pointer to an initialized rcd data structure + * + * Use this to put a reference after the init. + */ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) { if (rcd) @@ -225,16 +232,58 @@ int hfi1_rcd_put(struct hfi1_ctxtdata *rcd) return 0; } +/** + * hfi1_rcd_get - increment reference for rcd + * @rcd: pointer to an initialized rcd data structure + * + * Use this to get a reference after the init. + */ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) { kref_get(&rcd->kref); } +/** + * allocate_rcd_index - allocate an rcd index from the rcd array + * @dd: pointer to a valid devdata structure + * @rcd: rcd data structure to assign + * @index: pointer to index that is allocated + * + * Find an empty index in the rcd array, and assign the given rcd to it. + * If the array is full, we are EBUSY. + * + */ +static u16 allocate_rcd_index(struct hfi1_devdata *dd, + struct hfi1_ctxtdata *rcd, u16 *index) +{ + unsigned long flags; + u16 ctxt; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++) + if (!dd->rcd[ctxt]) + break; + + if (ctxt < dd->num_rcv_contexts) { + rcd->ctxt = ctxt; + dd->rcd[ctxt] = rcd; + hfi1_rcd_init(rcd); + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + if (ctxt >= dd->num_rcv_contexts) + return -EBUSY; + + *index = ctxt; + + return 0; +} + /* * Common code for user and kernel context setup. */ -struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, - int numa) +int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, + struct hfi1_ctxtdata **context) { struct hfi1_devdata *dd = ppd->dd; struct hfi1_ctxtdata *rcd; @@ -248,9 +297,18 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; + u16 ctxt; + int ret; hfi1_cdbg(PROC, "setting up context %u\n", ctxt); + ret = allocate_rcd_index(dd, rcd, &ctxt); + if (ret) { + *context = NULL; + kfree(rcd); + return ret; + } + INIT_LIST_HEAD(&rcd->qp_wait_list); hfi1_exp_tid_group_init(&rcd->tid_group_list); hfi1_exp_tid_group_init(&rcd->tid_used_list); @@ -258,8 +316,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); - rcd->ctxt = ctxt; - dd->rcd[ctxt] = rcd; rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; @@ -363,15 +419,34 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u16 ctxt, goto bail; } - hfi1_rcd_init(rcd); + *context = rcd; + return 0; } - return rcd; + bail: - dd->rcd[ctxt] = NULL; - kfree(rcd->egrbufs.rcvtids); - kfree(rcd->egrbufs.buffers); - kfree(rcd); - return NULL; + *context = NULL; + hfi1_free_ctxt(dd, rcd); + return -ENOMEM; +} + +/** + * hfi1_free_ctxt + * @dd: Pointer to a valid device + * @rcd: pointer to an initialized rcd data structure + * + * This is the "free" to match the _create_ctxtdata (alloc) function. + * This is the final "put" for the kref. + */ +void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) +{ + unsigned long flags; + + if (rcd) { + spin_lock_irqsave(&dd->uctxt_lock, flags); + dd->rcd[rcd->ctxt] = NULL; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + hfi1_rcd_put(rcd); + } } /* diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 89b056489769..c91456c16cdb 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -106,22 +106,13 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata **vnic_ctxt) { struct hfi1_ctxtdata *uctxt; - u16 ctxt; int ret; if (dd->flags & HFI1_FROZEN) return -EIO; - for (ctxt = dd->first_dyn_alloc_ctxt; - ctxt < dd->num_rcv_contexts; ctxt++) - if (!dd->rcd[ctxt]) - break; - - if (ctxt == dd->num_rcv_contexts) - return -EBUSY; - - uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); - if (!uctxt) { + ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); + if (ret < 0) { dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); return -ENOMEM; } @@ -156,11 +147,10 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: /* - * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will + * hfi1_free_ctxt() will call hfi1_free_ctxtdata(), which will * release send_context structure if uctxt->sc is not null */ - dd->rcd[uctxt->ctxt] = NULL; - hfi1_rcd_put(uctxt); + hfi1_free_ctxt(dd, uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -201,14 +191,14 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; spin_unlock_irqrestore(&dd->uctxt_lock, flags); - dd->rcd[uctxt->ctxt] = NULL; uctxt->event_flags = 0; hfi1_clear_tids(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); hfi1_stats.sps_ctxts--; - hfi1_rcd_put(uctxt); + + hfi1_free_ctxt(dd, uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) -- cgit v1.2.3 From d295dbeb2a0c93364444e76b3bb30f587a823e0e Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Fri, 4 Aug 2017 13:52:44 -0700 Subject: IB/hf1: User context locking is inconsistent There is a mixture of mutex and spinlocks to protect receive context (rcd/uctxt) information. This is not used consistently. Use the mutex to protect device receive context information only. Use the spinlock to protect sub context information only. Protect access to items in the rcd array with a spinlock and reference count. Remove spinlock around dd->rcd array cleanup. Since interrupts are disabled and cleaned up before this point, this lock is not useful. Reviewed-by: Mike Marciniszyn Reviewed-by: Sebastian Sanchez Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/aspm.h | 35 ++++--- drivers/infiniband/hw/hfi1/chip.c | 30 ++++-- drivers/infiniband/hw/hfi1/debugfs.c | 32 ++++-- drivers/infiniband/hw/hfi1/driver.c | 71 ++++++++----- drivers/infiniband/hw/hfi1/file_ops.c | 185 +++++++++++++++++++++------------ drivers/infiniband/hw/hfi1/hfi.h | 6 +- drivers/infiniband/hw/hfi1/init.c | 134 +++++++++++++++--------- drivers/infiniband/hw/hfi1/trace_rx.h | 12 +-- drivers/infiniband/hw/hfi1/vnic_main.c | 12 +-- 9 files changed, 326 insertions(+), 191 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 3f9a071ccac3..522b40ed9937 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -240,11 +240,14 @@ static inline void aspm_disable_all(struct hfi1_devdata *dd) u16 i; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = dd->rcd[i]; - del_timer_sync(&rcd->aspm_timer); - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = false; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + del_timer_sync(&rcd->aspm_timer); + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = false; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } } aspm_disable(dd); @@ -264,11 +267,14 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) return; for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { - rcd = dd->rcd[i]; - spin_lock_irqsave(&rcd->aspm_lock, flags); - rcd->aspm_intr_enable = true; - rcd->aspm_enabled = true; - spin_unlock_irqrestore(&rcd->aspm_lock, flags); + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) { + spin_lock_irqsave(&rcd->aspm_lock, flags); + rcd->aspm_intr_enable = true; + rcd->aspm_enabled = true; + spin_unlock_irqrestore(&rcd->aspm_lock, flags); + hfi1_rcd_put(rcd); + } } } @@ -284,13 +290,18 @@ static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) static inline void aspm_init(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u16 i; spin_lock_init(&dd->aspm_lock); dd->aspm_supported = aspm_hw_l1_supported(dd); - for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) - aspm_ctx_init(dd->rcd[i]); + for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + aspm_ctx_init(rcd); + hfi1_rcd_put(rcd); + } /* Start with ASPM disabled */ aspm_hw_set_l1_ent_latency(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 2cdf09dfcb55..1446c16bc8a8 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6785,13 +6785,17 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze) static void rxe_freeze(struct hfi1_devdata *dd) { int i; + struct hfi1_ctxtdata *rcd; /* disable port */ clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); /* disable all receive contexts */ - for (i = 0; i < dd->num_rcv_contexts; i++) - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, dd->rcd[i]); + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, rcd); + hfi1_rcd_put(rcd); + } } /* @@ -6804,20 +6808,23 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) { u32 rcvmask; u16 i; + struct hfi1_ctxtdata *rcd; /* enable all kernel contexts */ for (i = 0; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *rcd = dd->rcd[i]; + rcd = hfi1_rcd_get_by_index(dd, i); /* Ensure all non-user contexts(including vnic) are enabled */ - if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) + if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) { + hfi1_rcd_put(rcd); continue; - + } rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(dd, rcvmask, rcd); + hfi1_rcd_put(rcd); } /* enable port */ @@ -8104,7 +8111,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) char *err_detail; if (likely(source < dd->num_rcv_contexts)) { - rcd = dd->rcd[source]; + rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* Check for non-user contexts, including vnic */ if ((source < dd->first_dyn_alloc_ctxt) || @@ -8112,6 +8119,8 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); + + hfi1_rcd_put(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ @@ -8133,12 +8142,14 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) char *err_detail; if (likely(source < dd->num_rcv_contexts)) { - rcd = dd->rcd[source]; + rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* only pay attention to user urgent interrupts */ if ((source >= dd->first_dyn_alloc_ctxt) && (!rcd->sc || (rcd->sc->type == SC_USER))) handle_user_interrupt(rcd); + + hfi1_rcd_put(rcd); return; /* OK */ } /* received an interrupt, but no rcd */ @@ -8343,7 +8354,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data) int disposition; int present; - trace_hfi1_receive_interrupt(dd, rcd->ctxt); + trace_hfi1_receive_interrupt(dd, rcd); this_cpu_inc(*dd->int_counter); aspm_ctx_disable(rcd); @@ -13030,7 +13041,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) me->type = IRQ_SDMA; } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; - rcd = dd->rcd[idx]; + rcd = hfi1_rcd_get_by_index(dd, idx); if (rcd) { /* * Set the interrupt register and mask for this @@ -13049,6 +13060,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) remap_intr(dd, IS_RCVAVAIL_START + idx, i); me->type = IRQ_RCVCTXT; rcd->msix_intr = i; + hfi1_rcd_put(rcd); } } else { /* not in our expected range - complain, then diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index e9fa3c293e42..550119c6f1ee 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -173,12 +173,15 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0, n_bytes = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - if (!dd->rcd[j]) - continue; - n_packets += dd->rcd[j]->opstats->stats[i].n_packets; - n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); } if (!n_packets && !n_bytes) return SEQ_SKIP; @@ -231,6 +234,7 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; if (v == SEQ_START_TOKEN) { seq_puts(s, "Ctx:npkts\n"); @@ -240,11 +244,14 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) spos = v; i = *spos; - if (!dd->rcd[i]) + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) return SEQ_SKIP; - for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) - n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + for (j = 0; j < ARRAY_SIZE(rcd->opstats->stats); j++) + n_packets += rcd->opstats->stats[j].n_packets; + + hfi1_rcd_put(rcd); if (!n_packets) return SEQ_SKIP; @@ -1098,12 +1105,15 @@ static int _fault_stats_seq_show(struct seq_file *s, void *v) u64 n_packets = 0, n_bytes = 0; struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - if (!dd->rcd[j]) - continue; - n_packets += dd->rcd[j]->opstats->stats[i].n_packets; - n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); } if (!n_packets && !n_bytes) return SEQ_SKIP; diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 0b7ca0e05320..14f2a00c13c2 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -839,6 +839,7 @@ bail: static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) { + struct hfi1_ctxtdata *rcd; u16 i; /* @@ -847,18 +848,27 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - dd->rcd[ctxt]->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + rcd = hfi1_rcd_get_by_index(dd, ctxt); + if (rcd) { + rcd->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + hfi1_rcd_put(rcd); + } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) - dd->rcd[i]->do_interrupt = - &handle_receive_interrupt_nodma_rtail; + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + rcd->do_interrupt = + &handle_receive_interrupt_nodma_rtail; + hfi1_rcd_put(rcd); + } } static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) { + struct hfi1_ctxtdata *rcd; u16 i; /* @@ -867,27 +877,39 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - dd->rcd[ctxt]->do_interrupt = - &handle_receive_interrupt_dma_rtail; + rcd = hfi1_rcd_get_by_index(dd, ctxt); + if (rcd) { + rcd->do_interrupt = + &handle_receive_interrupt_dma_rtail; + hfi1_rcd_put(rcd); + } return; } - for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) - dd->rcd[i]->do_interrupt = - &handle_receive_interrupt_dma_rtail; + for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + rcd->do_interrupt = + &handle_receive_interrupt_dma_rtail; + hfi1_rcd_put(rcd); + } } void set_all_slowpath(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u16 i; /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *rcd = dd->rcd[i]; - + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) + continue; if ((i < dd->first_dyn_alloc_ctxt) || - (rcd && rcd->sc && (rcd->sc->type == SC_KERNEL))) + (rcd->sc && (rcd->sc->type == SC_KERNEL))) { rcd->do_interrupt = &handle_receive_interrupt; + } + hfi1_rcd_put(rcd); } } @@ -1068,6 +1090,7 @@ void receive_interrupt_work(struct work_struct *work) struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, linkstate_active_work); struct hfi1_devdata *dd = ppd->dd; + struct hfi1_ctxtdata *rcd; u16 i; /* Received non-SC15 packet implies neighbor_normal */ @@ -1078,8 +1101,12 @@ void receive_interrupt_work(struct work_struct *work) * Interrupt all statically allocated kernel contexts that could * have had an interrupt during auto activation. */ - for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) - force_recv_intr(dd->rcd[i]); + for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + force_recv_intr(rcd); + hfi1_rcd_put(rcd); + } } /* @@ -1270,10 +1297,8 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, int hfi1_reset_device(int unit) { int ret; - u16 i; struct hfi1_devdata *dd = hfi1_lookup(unit); struct hfi1_pportdata *ppd; - unsigned long flags; int pidx; if (!dd) { @@ -1291,17 +1316,15 @@ int hfi1_reset_device(int unit) goto bail; } - spin_lock_irqsave(&dd->uctxt_lock, flags); + /* If there are any user/vnic contexts, we cannot reset */ + mutex_lock(&hfi1_mutex); if (dd->rcd) - for (i = dd->first_dyn_alloc_ctxt; - i < dd->num_rcv_contexts; i++) { - if (!dd->rcd[i]) - continue; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (hfi1_stats.sps_ctxts) { + mutex_unlock(&hfi1_mutex); ret = -EBUSY; goto bail; } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); + mutex_unlock(&hfi1_mutex); for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7361366d80e4..ab8eb2bf48d8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -757,7 +757,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (!uctxt) goto done; - hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); + hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt); flush_wc(); /* drain user sdma queue */ @@ -769,6 +769,13 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) /* clean up rcv side */ hfi1_user_exp_rcv_free(fdata); + /* + * fdata->uctxt is used in the above cleanup. It is not ready to be + * removed until here. + */ + fdata->uctxt = NULL; + hfi1_rcd_put(uctxt); + /* * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. @@ -777,16 +784,14 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; *ev = 0; - mutex_lock(&hfi1_mutex); + spin_lock_irqsave(&dd->uctxt_lock, flags); __clear_bit(fdata->subctxt, uctxt->in_use_ctxts); - fdata->uctxt = NULL; - hfi1_rcd_put(uctxt); /* fdata reference */ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { - mutex_unlock(&hfi1_mutex); + spin_unlock_irqrestore(&dd->uctxt_lock, flags); goto done; } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); - spin_lock_irqsave(&dd->uctxt_lock, flags); /* * Disable receive context and interrupt available, reset all * RcvCtxtCtrl bits to default values. @@ -808,13 +813,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) set_pio_integrity(uctxt->sc); sc_disable(uctxt->sc); } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); hfi1_free_ctxt_rcv_groups(uctxt); hfi1_clear_ctxt_pkey(dd, uctxt); uctxt->event_flags = 0; - mutex_unlock(&hfi1_mutex); deallocate_ctxt(uctxt); done: @@ -844,9 +847,22 @@ static u64 kvirt_to_phys(void *addr) return paddr; } +/** + * complete_subctxt + * @fd: valid filedata pointer + * + * Sub-context info can only be set up after the base context + * has been completed. This is indicated by the clearing of the + * HFI1_CTXT_BASE_UINIT bit. + * + * Wait for the bit to be cleared, and then complete the subcontext + * initialization. + * + */ static int complete_subctxt(struct hfi1_filedata *fd) { int ret; + unsigned long flags; /* * sub-context info can only be set up after the base context @@ -859,7 +875,7 @@ static int complete_subctxt(struct hfi1_filedata *fd) if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) ret = -ENOMEM; - /* The only thing a sub context needs is the user_xxx stuff */ + /* Finish the sub-context init */ if (!ret) { fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id); ret = init_user_ctxt(fd, fd->uctxt); @@ -868,9 +884,9 @@ static int complete_subctxt(struct hfi1_filedata *fd) if (ret) { hfi1_rcd_put(fd->uctxt); fd->uctxt = NULL; - mutex_lock(&hfi1_mutex); + spin_lock_irqsave(&fd->dd->uctxt_lock, flags); __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts); - mutex_unlock(&hfi1_mutex); + spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags); } return ret; @@ -911,14 +927,15 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) mutex_unlock(&hfi1_mutex); - /* Depending on the context type, do the appropriate init */ + /* Depending on the context type, finish the appropriate init */ switch (ret) { case 0: ret = setup_base_ctxt(fd, uctxt); if (uctxt->subctxt_cnt) { /* - * Base context is done, notify anybody using a - * sub-context that is waiting for this completion + * Base context is done (successfully or not), notify + * anybody using a sub-context that is waiting for + * this completion. */ clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); wake_up(&uctxt->wait); @@ -934,58 +951,97 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) return ret; } -/* - * The hfi1_mutex must be held when this function is called. It is - * necessary to ensure serialized creation of shared contexts. +/** + * match_ctxt + * @fd: valid filedata pointer + * @uinfo: user info to compare base context with + * @uctxt: context to compare uinfo to. + * + * Compare the given context with the given information to see if it + * can be used for a sub context. */ -static int find_sub_ctxt(struct hfi1_filedata *fd, - const struct hfi1_user_info *uinfo) +static int match_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo, + struct hfi1_ctxtdata *uctxt) { - u16 i; struct hfi1_devdata *dd = fd->dd; + unsigned long flags; u16 subctxt; - if (!uinfo->subctxt_cnt) + /* Skip dynamically allocated kernel contexts */ + if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) return 0; - for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { - struct hfi1_ctxtdata *uctxt = dd->rcd[i]; + /* Skip ctxt if it doesn't match the requested one */ + if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) || + uctxt->jkey != generate_jkey(current_uid()) || + uctxt->subctxt_id != uinfo->subctxt_id || + uctxt->subctxt_cnt != uinfo->subctxt_cnt) + return 0; - /* Skip ctxts which are not yet open */ - if (!uctxt || - bitmap_empty(uctxt->in_use_ctxts, - HFI1_MAX_SHARED_CTXTS)) - continue; + /* Verify the sharing process matches the base */ + if (uctxt->userversion != uinfo->userversion) + return -EINVAL; - /* Skip dynamically allocted kernel contexts */ - if (uctxt->sc && (uctxt->sc->type == SC_KERNEL)) - continue; + /* Find an unused sub context */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) { + /* context is being closed, do not use */ + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + return 0; + } - /* Skip ctxt if it doesn't match the requested one */ - if (memcmp(uctxt->uuid, uinfo->uuid, - sizeof(uctxt->uuid)) || - uctxt->jkey != generate_jkey(current_uid()) || - uctxt->subctxt_id != uinfo->subctxt_id || - uctxt->subctxt_cnt != uinfo->subctxt_cnt) - continue; + subctxt = find_first_zero_bit(uctxt->in_use_ctxts, + HFI1_MAX_SHARED_CTXTS); + if (subctxt >= uctxt->subctxt_cnt) { + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + return -EBUSY; + } - /* Verify the sharing process matches the master */ - if (uctxt->userversion != uinfo->userversion) - return -EINVAL; + fd->subctxt = subctxt; + __set_bit(fd->subctxt, uctxt->in_use_ctxts); + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + fd->uctxt = uctxt; + hfi1_rcd_get(uctxt); - /* Find an unused context */ - subctxt = find_first_zero_bit(uctxt->in_use_ctxts, - HFI1_MAX_SHARED_CTXTS); - if (subctxt >= uctxt->subctxt_cnt) - return -EBUSY; + return 1; +} - fd->uctxt = uctxt; - fd->subctxt = subctxt; +/** + * find_sub_ctxt + * @fd: valid filedata pointer + * @uinfo: matching info to use to find a possible context to share. + * + * The hfi1_mutex must be held when this function is called. It is + * necessary to ensure serialized creation of shared contexts. + * + * Return: + * 0 No sub-context found + * 1 Subcontext found and allocated + * errno EINVAL (incorrect parameters) + * EBUSY (all sub contexts in use) + */ +static int find_sub_ctxt(struct hfi1_filedata *fd, + const struct hfi1_user_info *uinfo) +{ + struct hfi1_ctxtdata *uctxt; + struct hfi1_devdata *dd = fd->dd; + u16 i; + int ret; - hfi1_rcd_get(uctxt); - __set_bit(fd->subctxt, uctxt->in_use_ctxts); + if (!uinfo->subctxt_cnt) + return 0; - return 1; + for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) { + uctxt = hfi1_rcd_get_by_index(dd, i); + if (uctxt) { + ret = match_ctxt(fd, uinfo, uctxt); + hfi1_rcd_put(uctxt); + /* value of != 0 will return */ + if (ret) + return ret; + } } return 0; @@ -993,7 +1049,7 @@ static int find_sub_ctxt(struct hfi1_filedata *fd, static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, struct hfi1_user_info *uinfo, - struct hfi1_ctxtdata **cd) + struct hfi1_ctxtdata **rcd) { struct hfi1_ctxtdata *uctxt; int ret, numa; @@ -1066,12 +1122,12 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, if (dd->freectxts-- == dd->num_user_contexts) aspm_disable_all(dd); - *cd = uctxt; + *rcd = uctxt; return 0; ctxdata_free: - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); return ret; } @@ -1083,7 +1139,7 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt) aspm_enable_all(uctxt->dd); mutex_unlock(&hfi1_mutex); - hfi1_free_ctxt(uctxt->dd, uctxt); + hfi1_free_ctxt(uctxt); } static void init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1279,8 +1335,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, return 0; setup_failed: + /* Set the failed bit so sub-context init can do the right thing */ set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); deallocate_ctxt(uctxt); + return ret; } @@ -1417,18 +1475,13 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) struct hfi1_ctxtdata *uctxt; struct hfi1_devdata *dd = ppd->dd; u16 ctxt; - int ret = 0; - unsigned long flags; - if (!dd->events) { - ret = -EINVAL; - goto done; - } + if (!dd->events) + return -EINVAL; - spin_lock_irqsave(&dd->uctxt_lock, flags); for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts; ctxt++) { - uctxt = dd->rcd[ctxt]; + uctxt = hfi1_rcd_get_by_index(dd, ctxt); if (uctxt) { unsigned long *evs = dd->events + (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * @@ -1441,11 +1494,11 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) set_bit(evtbit, evs); for (i = 1; i < uctxt->subctxt_cnt; i++) set_bit(evtbit, evs + i); + hfi1_rcd_put(uctxt); } } - spin_unlock_irqrestore(&dd->uctxt_lock, flags); -done: - return ret; + + return 0; } /** diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index bb003ff1df96..fa9160f68bb7 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -938,8 +938,7 @@ struct hfi1_devdata { u64 __iomem *egrtidbase; spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ - /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */ - spinlock_t uctxt_lock; /* rcd and user context changes */ + spinlock_t uctxt_lock; /* protect rcd changes */ struct mutex dc8051_lock; /* exclusive access to 8051 */ struct workqueue_struct *update_cntr_wq; struct work_struct update_cntr_work; @@ -1265,12 +1264,13 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd); int hfi1_create_kctxts(struct hfi1_devdata *dd); int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, struct hfi1_ctxtdata **rcd); -void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); +void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd); void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, struct hfi1_devdata *dd, u8 hw_pidx, u8 port); void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); +struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 23f0bbc9c436..fba77001c3a7 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -188,7 +188,7 @@ int hfi1_create_kctxts(struct hfi1_devdata *dd) return 0; bail: for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) - hfi1_rcd_put(dd->rcd[i]); + hfi1_free_ctxt(dd->rcd[i]); /* All the contexts should be freed, free the array */ kfree(dd->rcd); @@ -197,7 +197,7 @@ bail: } /* - * Helper routines for the receive context reference count (rcd and uctxt) + * Helper routines for the receive context reference count (rcd and uctxt). */ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) { @@ -211,10 +211,16 @@ static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd) */ static void hfi1_rcd_free(struct kref *kref) { + unsigned long flags; struct hfi1_ctxtdata *rcd = container_of(kref, struct hfi1_ctxtdata, kref); hfi1_free_ctxtdata(rcd->dd, rcd); + + spin_lock_irqsave(&rcd->dd->uctxt_lock, flags); + rcd->dd->rcd[rcd->ctxt] = NULL; + spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags); + kfree(rcd); } @@ -253,7 +259,7 @@ void hfi1_rcd_get(struct hfi1_ctxtdata *rcd) * If the array is full, we are EBUSY. * */ -static u16 allocate_rcd_index(struct hfi1_devdata *dd, +static int allocate_rcd_index(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd, u16 *index) { unsigned long flags; @@ -279,8 +285,36 @@ static u16 allocate_rcd_index(struct hfi1_devdata *dd, return 0; } +/** + * hfi1_rcd_get_by_index + * @dd: pointer to a valid devdata structure + * @ctxt: the index of an possilbe rcd + * + * We need to protect access to the rcd array. If access is needed to + * one or more index, get the protecting spinlock and then increment the + * kref. + * + * The caller is responsible for making the _put(). + * + */ +struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt) +{ + unsigned long flags; + struct hfi1_ctxtdata *rcd = NULL; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (dd->rcd[ctxt]) { + rcd = dd->rcd[ctxt]; + hfi1_rcd_get(rcd); + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + return rcd; +} + /* - * Common code for user and kernel context setup. + * Common code for user and kernel context create and setup. + * NOTE: the initial kref is done here (hf1_rcd_init()). */ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, struct hfi1_ctxtdata **context) @@ -300,8 +334,6 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, u16 ctxt; int ret; - hfi1_cdbg(PROC, "setting up context %u\n", ctxt); - ret = allocate_rcd_index(dd, rcd, &ctxt); if (ret) { *context = NULL; @@ -321,6 +353,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, mutex_init(&rcd->exp_lock); + hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt); + /* * Calculate the context's RcvArray entry starting point. * We do this here because we have to take into account all @@ -425,28 +459,23 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, bail: *context = NULL; - hfi1_free_ctxt(dd, rcd); + hfi1_free_ctxt(rcd); return -ENOMEM; } /** * hfi1_free_ctxt - * @dd: Pointer to a valid device * @rcd: pointer to an initialized rcd data structure * - * This is the "free" to match the _create_ctxtdata (alloc) function. - * This is the final "put" for the kref. + * This wrapper is the free function that matches hfi1_create_ctxtdata(). + * When a context is done being used (kernel or user), this function is called + * for the "final" put to match the kref init from hf1i_create_ctxtdata(). + * Other users of the context do a get/put sequence to make sure that the + * structure isn't removed while in use. */ -void hfi1_free_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) +void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd) { - unsigned long flags; - - if (rcd) { - spin_lock_irqsave(&dd->uctxt_lock, flags); - dd->rcd[rcd->ctxt] = NULL; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); - hfi1_rcd_put(rcd); - } + hfi1_rcd_put(rcd); } /* @@ -669,16 +698,19 @@ static int loadtime_init(struct hfi1_devdata *dd) static int init_after_reset(struct hfi1_devdata *dd) { int i; - + struct hfi1_ctxtdata *rcd; /* * Ensure chip does no sends or receives, tail updates, or * pioavail updates while we re-initialize. This is mostly * for the driver data structures, not chip registers. */ - for (i = 0; i < dd->num_rcv_contexts; i++) + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS | - HFI1_RCVCTRL_TAILUPD_DIS, dd->rcd[i]); + HFI1_RCVCTRL_TAILUPD_DIS, rcd); + hfi1_rcd_put(rcd); + } pio_send_control(dd, PSC_GLOBAL_DISABLE); for (i = 0; i < dd->num_send_contexts; i++) sc_disable(dd->send_contexts[i].sc); @@ -688,6 +720,7 @@ static int init_after_reset(struct hfi1_devdata *dd) static void enable_chip(struct hfi1_devdata *dd) { + struct hfi1_ctxtdata *rcd; u32 rcvmask; u16 i; @@ -699,17 +732,21 @@ static void enable_chip(struct hfi1_devdata *dd) * Other ctxts done as user opens and initializes them. */ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) { + rcd = hfi1_rcd_get_by_index(dd, i); + if (!rcd) + continue; rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB; - rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ? + rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; - if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR)) + if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; - if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_RHQ_FULL)) + if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL)) rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; - if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL)) + if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL)) rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; - hfi1_rcvctrl(dd, rcvmask, dd->rcd[i]); - sc_enable(dd->rcd[i]->sc); + hfi1_rcvctrl(dd, rcvmask, rcd); + sc_enable(rcd->sc); + hfi1_rcd_put(rcd); } } @@ -854,7 +891,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) * existing, and re-allocate. * Need to re-create rest of ctxt 0 ctxtdata as well. */ - rcd = dd->rcd[i]; + rcd = hfi1_rcd_get_by_index(dd, i); if (!rcd) continue; @@ -868,6 +905,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); ret = lastfail; } + hfi1_rcd_put(rcd); } /* Allocate enough memory for user event notification. */ @@ -987,6 +1025,7 @@ static void stop_timers(struct hfi1_devdata *dd) static void shutdown_device(struct hfi1_devdata *dd) { struct hfi1_pportdata *ppd; + struct hfi1_ctxtdata *rcd; unsigned pidx; int i; @@ -1005,12 +1044,15 @@ static void shutdown_device(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - for (i = 0; i < dd->num_rcv_contexts; i++) + for (i = 0; i < dd->num_rcv_contexts; i++) { + rcd = hfi1_rcd_get_by_index(dd, i); hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS | HFI1_RCVCTRL_PKEY_DIS | - HFI1_RCVCTRL_ONE_PKT_EGR_DIS, dd->rcd[i]); + HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd); + hfi1_rcd_put(rcd); + } /* * Gracefully stop all sends allowing any in progress to * trickle out first. @@ -1450,8 +1492,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) { int ctxt; int pidx; - struct hfi1_ctxtdata **tmp; - unsigned long flags; /* users can't do anything more with chip */ for (pidx = 0; pidx < dd->num_pports; ++pidx) { @@ -1476,18 +1516,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) free_credit_return(dd); - /* - * Free any resources still in use (usually just kernel contexts) - * at unload; we do for ctxtcnt, because that's what we allocate. - * We acquire lock to be really paranoid that rcd isn't being - * accessed from some interrupt-related code (that should not happen, - * but best to be sure). - */ - spin_lock_irqsave(&dd->uctxt_lock, flags); - tmp = dd->rcd; - dd->rcd = NULL; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); - if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, @@ -1495,16 +1523,22 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dd->rcvhdrtail_dummy_kvaddr = NULL; } - for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) { - struct hfi1_ctxtdata *rcd = tmp[ctxt]; + /* + * Free any resources still in use (usually just kernel contexts) + * at unload; we do for ctxtcnt, because that's what we allocate. + */ + for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) { + struct hfi1_ctxtdata *rcd = dd->rcd[ctxt]; - tmp[ctxt] = NULL; /* debugging paranoia */ if (rcd) { hfi1_clear_tids(rcd); - hfi1_rcd_put(rcd); + hfi1_free_ctxt(rcd); } } - kfree(tmp); + + kfree(dd->rcd); + dd->rcd = NULL; + free_pio_map(dd); /* must follow rcv context free - need to remove rcv's hooks */ for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++) diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index bebf0a8f214d..f9909d240dcc 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -114,24 +114,24 @@ TRACE_EVENT(hfi1_rcvhdr, ); TRACE_EVENT(hfi1_receive_interrupt, - TP_PROTO(struct hfi1_devdata *dd, u16 ctxt), - TP_ARGS(dd, ctxt), + TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd), + TP_ARGS(dd, rcd), TP_STRUCT__entry(DD_DEV_ENTRY(dd) __field(u32, ctxt) __field(u8, slow_path) __field(u8, dma_rtail) ), TP_fast_assign(DD_DEV_ASSIGN(dd); - __entry->ctxt = ctxt; - if (dd->rcd[ctxt]->do_interrupt == + __entry->ctxt = rcd->ctxt; + if (rcd->do_interrupt == &handle_receive_interrupt) { __entry->slow_path = 1; __entry->dma_rtail = 0xFF; - } else if (dd->rcd[ctxt]->do_interrupt == + } else if (rcd->do_interrupt == &handle_receive_interrupt_dma_rtail){ __entry->dma_rtail = 1; __entry->slow_path = 0; - } else if (dd->rcd[ctxt]->do_interrupt == + } else if (rcd->do_interrupt == &handle_receive_interrupt_nodma_rtail) { __entry->dma_rtail = 0; __entry->slow_path = 0; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c91456c16cdb..2917a238a343 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -146,11 +146,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, return ret; bail: - /* - * hfi1_free_ctxt() will call hfi1_free_ctxtdata(), which will - * release send_context structure if uctxt->sc is not null - */ - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); return ret; } @@ -158,15 +154,12 @@ bail: static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) { - unsigned long flags; - dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); flush_wc(); if (dd->num_msix_entries) hfi1_reset_vnic_msix_info(uctxt); - spin_lock_irqsave(&dd->uctxt_lock, flags); /* * Disable receive context and interrupt available, reset all * RcvCtxtCtrl bits to default values. @@ -189,7 +182,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, sc_disable(uctxt->sc); dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; - spin_unlock_irqrestore(&dd->uctxt_lock, flags); uctxt->event_flags = 0; @@ -198,7 +190,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, hfi1_stats.sps_ctxts--; - hfi1_free_ctxt(dd, uctxt); + hfi1_free_ctxt(uctxt); } void hfi1_vnic_setup(struct hfi1_devdata *dd) -- cgit v1.2.3 From 13c19222889daf91da36b7fb63b5d5d9ce89b377 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:51 -0700 Subject: IB/rdmavt, hfi1, qib: Modify check_ah() to account for extended LIDs rvt_check_ah() delegates lid verification to underlying driver. Underlying driver uses different conditions to check for dlid depending on whether the device supports extended LIDs Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/common.h | 9 --------- drivers/infiniband/hw/hfi1/mad.c | 5 +++-- drivers/infiniband/hw/hfi1/verbs.c | 5 +++++ drivers/infiniband/hw/qib/qib_verbs.c | 9 +++++++++ drivers/infiniband/sw/rdmavt/ah.c | 10 ---------- drivers/infiniband/sw/rdmavt/qp.c | 29 +++++++++++++++++++++++------ include/rdma/opa_addr.h | 18 ++++++++++++++++++ 7 files changed, 58 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index ba9ab971ced9..aa416ef93c1a 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -333,15 +333,6 @@ struct diag_pkt { #define DEFAULT_P_KEY LIM_MGMT_P_KEY -/** - * 0xF8 - 4 bits of multicast range and 1 bit for collective range - * Example: For 24 bit LID space, - * Multicast range: 0xF00000 to 0xF7FFFF - * Collective range: 0xF80000 to 0xFFFFFE - */ -#define HFI1_MCAST_NR 0x4 /* Number of top bits set */ -#define HFI1_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ - #define HFI1_PSM_IOC_BASE_SEQ 0x0 static inline __u64 rhf_to_cpu(const __le32 *rbuf) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index cd1f6f841f34..21fadb4b510c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -46,6 +46,7 @@ */ #include +#include #define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \ / (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16))) @@ -905,8 +906,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, pi->buffer_units = cpu_to_be32(buffer_units); pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags); - pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7) - << 3 | (HFI1_MCAST_NR & 0x7)); + pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7) + << 3 | (OPA_MCAST_NR & 0x7)); /* HFI supports a replay buffer 128 LTPs in size */ pi->replay_depth.buffer = 0x80; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c88c03c11555..97ca42beb023 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "hfi.h" #include "common.h" @@ -1461,6 +1462,10 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) struct hfi1_devdata *dd; u8 sc5; + if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + /* test the mapping for validity */ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index ac42dce7e281..9d92aeb8d9a1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1341,6 +1341,15 @@ int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) if (rdma_ah_get_sl(ah_attr) > 15) return -EINVAL; + if (rdma_ah_get_dlid(ah_attr) == 0) + return -EINVAL; + if (rdma_ah_get_dlid(ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE) && + rdma_ah_get_dlid(ah_attr) != + be16_to_cpu(IB_LID_PERMISSIVE) && + !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) + return -EINVAL; + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index a96d4aa80ae8..ba3639a0d77c 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -66,8 +66,6 @@ int rvt_check_ah(struct ib_device *ibdev, int port_num = rdma_ah_get_port_num(ah_attr); struct ib_port_attr port_attr; struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - u32 dlid = rdma_ah_get_dlid(ah_attr); u8 ah_flags = rdma_ah_get_ah_flags(ah_attr); u8 static_rate = rdma_ah_get_static_rate(ah_attr); @@ -83,14 +81,6 @@ int rvt_check_ah(struct ib_device *ibdev, if ((ah_flags & IB_AH_GRH) && rdma_ah_read_grh(ah_attr)->sgid_index >= port_attr.gid_tbl_len) return -EINVAL; - if (link != IB_LINK_LAYER_ETHERNET) { - if (dlid == 0) - return -EINVAL; - if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) && - dlid != be16_to_cpu(IB_LID_PERMISSIVE) && - !(ah_flags & IB_AH_GRH)) - return -EINVAL; - } if (rdi->driver_f.check_ah) return rdi->driver_f.check_ah(ibdev, ah_attr); return 0; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index eb0c3d60c584..6f6525d24a2f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "qp.h" #include "vt.h" #include "trace.h" @@ -1066,6 +1067,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mig = 0; int pmtu = 0; /* for gcc warning only */ enum rdma_link_layer link; + int opa_ah; link = rdma_port_get_link_layer(ibqp->device, qp->port_num); @@ -1076,6 +1078,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num); if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, link)) @@ -1086,17 +1089,31 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto inval; if (attr_mask & IB_QP_AV) { - if (rdma_ah_get_dlid(&attr->ah_attr) >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; + if (opa_ah) { + if (rdma_ah_get_dlid(&attr->ah_attr) >= + opa_get_mcast_base(OPA_MCAST_NR)) + goto inval; + } else { + if (rdma_ah_get_dlid(&attr->ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + } + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) goto inval; } if (attr_mask & IB_QP_ALT_PATH) { - if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; + if (opa_ah) { + if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= + opa_get_mcast_base(OPA_MCAST_NR)) + goto inval; + } else { + if (rdma_ah_get_dlid(&attr->alt_ah_attr) >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + } + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) goto inval; if (attr->alt_pkey_index >= rvt_get_npkeys(rdi)) diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9b5e642cf550..8d3ad4ecbea1 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -48,10 +48,21 @@ #ifndef OPA_ADDR_H #define OPA_ADDR_H +#include + #define OPA_SPECIAL_OUI (0x00066AULL) #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) +/** + * 0xF8 - 4 bits of multicast range and 1 bit for collective range + * Example: For 24 bit LID space, + * Multicast range: 0xF00000 to 0xF7FFFF + * Collective range: 0xF80000 to 0xFFFFFE + */ +#define OPA_MCAST_NR 0x4 /* Number of top bits set */ +#define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */ + /** * ib_is_opa_gid: Returns true if the top 24 bits of the gid * contains the OPA_STL_OUI identifier. This identifies that @@ -95,4 +106,11 @@ static inline bool opa_is_extended_lid(u32 dlid, u32 slid) else return false; } + +/* Get multicast lid base */ +static inline u32 opa_get_mcast_base(u32 nr_top_bits) +{ + return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); +} + #endif /* OPA_ADDR_H */ -- cgit v1.2.3 From 72c07e2b671eda1cf3e8ebabc664f542f673b997 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:53:58 -0700 Subject: IB/hfi1: Add support to receive 16B bypass packets We introduce a struct hfi1_16b_header to support 16B headers. 16B bypass packets are received by the driver and processed similar to 9B packets. Add basic support to handle 16B packets. Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 ++ drivers/infiniband/hw/hfi1/common.h | 1 + drivers/infiniband/hw/hfi1/driver.c | 127 ++++++++++++++++++++++++++++---- drivers/infiniband/hw/hfi1/hfi.h | 131 ++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/uc.c | 2 +- drivers/infiniband/hw/hfi1/ud.c | 4 +- drivers/infiniband/hw/hfi1/verbs.c | 17 +++-- drivers/infiniband/hw/hfi1/verbs.h | 13 ++++ drivers/infiniband/hw/hfi1/vnic.h | 15 ---- drivers/infiniband/hw/hfi1/vnic_main.c | 4 +- include/rdma/opa_vnic.h | 3 - 12 files changed, 274 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 1446c16bc8a8..ee1324cce25a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14468,6 +14468,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd) static void init_rxe(struct hfi1_devdata *dd) { struct rsm_map_table *rmt; + u64 val; /* enable all receive errors */ write_csr(dd, RCV_ERR_MASK, ~0ull); @@ -14492,6 +14493,11 @@ static void init_rxe(struct hfi1_devdata *dd) * (64 bytes). Max_Payload_Size is possibly modified upward in * tune_pcie_caps() which is called after this routine. */ + + /* Have 16 bytes (4DW) of bypass header available in header queue */ + val = read_csr(dd, RCV_BYPASS); + val |= (4ull << 16); + write_csr(dd, RCV_BYPASS, val); } static void init_other(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index aa416ef93c1a..3e27794ec750 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -327,6 +327,7 @@ struct diag_pkt { /* misc. */ #define SC15_PACKET 0xF #define SIZE_OF_CRC 1 +#define SIZE_OF_LT 1 #define LIM_MGMT_P_KEY 0x7FFF #define FULL_MGMT_P_KEY 0xFFFF diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 14f2a00c13c2..5280d82c344e 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -237,6 +237,13 @@ static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, return (struct ib_header *)hfi1_get_header(dd, rhf_addr); } +static inline struct hfi1_16b_header + *hfi1_get_16B_header(struct hfi1_devdata *dd, + __le32 *rhf_addr) +{ + return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); +} + /* * Validate and encode the a given RcvArray Buffer size. * The function will check whether the given size falls within @@ -925,6 +932,11 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, packet->rhf_addr); sc = hfi1_9B_get_sc5(hdr, packet->rhf); + } else if (etype == RHF_RCV_TYPE_BYPASS) { + struct hfi1_16b_header *hdr = hfi1_get_16B_header( + packet->rcd->dd, + packet->rhf_addr); + sc = hfi1_16B_get_sc(hdr); } if (sc != SC15_PACKET) { int hwstate = driver_lstate(rcd->ppd); @@ -1386,9 +1398,14 @@ static int hfi1_setup_9B_packet(struct hfi1_packet *packet) } /* Query commonly used fields from packet header */ + packet->payload = packet->ebuf; packet->opcode = ib_bth_get_opcode(packet->ohdr); packet->slid = ib_get_slid(hdr); packet->dlid = ib_get_dlid(hdr); + if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE)))) + packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) - + be16_to_cpu(IB_MULTICAST_LID_BASE); packet->sl = ib_get_sl(hdr); packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf); packet->pad = ib_bth_get_pad(packet->ohdr); @@ -1402,6 +1419,73 @@ drop: return -EINVAL; } +static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) +{ + /* + * Bypass packets have a different header/payload split + * compared to an IB packet. + * Current split is set such that 16 bytes of the actual + * header is in the header buffer and the remining is in + * the eager buffer. We chose 16 since hfi1 driver only + * supports 16B bypass packets and we will be able to + * receive the entire LRH with such a split. + */ + + struct hfi1_ctxtdata *rcd = packet->rcd; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_ibport *ibp = &ppd->ibport_data; + u8 l4; + u8 grh_len; + + packet->hdr = (struct hfi1_16b_header *) + hfi1_get_16B_header(packet->rcd->dd, + packet->rhf_addr); + packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; + + l4 = hfi1_16B_get_l4(packet->hdr); + if (l4 == OPA_16B_L4_IB_LOCAL) { + grh_len = 0; + packet->ohdr = packet->ebuf; + packet->grh = NULL; + } else if (l4 == OPA_16B_L4_IB_GLOBAL) { + u32 vtf; + + grh_len = sizeof(struct ib_grh); + packet->ohdr = packet->ebuf + grh_len; + packet->grh = packet->ebuf; + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(packet->grh->version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else { + goto drop; + } + + /* Query commonly used fields from packet header */ + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->hlen = hdr_len_by_opcode[packet->opcode] + 8 + grh_len; + packet->payload = packet->ebuf + packet->hlen - (4 * sizeof(u32)); + packet->slid = hfi1_16B_get_slid(packet->hdr); + packet->dlid = hfi1_16B_get_dlid(packet->hdr); + if (unlikely(hfi1_is_16B_mcast(packet->dlid))) + packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) - + opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), + 16B); + packet->sc = hfi1_16B_get_sc(packet->hdr); + packet->sl = ibp->sc_to_sl[packet->sc]; + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + packet->extra_byte = SIZE_OF_LT; + packet->fecn = hfi1_16B_get_fecn(packet->hdr); + packet->becn = hfi1_16B_get_becn(packet->hdr); + + return 0; +drop: + hfi1_cdbg(PKT, "%s: packet dropped\n", __func__); + ibp->rvp.n_pkt_drops++; + return -EINVAL; +} + void handle_eflags(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; @@ -1464,8 +1548,8 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet) if (packet->rcd->is_vnic) return true; - if ((HFI1_GET_L2_TYPE(packet->ebuf) == OPA_VNIC_L2_TYPE) && - (HFI1_GET_L4_TYPE(packet->ebuf) == OPA_VNIC_L4_ETHR)) + if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) && + (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR)) return true; return false; @@ -1475,25 +1559,38 @@ int process_receive_bypass(struct hfi1_packet *packet) { struct hfi1_devdata *dd = packet->rcd->dd; - if (unlikely(rhf_err_flags(packet->rhf))) { - handle_eflags(packet); - } else if (hfi1_is_vnic_packet(packet)) { + if (hfi1_is_vnic_packet(packet)) { hfi1_vnic_bypass_rcv(packet); return RHF_RCV_CONTINUE; } - dd_dev_err(dd, "Unsupported bypass packet. Dropping\n"); - incr_cntr64(&dd->sw_rcv_bypass_packet_errors); - if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { - u64 *flits = packet->ebuf; + if (hfi1_setup_bypass_packet(packet)) + return RHF_RCV_CONTINUE; + + if (unlikely(rhf_err_flags(packet->rhf))) { + handle_eflags(packet); + return RHF_RCV_CONTINUE; + } - if (flits && !(packet->rhf & RHF_LEN_ERR)) { - dd->err_info_rcvport.packet_flit1 = flits[0]; - dd->err_info_rcvport.packet_flit2 = - packet->tlen > sizeof(flits[0]) ? flits[1] : 0; + if (hfi1_16B_get_l2(packet->hdr) == 0x2) { + hfi1_16B_rcv(packet); + } else { + dd_dev_err(dd, + "Bypass packets other than 16B are not supported in normal operation. Dropping\n"); + incr_cntr64(&dd->sw_rcv_bypass_packet_errors); + if (!(dd->err_info_rcvport.status_and_code & + OPA_EI_STATUS_SMASK)) { + u64 *flits = packet->ebuf; + + if (flits && !(packet->rhf & RHF_LEN_ERR)) { + dd->err_info_rcvport.packet_flit1 = flits[0]; + dd->err_info_rcvport.packet_flit2 = + packet->tlen > sizeof(flits[0]) ? + flits[1] : 0; + } + dd->err_info_rcvport.status_and_code |= + (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } - dd->err_info_rcvport.status_and_code |= - (OPA_EI_STATUS_SMASK | BAD_L2_ERR); } return RHF_RCV_CONTINUE; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fa9160f68bb7..dbbad760cad4 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -325,6 +326,7 @@ struct hfi1_ctxtdata { struct hfi1_packet { void *ebuf; void *hdr; + void *payload; struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; @@ -351,6 +353,83 @@ struct hfi1_packet { bool fecn; }; +/* + * OPA 16B Header + */ +#define OPA_16B_L4_MASK 0xFFull +#define OPA_16B_SC_MASK 0x1F00000ull +#define OPA_16B_SC_SHIFT 20 +#define OPA_16B_LID_MASK 0xFFFFFull +#define OPA_16B_DLID_MASK 0xF000ull +#define OPA_16B_DLID_SHIFT 20 +#define OPA_16B_DLID_HIGH_SHIFT 12 +#define OPA_16B_SLID_MASK 0xF00ull +#define OPA_16B_SLID_SHIFT 20 +#define OPA_16B_SLID_HIGH_SHIFT 8 +#define OPA_16B_BECN_MASK 0x80000000ull +#define OPA_16B_BECN_SHIFT 31 +#define OPA_16B_FECN_MASK 0x10000000ull +#define OPA_16B_FECN_SHIFT 28 +#define OPA_16B_L2_MASK 0x60000000ull +#define OPA_16B_L2_SHIFT 29 + +/* + * OPA 16B L2/L4 Encodings + */ +#define OPA_16B_L2_TYPE 0x02 +#define OPA_16B_L4_IB_LOCAL 0x09 +#define OPA_16B_L4_IB_GLOBAL 0x0A +#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR + +static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) +{ + return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); +} + +static inline u8 hfi1_16B_get_sc(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_SC_MASK) >> OPA_16B_SC_SHIFT); +} + +static inline u32 hfi1_16B_get_dlid(struct hfi1_16b_header *hdr) +{ + return (u32)((hdr->lrh[1] & OPA_16B_LID_MASK) | + (((hdr->lrh[2] & OPA_16B_DLID_MASK) >> + OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT)); +} + +static inline u32 hfi1_16B_get_slid(struct hfi1_16b_header *hdr) +{ + return (u32)((hdr->lrh[0] & OPA_16B_LID_MASK) | + (((hdr->lrh[2] & OPA_16B_SLID_MASK) >> + OPA_16B_SLID_HIGH_SHIFT) << OPA_16B_SLID_SHIFT)); +} + +static inline u8 hfi1_16B_get_becn(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[0] & OPA_16B_BECN_MASK) >> OPA_16B_BECN_SHIFT); +} + +static inline u8 hfi1_16B_get_fecn(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_FECN_MASK) >> OPA_16B_FECN_SHIFT); +} + +static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT); +} + +/* + * BTH + */ +#define OPA_16B_BTH_PAD_MASK 7 +static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) +{ + return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) & + OPA_16B_BTH_PAD_MASK); +} + struct rvt_sge_state; /* @@ -2084,11 +2163,55 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); /* * hfi1_check_mcast- Check if the given lid is - * in the IB multicast range. + * in the OPA multicast range. + * + * The LID might either reside in ah.dlid or might be + * in the GRH of the address handle as DGID if extended + * addresses are in use. */ -static inline bool hfi1_check_mcast(u16 lid) +static inline bool hfi1_check_mcast(u32 lid) +{ + return ((lid >= opa_get_mcast_base(OPA_MCAST_NR)) && + (lid != be32_to_cpu(OPA_LID_PERMISSIVE))); +} + +#define opa_get_lid(lid, format) \ + __opa_get_lid(lid, OPA_PORT_PACKET_FORMAT_##format) + +/* Convert a lid to a specific lid space */ +static inline u32 __opa_get_lid(u32 lid, u8 format) +{ + bool is_mcast = hfi1_check_mcast(lid); + + switch (format) { + case OPA_PORT_PACKET_FORMAT_8B: + case OPA_PORT_PACKET_FORMAT_10B: + if (is_mcast) + return (lid - opa_get_mcast_base(OPA_MCAST_NR) + + 0xF0000); + return lid & 0xFFFFF; + case OPA_PORT_PACKET_FORMAT_16B: + if (is_mcast) + return (lid - opa_get_mcast_base(OPA_MCAST_NR) + + 0xF00000); + return lid & 0xFFFFFF; + case OPA_PORT_PACKET_FORMAT_9B: + if (is_mcast) + return (lid - + opa_get_mcast_base(OPA_MCAST_NR) + + be16_to_cpu(IB_MULTICAST_LID_BASE)); + else + return lid & 0xFFFF; + default: + return lid; + } +} + +/* Return true if the given lid is the OPA 16B multicast range */ +static inline bool hfi1_is_16B_mcast(u32 lid) { - return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (lid != be16_to_cpu(IB_LID_PERMISSIVE))); + return ((lid >= + opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) && + (lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))); } #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index baa67bf0772b..cf74a56e20e5 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1916,7 +1916,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 76c2451a53d7..366f7b9517fe 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -297,7 +297,7 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct ib_other_headers *ohdr = packet->ohdr; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 6bf7a1b08491..dcf8c14c6d0e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -667,11 +667,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_header *hdr = packet->hdr; - void *data = packet->ebuf; + void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); - u32 bth1; u8 sl_from_sc; u8 extra_bytes = packet->pad; u8 opcode = packet->opcode; @@ -679,7 +678,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; - bth1 = be32_to_cpu(ohdr->bth[1]); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); pkey = ib_bth_get_pkey(ohdr); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 97ca42beb023..ebddab1a06f4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -571,7 +571,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, goto drop; mcast = rvt_mcast_find(&ibp->rvp, &packet->grh->dgid, - packet->dlid); + opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { @@ -627,14 +627,17 @@ drop: void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - bool is_mcast = false; - if (unlikely(hfi1_check_mcast(packet->dlid))) - is_mcast = true; + trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); + hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); +} + +void hfi1_16B_rcv(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; - trace_input_ibhdr(rcd->dd, packet, - !!(packet->rhf & RHF_DC_INFO_SMASK)); - hfi1_handle_packet(packet, is_mcast); + trace_input_ibhdr(rcd->dd, packet, false); + hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); } /* diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 34267c7ef85a..590aab270f98 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -104,6 +104,17 @@ enum { HFI1_HAS_GRH = (1 << 0), }; +struct hfi1_16b_header { + u32 lrh[4]; + union { + struct { + struct ib_grh grh; + struct ib_other_headers oth; + } l; + struct ib_other_headers oth; + } u; +} __packed; + struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -378,6 +389,8 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *); void hfi1_ib_rcv(struct hfi1_packet *packet); +void hfi1_16B_rcv(struct hfi1_packet *packet); + unsigned hfi1_get_npkeys(struct hfi1_devdata *); int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h index eec7c1424991..5ae781514e32 100644 --- a/drivers/infiniband/hw/hfi1/vnic.h +++ b/drivers/infiniband/hw/hfi1/vnic.h @@ -54,21 +54,6 @@ #define HFI1_VNIC_MAX_TXQ 16 #define HFI1_VNIC_MAX_PAD 12 -/* L2 header definitions */ -#define HFI1_L2_TYPE_OFFSET 0x7 -#define HFI1_L2_TYPE_SHFT 0x5 -#define HFI1_L2_TYPE_MASK 0x3 - -#define HFI1_GET_L2_TYPE(hdr) \ - ((*((u8 *)(hdr) + HFI1_L2_TYPE_OFFSET) >> HFI1_L2_TYPE_SHFT) & \ - HFI1_L2_TYPE_MASK) - -/* L4 type definitions */ -#define HFI1_L4_TYPE_OFFSET 8 - -#define HFI1_GET_L4_TYPE(data) \ - (*((u8 *)(data) + HFI1_L4_TYPE_OFFSET)) - /* L4 header definitions */ #define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index 2917a238a343..f419cbb05928 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -564,8 +564,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) int l4_type, vesw_id = -1; u8 q_idx; - l4_type = HFI1_GET_L4_TYPE(packet->ebuf); - if (likely(l4_type == OPA_VNIC_L4_ETHR)) { + l4_type = hfi1_16B_get_l4(packet->ebuf); + if (likely(l4_type == OPA_16B_L4_ETHR)) { vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h index 39d6890616a6..0c07a70bd7f6 100644 --- a/include/rdma/opa_vnic.h +++ b/include/rdma/opa_vnic.h @@ -54,9 +54,6 @@ #include -/* VNIC uses 16B header format */ -#define OPA_VNIC_L2_TYPE 0x2 - /* 16 header bytes + 2 reserved bytes */ #define OPA_VNIC_L2_HDR_LEN (16 + 2) -- cgit v1.2.3 From 30e07416cf48801f127019c1dfece8039f1da8e2 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:04 -0700 Subject: IB/hfi1: Add support to send 16B bypass packets We introduce struct hfi1_opa_header as a union of ib (9B) and 16B headers. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 33 ++++++++++++++++++------------- drivers/infiniband/hw/hfi1/ruc.c | 17 ++++++++-------- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 19 +++++++++--------- drivers/infiniband/hw/hfi1/uc.c | 4 ++-- drivers/infiniband/hw/hfi1/ud.c | 25 ++++++++++++----------- drivers/infiniband/hw/hfi1/verbs.c | 24 +++++++++++++++++++--- drivers/infiniband/hw/hfi1/verbs.h | 22 ++++++++++----------- 7 files changed, 84 insertions(+), 60 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index cf74a56e20e5..e3dbf6d45afe 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -273,9 +273,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && @@ -724,7 +724,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct ib_header hdr; + struct hfi1_opa_header opah; + struct ib_header *hdr; struct ib_other_headers *ohdr; unsigned long flags; @@ -741,16 +742,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, goto queue_ack; /* Construct the header */ + opah.hdr_type = 0; + hdr = &opah.ibh; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ hwords = 6; if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - hwords += hfi1_make_grh(ibp, &hdr.u.l.grh, + hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, rdma_ah_read_grh(&qp->remote_ah_attr), hwords, 0); - ohdr = &hdr.u.l.oth; + ohdr = &hdr->u.l.oth; lrh0 = HFI1_LRH_GRH; } else { - ohdr = &hdr.u.oth; + ohdr = &hdr->u.oth; lrh0 = HFI1_LRH_BTH; } /* read pkey_index w/o lock (its atomic) */ @@ -768,11 +772,11 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(ppd->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); + hdr->lrh[0] = cpu_to_be16(lrh0); + hdr->lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); + hdr->lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); + hdr->lrh[3] = cpu_to_be16(ppd->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); @@ -799,10 +803,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, } trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &hdr, ib_is_sc5(sc5)); + &opah, ib_is_sc5(sc5)); /* write the pbc and data */ - ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, hdr, hwords); return; @@ -985,9 +989,10 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) { struct ib_other_headers *ohdr; + struct ib_header *hdr = &opah->ibh; struct rvt_swqe *wqe; u32 opcode; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 4afa00f921f2..e30c64fcce67 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -668,7 +668,8 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4) +#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \ + hdr.ibh.u.oth.bth[2]) / 4) /** * build_ahg - create ahg in s_ahg @@ -743,8 +744,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - rdma_ah_read_grh(&qp->remote_ah_attr), + &ps->s_txreq->phdr.hdr.ibh.u.l.grh, + &qp->remote_ah_attr.grh, qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; middle = 0; @@ -773,14 +774,14 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, build_ahg(qp, bth2); else qp->s_flags &= ~RVT_S_AHG_VALID; - ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = + ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.ibh.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - ps->s_txreq->phdr.hdr.lrh[2] = + ps->s_txreq->phdr.hdr.ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.lrh[3] = + ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); + rdma_ah_get_path_bits(&qp->remote_ah_attr)); bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 0f2d2da057ec..73240255ffc5 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -213,9 +213,9 @@ DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, + struct hfi1_opa_header *opah, bool sc5), - TP_ARGS(dd, hdr, sc5), + TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) __field(u8, lnh) @@ -238,10 +238,11 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u32, psn) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(hdr)) + hfi1_trace_ib_hdr_len(&opah->ibh)) ), TP_fast_assign( struct ib_other_headers *ohdr; + struct ib_header *hdr = &opah->ibh; DD_DEV_ASSIGN(dd); @@ -294,18 +295,18 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr, TP_PROTO(struct hfi1_devdata *dd, - struct ib_header *hdr, bool sc5), - TP_ARGS(dd, hdr, sc5)); + struct hfi1_opa_header *opah, bool sc5), + TP_ARGS(dd, opah, sc5)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 366f7b9517fe..e0bb766ae36c 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -93,9 +93,9 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* Get the next send request. */ wqe = rvt_get_swqe_ptr(qp, qp->s_cur); diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index dcf8c14c6d0e..2af993cbe5af 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -357,12 +357,13 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { /* Header size in 32-bit words. */ - qp->s_hdrwords += hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.u.l.grh, - rdma_ah_read_grh(ah_attr), - qp->s_hdrwords, nwords); + qp->s_hdrwords += + hfi1_make_grh(ibp, + &ps->s_txreq->phdr.hdr.ibh.u.l.grh, + rdma_ah_read_grh(ah_attr), + qp->s_hdrwords, nwords); lrh0 = HFI1_LRH_GRH; - ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; /* * Don't worry about sending to locally attached multicast * QPs. It is unspecified by the spec. what happens. @@ -370,7 +371,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } else { /* Header size in 32-bit words. */ lrh0 = HFI1_LRH_BTH; - ohdr = &ps->s_txreq->phdr.hdr.u.oth; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; } if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { qp->s_hdrwords++; @@ -392,21 +393,21 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ps->s_txreq->sde = priv->s_sde; priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; - ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.lrh[1] = + ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); + ps->s_txreq->phdr.hdr.ibh.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(ah_attr)); - ps->s_txreq->phdr.hdr.lrh[2] = + ps->s_txreq->phdr.hdr.ibh.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { - ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; } else { lid = ppd->lid; if (lid) { lid |= rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1); - ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid); + ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(lid); } else { - ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE; + ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; } } if (wqe->wr.send_flags & IB_SEND_SOLICITED) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ebddab1a06f4..4aec805c645d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -506,6 +506,24 @@ again: } } +static u8 get_opcode(struct hfi1_opa_header *hdr) +{ + struct ib_other_headers *ohdr; + + if (hdr->hdr_type) { + if (hfi1_16B_get_l4(&hdr->opah) == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr->opah.u.oth; + else + ohdr = &hdr->opah.u.l.oth; + } else { + if (ib_get_lnh(&hdr->ibh) == HFI1_LRH_BTH) + ohdr = &hdr->ibh.u.oth; + else + ohdr = &hdr->ibh.u.l.oth; + } + return ib_bth_get_opcode(ohdr); +} + /* * Make sure the QP is ready and able to accept the given opcode. */ @@ -686,7 +704,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct ib_header *hdr; + struct hfi1_opa_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -1175,7 +1193,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct ib_header *h = &tx->phdr.hdr; + struct hfi1_opa_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1221,7 +1239,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int ret; u8 lnh; - hdr = &ps->s_txreq->phdr.hdr; + hdr = &ps->s_txreq->phdr.hdr.ibh; /* locate the pkey within the headers */ lnh = ib_get_lnh(hdr); if (lnh == HFI1_LRH_GRH) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 590aab270f98..20224100cbc5 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -115,6 +115,14 @@ struct hfi1_16b_header { } u; } __packed; +struct hfi1_opa_header { + union { + struct ib_header ibh; /* 9B header */ + struct hfi1_16b_header opah; /* 16B header */ + }; + u8 hdr_type; /* 9B or 16B */ +} __packed; + struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -124,7 +132,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct ib_header hdr; + struct hfi1_opa_header hdr; } __packed; /* @@ -326,7 +334,7 @@ u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); @@ -347,16 +355,6 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct ib_header *h) -{ - u16 lnh = be16_to_cpu(h->lrh[0]) & 3; - - if (lnh == IB_LNH_IBA_LOCAL) - return be32_to_cpu(h->u.oth.bth[0]) >> 24; - else - return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; -} - int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, -- cgit v1.2.3 From 5786adf3fde7aa22a68eedac9c59e40da76ffbfb Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:10 -0700 Subject: IB/hfi1: Add support to process 16B header errors Enhance hdr_rcverr() to also handle errors during 16B bypass packet receive. Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 58 ++++++++++++++++++++++++++++++------- drivers/infiniband/hw/hfi1/hfi.h | 13 +++++++-- drivers/infiniband/hw/hfi1/ruc.c | 31 +++++++++++++------- drivers/infiniband/hw/hfi1/ud.c | 3 +- drivers/infiniband/hw/hfi1/verbs.c | 39 ++++++++++++++++++++----- drivers/infiniband/hw/hfi1/verbs.h | 1 + 6 files changed, 112 insertions(+), 33 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 5280d82c344e..ae6a90d2a31c 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -269,8 +269,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, { struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); - u8 lnh = ib_get_lnh(rhdr); - bool has_grh = false; + u32 mlid_base; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct hfi1_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -278,14 +277,20 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; - if (lnh == HFI1_LRH_BTH) { - packet->ohdr = &rhdr->u.oth; - } else if (lnh == HFI1_LRH_GRH) { - has_grh = true; - packet->ohdr = &rhdr->u.l.oth; - packet->grh = &rhdr->u.l.grh; - } else { + if (packet->etype == RHF_RCV_TYPE_BYPASS) { goto drop; + } else { + u8 lnh = ib_get_lnh(rhdr); + + mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &rhdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + packet->ohdr = &rhdr->u.l.oth; + packet->grh = &rhdr->u.l.grh; + } else { + goto drop; + } } if (packet->rhf & RHF_TID_ERR) { @@ -293,14 +298,13 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ u32 dlid = ib_get_dlid(rhdr); u32 qp_num; - u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE); /* Sanity check packet */ if (tlen < 24) goto drop; /* Check for GRH */ - if (has_grh) { + if (packet->grh) { u32 vtf; struct ib_grh *grh = packet->grh; @@ -1370,6 +1374,35 @@ static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; } +static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) +{ + struct hfi1_pportdata *ppd = packet->rcd->ppd; + + /* slid and dlid cannot be 0 */ + if ((!packet->slid) || (!packet->dlid)) + return -EINVAL; + + /* Compare port lid with incoming packet dlid */ + if ((!(hfi1_is_16B_mcast(packet->dlid))) && + (packet->dlid != + opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { + if (packet->dlid != ppd->lid) + return -EINVAL; + } + + /* No multicast packets with SC15 */ + if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF)) + return -EINVAL; + + /* Packets with permissive DLID always on SC15 */ + if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), + 16B)) && + (packet->sc != 0xF)) + return -EINVAL; + + return 0; +} + static int hfi1_setup_9B_packet(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); @@ -1479,6 +1512,9 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) packet->fecn = hfi1_16B_get_fecn(packet->hdr); packet->becn = hfi1_16B_get_becn(packet->hdr); + if (hfi1_bypass_ingress_pkt_check(packet)) + goto drop; + return 0; drop: hfi1_cdbg(PKT, "%s: packet dropped\n", __func__); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index dbbad760cad4..ee19660ca2fa 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -372,6 +372,10 @@ struct hfi1_packet { #define OPA_16B_FECN_SHIFT 28 #define OPA_16B_L2_MASK 0x60000000ull #define OPA_16B_L2_SHIFT 29 +#define OPA_16B_PKEY_MASK 0xFFFF0000ull +#define OPA_16B_PKEY_SHIFT 16 +#define OPA_16B_LEN_MASK 0x7FF00000ull +#define OPA_16B_LEN_SHIFT 20 /* * OPA 16B L2/L4 Encodings @@ -420,6 +424,11 @@ static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr) return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT); } +static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr) +{ + return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT); +} + /* * BTH */ @@ -1597,9 +1606,9 @@ static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey, * by HW and rcv_pkey_check function should be called instead. */ static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey, - u8 sc5, u8 idx, u16 slid) + u8 sc5, u8 idx, u32 slid, bool force) { - if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) + if (!(force) && !(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) return 0; /* If SC15, pkey[0:14] must be 0x7fff */ diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index e30c64fcce67..d252f8f2207a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -227,15 +227,23 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) u32 sl = packet->sl; int migrated; u32 bth0, bth1; + u16 pkey; bth0 = be32_to_cpu(packet->ohdr->bth[0]); bth1 = be32_to_cpu(packet->ohdr->bth[1]); - migrated = bth0 & IB_BTH_MIG_REQ; + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + pkey = hfi1_16B_get_pkey(packet->hdr); + migrated = bth1 & OPA_BTH_MIG_REQ; + } else { + pkey = ib_bth_get_pkey(packet->ohdr); + migrated = bth0 & IB_BTH_MIG_REQ; + } if (qp->s_mig_state == IB_MIG_ARMED && migrated) { if (!packet->grh) { - if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) & - IB_AH_GRH) + if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) & + IB_AH_GRH) && + (packet->etype != RHF_RCV_TYPE_BYPASS)) return 1; } else { const struct ib_global_route *grh; @@ -254,10 +262,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) grh->dgid.global.interface_id)) return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, sc5, slid))) { - hfi1_bad_pkey(ibp, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, + slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ @@ -270,8 +278,9 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) spin_unlock_irqrestore(&qp->s_lock, flags); } else { if (!packet->grh) { - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & - IB_AH_GRH) + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & + IB_AH_GRH) && + (packet->etype != RHF_RCV_TYPE_BYPASS)) return 1; } else { const struct ib_global_route *grh; @@ -290,10 +299,10 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) grh->dgid.global.interface_id)) return 1; } - if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, + if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, sc5, slid))) { - hfi1_bad_pkey(ibp, (u16)bth0, sl, - 0, qp->ibqp.qp_num, slid, dlid); + hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, + slid, dlid); return 1; } /* Validate the SLID. See Ch. 9.6.1.5 */ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 2af993cbe5af..b708376b67da 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -109,7 +109,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); if (unlikely(ingress_pkey_check(ppd, pkey, sc5, - qp->s_pkey_index, slid))) { + qp->s_pkey_index, + slid, false))) { hfi1_bad_pkey(ibp, pkey, rdma_ah_get_sl(ah_attr), sqp->ibqp.qp_num, qp->ibqp.qp_num, diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 4aec805c645d..0b1556fed47e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -568,6 +568,24 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) return pbc; } +static int hfi1_do_pkey_check(struct hfi1_packet *packet) +{ + struct hfi1_ctxtdata *rcd = packet->rcd; + struct hfi1_pportdata *ppd = rcd->ppd; + struct hfi1_16b_header *hdr = packet->hdr; + u16 pkey; + + /* Pkey check needed only for bypass packets */ + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return 0; + + /* Perform pkey check */ + pkey = hfi1_16B_get_pkey(hdr); + return ingress_pkey_check(ppd, pkey, packet->sc, + packet->qp->s_pkey_index, + packet->slid, true); +} + static inline void hfi1_handle_packet(struct hfi1_packet *packet, bool is_mcast) { @@ -594,6 +612,8 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, goto drop; list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; + if (hfi1_do_pkey_check(packet)) + goto drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -613,15 +633,16 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, qp_num = ib_bth_get_qpn(packet->ohdr); rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); - if (!packet->qp) { - rcu_read_unlock(); - goto drop; - } + if (!packet->qp) + goto unlock_drop; + + if (hfi1_do_pkey_check(packet)) + goto unlock_drop; + if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, - true))) { - rcu_read_unlock(); - goto drop; - } + true))) + goto unlock_drop; + spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -632,6 +653,8 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, rcu_read_unlock(); } return; +unlock_drop: + rcu_read_unlock(); drop: ibp->rvp.n_pkt_drops++; } diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 20224100cbc5..68577a0c922b 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -95,6 +95,7 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define OPA_BTH_MIG_REQ BIT(31) #define RC_OP(x) IB_OPCODE_RC_##x #define UC_OP(x) IB_OPCODE_UC_##x -- cgit v1.2.3 From d98bb7f7e6fa29d45008370084d5cabac7ac69ed Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:16 -0700 Subject: IB/hfi1: Determine 9B/16B L2 header type based on Address handle When address handle attributes are initialized, the LIDs are transformed to be in the 32 bit LID space. When constructing the header, hfi1 driver will look at the LID to determine the packet header to be created. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 21 +++++--- drivers/infiniband/core/uverbs_cmd.c | 3 ++ drivers/infiniband/hw/hfi1/hfi.h | 92 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/qp.c | 28 +++++++++++ drivers/infiniband/hw/hfi1/verbs.c | 12 +++++ drivers/infiniband/hw/hfi1/verbs.h | 1 + include/rdma/ib_verbs.h | 15 ++++++ include/rdma/opa_addr.h | 4 +- 8 files changed, 168 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index da29e2863c84..0179b21bad34 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "sa.h" #include "core_priv.h" @@ -1239,6 +1240,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); + + if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && + (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))) + rdma_ah_set_make_grd(ah_attr, true); + rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) & get_src_path_mask(device, port_num)); @@ -2288,12 +2294,15 @@ static void update_sm_ah(struct work_struct *work) rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); if (port_attr.grh_required) { - rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); - - rdma_ah_set_subnet_prefix(&ah_attr, - cpu_to_be64(port_attr.subnet_prefix)); - rdma_ah_set_interface_id(&ah_attr, - cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); + if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { + rdma_ah_set_make_grd(&ah_attr, true); + } else { + rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); + rdma_ah_set_subnet_prefix(&ah_attr, + cpu_to_be64(port_attr.subnet_prefix)); + rdma_ah_set_interface_id(&ah_attr, + cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); + } } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 7ea5a3bb5a04..dc7d773a96ec 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2009,6 +2009,7 @@ static int modify_qp(struct ib_uverbs_file *file, rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate); rdma_ah_set_port_num(&attr->ah_attr, cmd->base.dest.port_num); + rdma_ah_set_make_grd(&attr->ah_attr, false); attr->alt_ah_attr.type = rdma_ah_find_type(qp->device, cmd->base.dest.port_num); @@ -2032,6 +2033,7 @@ static int modify_qp(struct ib_uverbs_file *file, cmd->base.alt_dest.static_rate); rdma_ah_set_port_num(&attr->alt_ah_attr, cmd->base.alt_dest.port_num); + rdma_ah_set_make_grd(&attr->alt_ah_attr, false); ret = ib_modify_qp_with_udata(qp, attr, modify_qp_mask(qp->qp_type, @@ -2584,6 +2586,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, } attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num); + rdma_ah_set_make_grd(&attr, false); rdma_ah_set_dlid(&attr, cmd.attr.dlid); rdma_ah_set_sl(&attr, cmd.attr.sl); rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index ee19660ca2fa..cec9590870ba 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -70,6 +70,7 @@ #include #include #include +#include #include "chip_registers.h" #include "common.h" @@ -353,6 +354,10 @@ struct hfi1_packet { bool fecn; }; +/* Packet types */ +#define HFI1_PKT_TYPE_9B 0 +#define HFI1_PKT_TYPE_16B 1 + /* * OPA 16B Header */ @@ -2170,6 +2175,31 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); #define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev)) #define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev)) +static inline void hfi1_update_ah_attr(struct ib_device *ibdev, + struct rdma_ah_attr *attr) +{ + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + u32 dlid = rdma_ah_get_dlid(attr); + + /* + * Kernel clients may not have setup GRH information + * Set that here. + */ + ibp = to_iport(ibdev, rdma_ah_get_port_num(attr)); + ppd = ppd_from_ibp(ibp); + if ((((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) || + (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))) && + (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) && + (!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))) || + (rdma_ah_get_make_grd(attr))) { + rdma_ah_set_ah_flags(attr, IB_AH_GRH); + rdma_ah_set_interface_id(attr, OPA_MAKE_ID(dlid)); + rdma_ah_set_subnet_prefix(attr, ibp->rvp.gid_prefix); + } +} + /* * hfi1_check_mcast- Check if the given lid is * in the OPA multicast range. @@ -2223,4 +2253,66 @@ static inline bool hfi1_is_16B_mcast(u32 lid) opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) && (lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))); } + +static inline void hfi1_make_opa_lid(struct rdma_ah_attr *attr) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(attr); + u32 dlid = rdma_ah_get_dlid(attr); + + /* Modify ah_attr.dlid to be in the 32 bit LID space. + * This is how the address will be laid out: + * Assuming MCAST_NR to be 4, + * 32 bit permissive LID = 0xFFFFFFFF + * Multicast LID range = 0xFFFFFFFE to 0xF0000000 + * Unicast LID range = 0xEFFFFFFF to 1 + * Invalid LID = 0 + */ + if (ib_is_opa_gid(&grh->dgid)) + dlid = opa_get_lid_from_gid(&grh->dgid); + else if ((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) && + (dlid != be32_to_cpu(OPA_LID_PERMISSIVE))) + dlid = dlid - be16_to_cpu(IB_MULTICAST_LID_BASE) + + opa_get_mcast_base(OPA_MCAST_NR); + else if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) + dlid = be32_to_cpu(OPA_LID_PERMISSIVE); + + rdma_ah_set_dlid(attr, dlid); +} + +static inline u8 hfi1_get_packet_type(u32 lid) +{ + /* 9B if lid > 0xF0000000 */ + if (lid >= opa_get_mcast_base(OPA_MCAST_NR)) + return HFI1_PKT_TYPE_9B; + + /* 16B if lid > 0xC000 */ + if (lid >= opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 9B)) + return HFI1_PKT_TYPE_16B; + + return HFI1_PKT_TYPE_9B; +} + +static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr) +{ + /* + * If there was an incoming 16B packet with permissive + * LIDs, OPA GIDs would have been programmed when those + * packets were received. A 16B packet will have to + * be sent in response to that packet. Return a 16B + * header type if that's the case. + */ + if (rdma_ah_get_dlid(attr) == be32_to_cpu(OPA_LID_PERMISSIVE)) + return (ib_is_opa_gid(&rdma_ah_read_grh(attr)->dgid)) ? + HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B; + + /* + * Return a 16B header type if either the the destination + * or source lid is extended. + */ + if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B) + return HFI1_PKT_TYPE_16B; + + return hfi1_get_packet_type(lid); +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index b801d8469956..0fca6dfe8d9f 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -232,6 +232,31 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, return 0; } +/* + * qp_set_16b - Set the hdr_type based on whether the slid or the + * dlid in the connection is extended. Only applicable for RC and UC + * QPs. UD QPs determine this on the fly from the ah in the wqe + */ +static inline void qp_set_16b(struct rvt_qp *qp) +{ + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + struct hfi1_qp_priv *priv = qp->priv; + + /* Update ah_attr to account for extended LIDs */ + hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr); + + /* Create 32 bit LIDs */ + hfi1_make_opa_lid(&qp->remote_ah_attr); + + if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) + return; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr); +} + void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -242,6 +267,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); + qp_set_16b(qp); } if (attr_mask & IB_QP_PATH_MIG_STATE && @@ -251,6 +277,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); + qp_set_16b(qp); } } @@ -751,6 +778,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp) qp->s_flags |= RVT_S_AHG_CLEAR; priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); + qp_set_16b(qp); ev.device = qp->ibqp.device; ev.element.qp = &qp->ibqp; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 0b1556fed47e..18b27276f202 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1421,6 +1421,15 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_2048); + /* + * sm_lid of 0xFFFF needs special handling so that it can + * be differentiated from a permissve LID of 0xFFFF. + * We set the grh_required flag here so the SA can program + * the DGID in the address handle appropriately + */ + if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)) + props->grh_required = true; + return 0; } @@ -1528,6 +1537,7 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, struct hfi1_pportdata *ppd; struct hfi1_devdata *dd; u8 sc5; + struct rdma_ah_attr *attr = &ah->attr; /* * Do not trust reading anything from rvt_ah at this point as it is not @@ -1537,6 +1547,8 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); ppd = ppd_from_ibp(ibp); sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)]; + hfi1_update_ah_attr(ibdev, attr); + hfi1_make_opa_lid(attr); dd = dd_from_ppd(ppd); ah->vl = sc_to_vlt(dd, sc5); if (ah->vl < num_vls || ah->vl == 15) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 68577a0c922b..d3dd0c01b8f6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -147,6 +147,7 @@ struct hfi1_qp_priv { u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; + u8 hdr_type; /* 9B or 16B */ }; /* diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 70a183179224..8f263930c56f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -864,6 +864,7 @@ struct roce_ah_attr { struct opa_ah_attr { u32 dlid; u8 src_path_bits; + bool make_grd; }; struct rdma_ah_attr { @@ -3625,6 +3626,20 @@ static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr) return 0; } +static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr, + bool make_grd) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + attr->opa.make_grd = make_grd; +} + +static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_OPA) + return attr->opa.make_grd; + return false; +} + static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num) { attr->port_num = port_num; diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 8d3ad4ecbea1..9ae126fb8648 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -71,7 +71,7 @@ * * @gid: The Global identifier */ -static inline bool ib_is_opa_gid(union ib_gid *gid) +static inline bool ib_is_opa_gid(const union ib_gid *gid) { return ((be64_to_cpu(gid->global.interface_id) >> 40) == OPA_SPECIAL_OUI); @@ -84,7 +84,7 @@ static inline bool ib_is_opa_gid(union ib_gid *gid) * * @gid: The Global identifier */ -static inline u32 opa_get_lid_from_gid(union ib_gid *gid) +static inline u32 opa_get_lid_from_gid(const union ib_gid *gid) { return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF; } -- cgit v1.2.3 From 88733e3b845024cb2324a68469a4a25fdd9c0a6c Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:23 -0700 Subject: IB/hfi1: Add 16B UD support Add 16B bypass packet support for UD traffic types. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 35 +-- drivers/infiniband/hw/hfi1/hfi.h | 117 +++++++++- drivers/infiniband/hw/hfi1/mad.c | 8 +- drivers/infiniband/hw/hfi1/ruc.c | 4 +- drivers/infiniband/hw/hfi1/ud.c | 421 +++++++++++++++++++++++++++--------- drivers/infiniband/hw/hfi1/verbs.h | 2 +- include/rdma/opa_addr.h | 1 + 7 files changed, 457 insertions(+), 131 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index ae6a90d2a31c..fc7085d6cf3f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -437,23 +437,33 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct ib_header *hdr = pkt->hdr; struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; u32 rqpn = 0, bth1; - u16 rlid, dlid = ib_get_dlid(hdr); - u8 sc, svc_type; + u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr); + u8 hdr_type, sc, svc_type; bool is_mcast = false; + if (pkt->etype == RHF_RCV_TYPE_BYPASS) { + is_mcast = hfi1_is_16B_mcast(dlid); + pkey = hfi1_16B_get_pkey(pkt->hdr); + sc = hfi1_16B_get_sc(pkt->hdr); + hdr_type = HFI1_PKT_TYPE_16B; + } else { + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + pkey = ib_bth_get_pkey(ohdr); + sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); + hdr_type = HFI1_PKT_TYPE_9B; + } + switch (qp->ibqp.qp_type) { case IB_QPT_SMI: case IB_QPT_GSI: case IB_QPT_UD: - rlid = ib_get_slid(hdr); - rqpn = ib_get_sqpn(ohdr); + rlid = ib_get_slid(pkt->hdr); + rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); break; case IB_QPT_UC: rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); @@ -469,14 +479,11 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hfi1_9B_get_sc5(hdr, pkt->rhf); - bth1 = be32_to_cpu(ohdr->bth[1]); - if (do_cnp && (bth1 & IB_FECN_SMASK)) { - u16 pkey = ib_bth_get_pkey(ohdr); - - return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - } + /* Call appropriate CNP handler */ + if (do_cnp && (bth1 & IB_FECN_SMASK)) + hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, + dlid, rlid, sc, grh); if (!is_mcast && (bth1 & IB_BECN_SMASK)) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cec9590870ba..7e21192da8e1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -831,6 +831,10 @@ struct hfi1_pportdata { typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); typedef void (*opcode_handler)(struct hfi1_packet *packet); +typedef void (*hfi1_make_req)(struct rvt_qp *qp, + struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe); + /* return values for the RHF receive functions */ #define RHF_RCV_CONTINUE 0 /* keep going */ @@ -1373,6 +1377,13 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd); void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd); extern const struct pci_device_id hfi1_pci_tbl[]; +void hfi1_make_ud_req_9B(struct rvt_qp *qp, + struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe); + +void hfi1_make_ud_req_16B(struct rvt_qp *qp, + struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe); /* receive packet handler dispositions */ #define RCV_PKT_OK 0x0 /* keep going */ @@ -1507,6 +1518,18 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); +void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh); +typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &return_cnp, + [HFI1_PKT_TYPE_16B] = &return_cnp_16B +}; #define PKEY_CHECK_INVALID -1 int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, u8 sc5, int8_t s_pkey_index); @@ -1747,12 +1770,22 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct ib_other_headers *ohdr = pkt->ohdr; - u32 bth1; - bth1 = be32_to_cpu(ohdr->bth[1]); - if (unlikely(bth1 & (IB_BECN_SMASK | IB_FECN_SMASK))) { + u32 bth1; + bool becn = false; + bool fecn = false; + + if (pkt->etype == RHF_RCV_TYPE_BYPASS) { + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); + } else { + bth1 = be32_to_cpu(ohdr->bth[1]); + fecn = bth1 & IB_FECN_SMASK; + becn = bth1 & IB_BECN_SMASK; + } + if (unlikely(fecn || becn)) { hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return !!(bth1 & IB_FECN_SMASK); + return fecn; } return false; } @@ -2315,4 +2348,80 @@ static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr) return hfi1_get_packet_type(lid); } + +static inline void hfi1_make_ext_grh(struct hfi1_packet *packet, + struct ib_grh *grh, u32 slid, + u32 dlid) +{ + struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + if (!ibp) + return; + + grh->hop_limit = 1; + grh->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; + if (slid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B)) + grh->sgid.global.interface_id = + OPA_MAKE_ID(be32_to_cpu(OPA_LID_PERMISSIVE)); + else + grh->sgid.global.interface_id = OPA_MAKE_ID(slid); + + /* + * Upper layers (like mad) may compare the dgid in the + * wc that is obtained here with the sgid_index in + * the wr. Since sgid_index in wr is always 0 for + * extended lids, set the dgid here to the default + * IB gid. + */ + grh->dgid.global.subnet_prefix = ibp->rvp.gid_prefix; + grh->dgid.global.interface_id = + cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]); +} + +static inline int hfi1_get_16b_padding(u32 hdr_size, u32 payload) +{ + return -(hdr_size + payload + (SIZE_OF_CRC << 2) + + SIZE_OF_LT) & 0x7; +} + +static inline void hfi1_make_ib_hdr(struct ib_header *hdr, + u16 lrh0, u16 len, + u16 dlid, u16 slid) +{ + hdr->lrh[0] = cpu_to_be16(lrh0); + hdr->lrh[1] = cpu_to_be16(dlid); + hdr->lrh[2] = cpu_to_be16(len); + hdr->lrh[3] = cpu_to_be16(slid); +} + +static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr, + u32 slid, u32 dlid, + u16 len, u16 pkey, + u8 becn, u8 fecn, u8 l4, + u8 sc) +{ + u32 lrh0 = 0; + u32 lrh1 = 0x40000000; + u32 lrh2 = 0; + u32 lrh3 = 0; + + lrh0 = (lrh0 & ~OPA_16B_BECN_MASK) | (becn << OPA_16B_BECN_SHIFT); + lrh0 = (lrh0 & ~OPA_16B_LEN_MASK) | (len << OPA_16B_LEN_SHIFT); + lrh0 = (lrh0 & ~OPA_16B_LID_MASK) | (slid & OPA_16B_LID_MASK); + lrh1 = (lrh1 & ~OPA_16B_FECN_MASK) | (fecn << OPA_16B_FECN_SHIFT); + lrh1 = (lrh1 & ~OPA_16B_SC_MASK) | (sc << OPA_16B_SC_SHIFT); + lrh1 = (lrh1 & ~OPA_16B_LID_MASK) | (dlid & OPA_16B_LID_MASK); + lrh2 = (lrh2 & ~OPA_16B_SLID_MASK) | + ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) | + ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | (pkey << OPA_16B_PKEY_SHIFT); + lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4; + + hdr->lrh[0] = lrh0; + hdr->lrh[1] = lrh1; + hdr->lrh[2] = lrh2; + hdr->lrh[3] = lrh3; +} #endif /* _HFI1_KERNEL_H */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 21fadb4b510c..1509bc6b76d8 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -373,12 +373,10 @@ static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid) * Send a bad P_Key trap (ch. 14.3.8). */ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2) + u32 qp1, u32 qp2, u32 lid1, u32 lid2) { struct trap_node *trap; u32 lid = ppd_from_ibp(ibp)->lid; - u32 _lid1 = lid1; - u32 _lid2 = lid2; ibp->rvp.n_pkt_drops++; ibp->rvp.pkey_violations++; @@ -389,8 +387,8 @@ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, return; /* Send violation trap */ - trap->data.ntc_257_258.lid1 = cpu_to_be32(_lid1); - trap->data.ntc_257_258.lid2 = cpu_to_be32(_lid2); + trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1); + trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2); trap->data.ntc_257_258.key = cpu_to_be32(key); trap->data.ntc_257_258.sl = sl << 3; trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1); diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index d252f8f2207a..6839bfae933c 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -649,7 +649,7 @@ done: * @ibp: a pointer to the IB port * @hdr: a pointer to the GRH header being constructed * @grh: the global route address to send to - * @hwords: the number of 32 bit words of header being sent + * @hwords: size of header after grh being sent in dwords * @nwords: the number of 32 bit words of data being sent * * Return the size of the header in 32 bit words. @@ -661,7 +661,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | (grh->traffic_class << IB_GRH_TCLASS_SHIFT) | (grh->flow_label << IB_GRH_FLOW_SHIFT)); - hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); + hdr->paylen = cpu_to_be16((hwords + nwords) << 2); /* next_hdr is defined by C8-7 in ch. 8.4.1 */ hdr->next_hdr = IB_GRH_NEXT_HDR; hdr->hop_limit = grh->hop_limit; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index b708376b67da..2ba74fdd6f15 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -53,6 +53,12 @@ #include "verbs_txreq.h" #include "qp.h" +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_req hfi1_make_ud_req_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_ud_req_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_ud_req_16B +}; + /** * ud_loopback - handle send on loopback QPs * @sqp: the sending QP @@ -67,6 +73,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) { struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); struct hfi1_pportdata *ppd; + struct hfi1_qp_priv *priv = sqp->priv; struct rvt_qp *qp; struct rdma_ah_attr *ah_attr; unsigned long flags; @@ -102,7 +109,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (qp->ibqp.qp_num > 1) { u16 pkey; - u16 slid; + u32 slid; u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index); @@ -176,9 +183,33 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { struct ib_grh grh; - const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); + struct ib_global_route grd = *(rdma_ah_read_grh(ah_attr)); + + /* + * For loopback packets with extended LIDs, the + * sgid_index in the GRH is 0 and the dgid is + * OPA GID of the sender. While creating a response + * to the loopback packet, IB core creates the new + * sgid_index from the DGID and that will be the + * OPA_GID_INDEX. The new dgid is from the sgid + * index and that will be in the IB GID format. + * + * We now have a case where the sent packet had a + * different sgid_index and dgid compared to the + * one that was received in response. + * + * Fix this inconsistency. + */ + if (priv->hdr_type == HFI1_PKT_TYPE_16B) { + if (grd.sgid_index == 0) + grd.sgid_index = OPA_GID_INDEX; - hfi1_make_grh(ibp, &grh, grd, 0, 0); + if (ib_is_opa_gid(&grd.dgid)) + grd.dgid.global.interface_id = + cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]); + } + + hfi1_make_grh(ibp, &grh, &grd, 0, 0); hfi1_copy_sge(&qp->r_sge, &grh, sizeof(grh), true, false); wc.wc_flags |= IB_WC_GRH; @@ -235,7 +266,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) wc.pkey_index = 0; } wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); + ((1 << ppd->lmc) - 1)); /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); @@ -252,6 +283,183 @@ drop: rcu_read_unlock(); } +static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe, + struct ib_other_headers *ohdr, + u16 *pkey, u32 extra_bytes, bool bypass) +{ + u32 bth0; + struct hfi1_ibport *ibp; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; + } else { + bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + } + + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth0 |= extra_bytes << 20; + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) + *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + else + *pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + if (!bypass) + bth0 |= *pkey; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); + ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); + /* + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? + qp->qkey : wqe->ud_wr.remote_qkey); + ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); +} + +void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe) +{ + u32 nwords, extra_bytes; + u16 len, slid, dlid, pkey; + u16 lrh0 = 0; + u8 sc5; + struct hfi1_qp_priv *priv = qp->priv; + struct ib_other_headers *ohdr; + struct rdma_ah_attr *ah_attr; + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + struct ib_grh *grh; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + + extra_bytes = -wqe->length & 3; + nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC; + /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */ + qp->s_hdrwords = 7; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + qp->s_hdrwords++; + + if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { + grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, + rdma_ah_read_grh(ah_attr), + qp->s_hdrwords - 2, nwords); + lrh0 = HFI1_LRH_GRH; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + } else { + lrh0 = HFI1_LRH_BTH; + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } + + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; + if (qp->ibqp.qp_type == IB_QPT_SMI) { + lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ + priv->s_sc = 0xf; + } else { + lrh0 |= (sc5 & 0xf) << 12; + priv->s_sc = sc5; + } + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B); + if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } else { + u16 lid = (u16)ppd->lid; + + if (lid) { + lid |= rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1); + slid = lid; + } else { + slid = be16_to_cpu(IB_LID_PERMISSIVE); + } + } + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false); + len = qp->s_hdrwords + nwords; + + /* Setup the packet */ + ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B; + hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, + lrh0, len, dlid, slid); +} + +void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + struct rvt_swqe *wqe) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct ib_other_headers *ohdr; + struct rdma_ah_attr *ah_attr; + struct hfi1_pportdata *ppd; + struct hfi1_ibport *ibp; + u32 dlid, slid, nwords, extra_bytes; + u16 len, pkey; + u8 l4, sc5; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; + /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ + qp->s_hdrwords = 9; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + qp->s_hdrwords++; + + /* SW provides space for CRC and LT for bypass packets. */ + extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), + wqe->length); + nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; + + if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) { + struct ib_grh *grh; + struct ib_global_route *grd = rdma_ah_retrieve_grh(ah_attr); + /* + * Ensure OPA GIDs are transformed to IB gids + * before creating the GRH. + */ + if (grd->sgid_index == OPA_GID_INDEX) { + dd_dev_warn(ppd->dd, "Bad sgid_index. sgid_index: %d\n", + grd->sgid_index); + grd->sgid_index = 0; + } + grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, + qp->s_hdrwords - 4, nwords); + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + } else { + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + l4 = OPA_16B_L4_IB_LOCAL; + } + + sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; + if (qp->ibqp.qp_type == IB_QPT_SMI) + priv->s_sc = 0xf; + else + priv->s_sc = sc5; + + dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 16B); + if (!ppd->lid) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); + else + slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1)); + + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + /* Convert dwords to flits */ + len = (qp->s_hdrwords + nwords) >> 1; + + /* Setup the packet */ + ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B; + hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah, + slid, dlid, len, pkey, 0, 0, l4, priv->s_sc); +} + /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP @@ -263,18 +471,12 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct ib_other_headers *ohdr; struct rdma_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; - u32 nwords; - u32 extra_bytes; - u32 bth0; - u16 lrh0; - u16 lid; int next_cur; - u8 sc5; + u32 lid; ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) @@ -311,13 +513,14 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - if (rdma_ah_get_dlid(ah_attr) < be16_to_cpu(IB_MULTICAST_LID_BASE) || - rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { + priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr); + if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) || + (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) { lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1); if (unlikely(!loopback && - (lid == ppd->lid || - (lid == be16_to_cpu(IB_LID_PERMISSIVE) && - qp->ibqp.qp_type == IB_QPT_GSI)))) { + ((lid == ppd->lid) || + ((lid == be32_to_cpu(OPA_LID_PERMISSIVE)) && + (qp->ibqp.qp_type == IB_QPT_GSI))))) { unsigned long tflags = ps->flags; /* * If DMAs are in progress, we can't generate @@ -341,11 +544,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } qp->s_cur = next_cur; - extra_bytes = -wqe->length & 3; - nwords = (wqe->length + extra_bytes) >> 2; - - /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ - qp->s_hdrwords = 7; ps->s_txreq->s_cur_size = wqe->length; ps->s_txreq->ss = &qp->s_sge; qp->s_srate = rdma_ah_get_static_rate(ah_attr); @@ -356,78 +554,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_sge.num_sge = wqe->wr.num_sge; qp->s_sge.total_len = wqe->length; - if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { - /* Header size in 32-bit words. */ - qp->s_hdrwords += - hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.ibh.u.l.grh, - rdma_ah_read_grh(ah_attr), - qp->s_hdrwords, nwords); - lrh0 = HFI1_LRH_GRH; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; - /* - * Don't worry about sending to locally attached multicast - * QPs. It is unspecified by the spec. what happens. - */ - } else { - /* Header size in 32-bit words. */ - lrh0 = HFI1_LRH_BTH; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - } - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { - qp->s_hdrwords++; - ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; - bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; - } else { - bth0 = IB_OPCODE_UD_SEND_ONLY << 24; - } - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)]; - lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4; - if (qp->ibqp.qp_type == IB_QPT_SMI) { - lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ - priv->s_sc = 0xf; - } else { - lrh0 |= (sc5 & 0xf) << 12; - priv->s_sc = sc5; - } + /* Make the appropriate header */ + hfi1_make_ud_req_tbl[priv->hdr_type](qp, ps, qp->s_wqe); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); ps->s_txreq->sde = priv->s_sde; priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); ps->s_txreq->psc = priv->s_sendcontext; - ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.ibh.lrh[1] = - cpu_to_be16(rdma_ah_get_dlid(ah_attr)); - ps->s_txreq->phdr.hdr.ibh.lrh[2] = - cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - if (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) { - ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; - } else { - lid = ppd->lid; - if (lid) { - lid |= rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1); - ps->s_txreq->phdr.hdr.ibh.lrh[3] = cpu_to_be16(lid); - } else { - ps->s_txreq->phdr.hdr.ibh.lrh[3] = IB_LID_PERMISSIVE; - } - } - if (wqe->wr.send_flags & IB_SEND_SOLICITED) - bth0 |= IB_BTH_SOLICITED; - bth0 |= extra_bytes << 20; - if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) - bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); - else - bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); - /* - * Qkeys with the high order bit set mean use the - * qkey from the QP context instead of the WR (see 10.2.5). - */ - ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ? - qp->qkey : wqe->ud_wr.remote_qkey); - ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); /* disarm any ahg */ priv->s_ahg->ahgcount = 0; priv->s_ahg->ahgidx = 0; @@ -497,6 +629,64 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey) return -1; } +void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, + u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, + u8 sc5, const struct ib_grh *old_grh) +{ + u64 pbc, pbc_flags = 0; + u32 bth0, plen, vl, hwords = 7; + u16 len; + u8 l4; + struct hfi1_16b_header hdr; + struct ib_other_headers *ohdr; + struct pio_buf *pbuf; + struct send_context *ctxt = qp_to_send_context(qp, sc5); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 nwords; + + /* Populate length */ + nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; + if (old_grh) { + struct ib_grh *grh = &hdr.u.l.grh; + + grh->version_tclass_flow = old_grh->version_tclass_flow; + grh->paylen = cpu_to_be16((hwords - 4 + nwords) << 2); + grh->hop_limit = 0xff; + grh->sgid = old_grh->dgid; + grh->dgid = old_grh->sgid; + ohdr = &hdr.u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + hwords += sizeof(struct ib_grh) / sizeof(u32); + } else { + ohdr = &hdr.u.oth; + l4 = OPA_16B_L4_IB_LOCAL; + } + + /* BIT 16 to 19 is TVER. Bit 20 to 22 is pad cnt */ + bth0 = (IB_OPCODE_CNP << 24) | (1 << 16) | + (hfi1_get_16b_padding(hwords << 2, 0) << 20); + ohdr->bth[0] = cpu_to_be32(bth0); + + ohdr->bth[1] = cpu_to_be32(remote_qpn); + ohdr->bth[2] = 0; /* PSN 0 */ + + /* Convert dwords to flits */ + len = (hwords + nwords) >> 1; + hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + + plen = 2 /* PBC */ + hwords + nwords; + pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + vl = sc_to_vlt(ppd->dd, sc5); + pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + if (ctxt) { + pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); + if (pbuf) + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, + &hdr, hwords); + } +} + void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh) @@ -535,11 +725,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hdr.lrh[0] = cpu_to_be16(lrh0); - hdr.lrh[1] = cpu_to_be16(dlid); - hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(slid); - + hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); @@ -672,18 +858,33 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); + u8 sc5 = packet->sc; u8 sl_from_sc; - u8 extra_bytes = packet->pad; u8 opcode = packet->opcode; u8 sl = packet->sl; u32 dlid = packet->dlid; u32 slid = packet->slid; + u8 extra_bytes; + bool dlid_is_permissive; + bool slid_is_permissive; + extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); qkey = ib_get_qkey(ohdr); src_qp = ib_get_sqpn(ohdr); - pkey = ib_bth_get_pkey(ohdr); - extra_bytes += (SIZE_OF_CRC << 2); + + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + u32 permissive_lid = + opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); + + pkey = hfi1_16B_get_pkey(packet->hdr); + dlid_is_permissive = (dlid == permissive_lid); + slid_is_permissive = (slid == permissive_lid); + } else { + hdr = packet->hdr; + pkey = ib_bth_get_pkey(ohdr); + dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); + slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); + } sl_from_sc = ibp->sc_to_sl[sc5]; process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); @@ -701,8 +902,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). */ if (qp->ibqp.qp_num) { - if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE)) + if (unlikely(dlid_is_permissive || slid_is_permissive)) goto drop; if (qp->ibqp.qp_num > 1) { if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) { @@ -740,8 +940,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (tlen > 2048) goto drop; - if ((hdr->lrh[1] == IB_LID_PERMISSIVE || - hdr->lrh[3] == IB_LID_PERMISSIVE) && + if ((dlid_is_permissive || slid_is_permissive) && smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) goto drop; @@ -794,7 +993,18 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) goto drop; } if (packet->grh) { - hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh, + hfi1_copy_sge(&qp->r_sge, packet->grh, + sizeof(struct ib_grh), true, false); + wc.wc_flags |= IB_WC_GRH; + } else if (packet->etype == RHF_RCV_TYPE_BYPASS) { + struct ib_grh grh; + /* + * Assuming we only created 16B on the send side + * if we want to use large LIDs, since GRH was stripped + * out when creating 16B, add back the GRH here. + */ + hfi1_make_ext_grh(packet, &grh, slid, dlid); + hfi1_copy_sge(&qp->r_sge, &grh, sizeof(struct ib_grh), true, false); wc.wc_flags |= IB_WC_GRH; } else { @@ -827,14 +1037,15 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else { wc.pkey_index = 0; } - + if (slid_is_permissive) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); wc.slid = slid; wc.sl = sl_from_sc; /* * Save the LMC lower bits if the destination LID is a unicast LID. */ - wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 : + wc.dlid_path_bits = hfi1_check_mcast(dlid) ? 0 : dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. */ diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d3dd0c01b8f6..4928ee4f92c1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -259,7 +259,7 @@ static inline int hfi1_send_ok(struct rvt_qp *qp) * This must be called with s_lock held. */ void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl, - u32 qp1, u32 qp2, u16 lid1, u16 lid2); + u32 qp1, u32 qp2, u32 lid1, u32 lid2); void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num); void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index 9ae126fb8648..e6e90f18e6d5 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -54,6 +54,7 @@ #define OPA_MAKE_ID(x) (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x))) #define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \ ? 0 : x) +#define OPA_GID_INDEX 0x1 /** * 0xF8 - 4 bits of multicast range and 1 bit for collective range * Example: For 24 bit LID space, -- cgit v1.2.3 From 863cf89d472fe7a61305b06de84b9ed2dea02611 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:29 -0700 Subject: IB/hfi1: Add 16B trace support Add trace support to 16B bypass packets during send and receive. Sample input header trace: -0 [000] d.h. 271742.509477: input_ibhdr: [0000:05:00.0] (16B) len:24 sc:0 dlid:0xf0000b slid:0x10002 age:0 becn:0 fecn:0 l4:10 rc:0 sc:0 pkey:0x8001 entropy:0x0000 op:0x65,UD_SEND_ONLY_WITH_IMMEDIATE se:0 m:1 pad:3 tver:0 qpn:0xffffff a:0 psn:0x00000001 hlen:248 deth qkey 0x01234567 sqpn 0x000004 Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 25 +++ drivers/infiniband/hw/hfi1/trace.c | 153 ++++++++++++- drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 351 ++++++++++++++++++++---------- 3 files changed, 406 insertions(+), 123 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7e21192da8e1..b07f42cfa5bf 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -381,6 +381,11 @@ struct hfi1_packet { #define OPA_16B_PKEY_SHIFT 16 #define OPA_16B_LEN_MASK 0x7FF00000ull #define OPA_16B_LEN_SHIFT 20 +#define OPA_16B_RC_MASK 0xE000000ull +#define OPA_16B_RC_SHIFT 25 +#define OPA_16B_AGE_MASK 0xFF0000ull +#define OPA_16B_AGE_SHIFT 16 +#define OPA_16B_ENTROPY_MASK 0xFFFFull /* * OPA 16B L2/L4 Encodings @@ -434,6 +439,26 @@ static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr) return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT); } +static inline u8 hfi1_16B_get_rc(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[1] & OPA_16B_RC_MASK) >> OPA_16B_RC_SHIFT); +} + +static inline u8 hfi1_16B_get_age(struct hfi1_16b_header *hdr) +{ + return (u8)((hdr->lrh[3] & OPA_16B_AGE_MASK) >> OPA_16B_AGE_SHIFT); +} + +static inline u16 hfi1_16B_get_len(struct hfi1_16b_header *hdr) +{ + return (u16)((hdr->lrh[0] & OPA_16B_LEN_MASK) >> OPA_16B_LEN_SHIFT); +} + +static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr) +{ + return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK); +} + /* * BTH */ diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index b80b74d0c252..9938bb983ce6 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,7 +47,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) +static u8 __get_ib_hdr_len(struct ib_header *hdr) { struct ib_other_headers *ohdr; u8 opcode; @@ -61,9 +61,60 @@ u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr) 0 : hdr_len_by_opcode[opcode] - (12 + 8); } +static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr) +{ + struct ib_other_headers *ohdr; + u8 opcode; + + if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + opcode = ib_bth_get_opcode(ohdr); + return hdr_len_by_opcode[opcode] == 0 ? + 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8); +} + +u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet) +{ + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return __get_ib_hdr_len(packet->hdr); + else + return __get_16b_hdr_len(packet->hdr); +} + +u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opa_hdr) +{ + if (!opa_hdr->hdr_type) + return __get_ib_hdr_len(&opa_hdr->ibh); + else + return __get_16b_hdr_len(&opa_hdr->opah); +} + const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) { - return "IB"; + if (packet->etype != RHF_RCV_TYPE_BYPASS) + return "IB"; + + switch (hfi1_16B_get_l2(packet->hdr)) { + case 0: + return "0"; + case 1: + return "1"; + case 2: + return "16B"; + case 3: + return "9B"; + } + return ""; +} + +const char *hfi1_trace_get_packet_type_str(u8 l4) +{ + if (l4) + return "16B"; + else + return "9B"; } #define IMM_PRN "imm:%d" @@ -89,10 +140,10 @@ static const char *parse_syndrome(u8 syndrome) return ""; } -void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, - u8 *se, u8 *pad, u8 *opcode, u8 *tver, - u16 *pkey, u32 *psn, u32 *qpn) +void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn) { *ack = ib_bth_get_ackreq(ohdr); *becn = ib_bth_get_becn(ohdr); @@ -107,8 +158,22 @@ void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, *qpn = ib_bth_get_qpn(ohdr); } +void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *mig, u8 *opcode, + u8 *pad, u8 *se, u8 *tver, + u32 *psn, u32 *qpn) +{ + *ack = ib_bth_get_ackreq(ohdr); + *mig = ib_bth_get_migreq(ohdr); + *opcode = ib_bth_get_opcode(ohdr); + *pad = ib_bth_get_pad(ohdr); + *se = ib_bth_get_se(ohdr); + *tver = ib_bth_get_tver(ohdr); + *psn = ib_bth_get_psn(ohdr); + *qpn = ib_bth_get_qpn(ohdr); +} + void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, - struct ib_other_headers **ohdr, u8 *lnh, u8 *lver, u8 *sl, u8 *sc, u16 *len, u32 *dlid, u32 *slid) { @@ -119,11 +184,79 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, *len = ib_get_len(hdr); *dlid = ib_get_dlid(hdr); *slid = ib_get_slid(hdr); +} + +void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, + u8 *age, u8 *becn, u8 *fecn, + u8 *l4, u8 *rc, u8 *sc, + u16 *entropy, u16 *len, u16 *pkey, + u32 *dlid, u32 *slid) +{ + *age = hfi1_16B_get_age(hdr); + *becn = hfi1_16B_get_becn(hdr); + *fecn = hfi1_16B_get_fecn(hdr); + *l4 = hfi1_16B_get_l4(hdr); + *rc = hfi1_16B_get_rc(hdr); + *sc = hfi1_16B_get_sc(hdr); + *entropy = hfi1_16B_get_entropy(hdr); + *len = hfi1_16B_get_len(hdr); + *pkey = hfi1_16B_get_pkey(hdr); + *dlid = hfi1_16B_get_dlid(hdr); + *slid = hfi1_16B_get_slid(hdr); +} + +#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x " +#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d" +#define LRH_16B_PRN "age:%d becn:%d fecn:%d l4:%d " \ + "rc:%d sc:%d pkey:0x%.4x entropy:0x%.4x" +const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, + u8 age, u8 becn, u8 fecn, u8 l4, + u8 lnh, const char *lnh_name, u8 lver, + u8 rc, u8 sc, u8 sl, u16 entropy, + u16 len, u16 pkey, u32 dlid, u32 slid) +{ + const char *ret = trace_seq_buffer_ptr(p); + + trace_seq_printf(p, LRH_PRN, len, sc, dlid, slid); + + if (bypass) + trace_seq_printf(p, LRH_16B_PRN, + age, becn, fecn, l4, rc, sc, pkey, entropy); - if (*lnh == HFI1_LRH_BTH) - *ohdr = &hdr->u.oth; else - *ohdr = &hdr->u.l.oth; + trace_seq_printf(p, LRH_9B_PRN, + lnh, lnh_name, lver, sl); + trace_seq_putc(p, 0); + + return ret; +} + +#define BTH_9B_PRN \ + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ + "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" +#define BTH_16B_PRN \ + "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ + "qpn:0x%.6x a:%d psn:0x%.8x" +const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, + u8 ack, u8 becn, u8 fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (bypass) + trace_seq_printf(p, BTH_16B_PRN, + opcode, opname, + se, mig, pad, tver, qpn, ack, psn); + + else + trace_seq_printf(p, BTH_9B_PRN, + opcode, opname, + se, mig, pad, tver, pkey, fecn, becn, + qpn, ack, psn); + trace_seq_putc(p, 0); + + return ret; } const char *parse_everbs_hdrs( diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 73240255ffc5..6721f84dafa5 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -95,17 +95,39 @@ __print_symbolic(opcode, \ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \ ib_opcode_name(CNP)) +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); -u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr); +u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); +u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); +const char *hfi1_trace_get_packet_type_str(u8 l4); const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); -void hfi1_trace_parse_bth(struct ib_other_headers *ohdr, - u8 *ack, u8 *becn, u8 *fecn, u8 *mig, - u8 *se, u8 *pad, u8 *opcode, u8 *tver, - u16 *pkey, u32 *psn, u32 *qpn); +void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *becn, u8 *fecn, u8 *mig, + u8 *se, u8 *pad, u8 *opcode, u8 *tver, + u16 *pkey, u32 *psn, u32 *qpn); void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, - struct ib_other_headers **ohdr, u8 *lnh, u8 *lver, u8 *sl, u8 *sc, u16 *len, u32 *dlid, u32 *slid); +void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, + u8 *ack, u8 *mig, u8 *opcode, + u8 *pad, u8 *se, u8 *tver, + u32 *psn, u32 *qpn); +void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr, + u8 *age, u8 *becn, u8 *fecn, + u8 *l4, u8 *rc, u8 *sc, + u16 *entropy, u16 *len, u16 *pkey, + u32 *dlid, u32 *slid); + +const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, + u8 age, u8 becn, u8 fecn, u8 l4, + u8 lnh, const char *lnh_name, u8 lver, + u8 rc, u8 sc, u8 sl, u16 entropy, + u16 len, u16 pkey, u32 dlid, u32 slid); + +const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, + u8 ack, u8 becn, u8 fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -114,13 +136,8 @@ void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5, __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) - -#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x" -#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d " -#define BTH_PRN \ - "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \ - "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x" -#define EHDR_PRN "hlen:%d %s" +#define PKT_ENTRY(pkt) __string(ptype, hfi1_trace_get_packet_str(packet)) +#define PKT_ASSIGN(pkt) __assign_str(ptype, hfi1_trace_get_packet_str(packet)) DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, @@ -129,75 +146,125 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) + PKT_ENTRY(packet) + __field(bool, bypass) + __field(u8, ack) + __field(u8, age) + __field(u8, becn) + __field(u8, fecn) + __field(u8, l4) __field(u8, lnh) __field(u8, lver) - __field(u8, sl) - __field(u16, len) - __field(u32, dlid) - __field(u8, sc) - __field(u32, slid) - __field(u8, opcode) - __field(u8, se) __field(u8, mig) + __field(u8, opcode) __field(u8, pad) + __field(u8, rc) + __field(u8, sc) + __field(u8, se) + __field(u8, sl) __field(u8, tver) + __field(u16, entropy) + __field(u16, len) __field(u16, pkey) - __field(u8, fecn) - __field(u8, becn) - __field(u32, qpn) - __field(u8, ack) + __field(u32, dlid) __field(u32, psn) + __field(u32, qpn) + __field(u32, slid) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(packet->hdr)) + hfi1_trace_packet_hdr_len(packet)) ), TP_fast_assign( - struct ib_other_headers *ohdr; + DD_DEV_ASSIGN(dd); + PKT_ASSIGN(packet); - DD_DEV_ASSIGN(dd); + if (packet->etype == RHF_RCV_TYPE_BYPASS) { + __entry->bypass = true; + hfi1_trace_parse_16b_hdr(packet->hdr, + &__entry->age, + &__entry->becn, + &__entry->fecn, + &__entry->l4, + &__entry->rc, + &__entry->sc, + &__entry->entropy, + &__entry->len, + &__entry->pkey, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_9b_hdr(packet->hdr, sc5, - &ohdr, - &__entry->lnh, - &__entry->lver, - &__entry->sl, - &__entry->sc, - &__entry->len, - &__entry->dlid, - &__entry->slid); + hfi1_trace_parse_16b_bth(packet->ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } else { + __entry->bypass = false; + hfi1_trace_parse_9b_hdr(packet->hdr, sc5, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_bth(ohdr, &__entry->ack, - &__entry->becn, &__entry->fecn, - &__entry->mig, &__entry->se, - &__entry->pad, &__entry->opcode, - &__entry->tver, &__entry->pkey, - &__entry->psn, &__entry->qpn); - /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), &ohdr->u, - __get_dynamic_array_len(ehdrs)); + hfi1_trace_parse_9b_bth(packet->ohdr, + &__entry->ack, + &__entry->becn, + &__entry->fecn, + &__entry->mig, + &__entry->se, + &__entry->pad, + &__entry->opcode, + &__entry->tver, + &__entry->pkey, + &__entry->psn, + &__entry->qpn); + } + /* extended headers */ + memcpy(__get_dynamic_array(ehdrs), + &packet->ohdr->u, + __get_dynamic_array_len(ehdrs)); ), - TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " - BTH_PRN " " EHDR_PRN, + TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __entry->len, - __entry->sc, - __entry->dlid, - __entry->slid, - __entry->lnh, show_lnh(__entry->lnh), - __entry->lver, - __entry->sl, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->mig, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->fecn, - __entry->becn, - __entry->qpn, - __entry->ack, - __entry->psn, + __get_str(ptype), + hfi1_trace_fmt_lrh(p, + __entry->bypass, + __entry->age, + __entry->becn, + __entry->fecn, + __entry->l4, + __entry->lnh, + show_lnh(__entry->lnh), + __entry->lver, + __entry->rc, + __entry->sc, + __entry->sl, + __entry->entropy, + __entry->len, + __entry->pkey, + __entry->dlid, + __entry->slid), + hfi1_trace_fmt_bth(p, + __entry->bypass, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( @@ -213,78 +280,136 @@ DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr, DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_opa_header *opah, - bool sc5), + struct hfi1_opa_header *opah, bool sc5), TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) + __field(bool, bypass) + __field(u8, ack) + __field(u8, age) + __field(u8, becn) + __field(u8, fecn) + __field(u8, l4) __field(u8, lnh) __field(u8, lver) - __field(u8, sl) - __field(u16, len) - __field(u32, dlid) - __field(u8, sc) - __field(u32, slid) - __field(u8, opcode) - __field(u8, se) __field(u8, mig) + __field(u8, opcode) __field(u8, pad) + __field(u8, rc) + __field(u8, sc) + __field(u8, se) + __field(u8, sl) __field(u8, tver) + __field(u16, entropy) + __field(u16, len) __field(u16, pkey) - __field(u8, fecn) - __field(u8, becn) - __field(u32, qpn) - __field(u8, ack) + __field(u32, dlid) __field(u32, psn) + __field(u32, qpn) + __field(u32, slid) /* extended headers */ __dynamic_array(u8, ehdrs, - hfi1_trace_ib_hdr_len(&opah->ibh)) + hfi1_trace_opa_hdr_len(opah)) ), TP_fast_assign( struct ib_other_headers *ohdr; - struct ib_header *hdr = &opah->ibh; DD_DEV_ASSIGN(dd); - hfi1_trace_parse_9b_hdr(hdr, sc5, - &ohdr, &__entry->lnh, - &__entry->lver, &__entry->sl, - &__entry->sc, &__entry->len, - &__entry->dlid, &__entry->slid); + if (opah->hdr_type) { + __entry->bypass = true; + hfi1_trace_parse_16b_hdr(&opah->opah, + &__entry->age, + &__entry->becn, + &__entry->fecn, + &__entry->l4, + &__entry->rc, + &__entry->sc, + &__entry->entropy, + &__entry->len, + &__entry->pkey, + &__entry->dlid, + &__entry->slid); - hfi1_trace_parse_bth(ohdr, &__entry->ack, - &__entry->becn, &__entry->fecn, - &__entry->mig, &__entry->se, - &__entry->pad, &__entry->opcode, - &__entry->tver, &__entry->pkey, - &__entry->psn, &__entry->qpn); + if (entry->l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &opah->opah.u.oth; + else + ohdr = &opah->opah.u.l.oth; + hfi1_trace_parse_16b_bth(ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } else { + __entry->bypass = false; + hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, + &__entry->lnh, + &__entry->lver, + &__entry->sl, + &__entry->sc, + &__entry->len, + &__entry->dlid, + &__entry->slid); + if (entry->lnh == HFI1_LRH_BTH) + ohdr = &opah->ibh.u.oth; + else + ohdr = &opah->ibh.u.l.oth; + hfi1_trace_parse_9b_bth(ohdr, + &__entry->ack, + &__entry->becn, + &__entry->fecn, + &__entry->mig, + &__entry->se, + &__entry->pad, + &__entry->opcode, + &__entry->tver, + &__entry->pkey, + &__entry->psn, + &__entry->qpn); + } /* extended headers */ memcpy(__get_dynamic_array(ehdrs), &ohdr->u, __get_dynamic_array_len(ehdrs)); ), - TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " " - BTH_PRN " " EHDR_PRN, + TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __entry->len, - __entry->sc, - __entry->dlid, - __entry->slid, - __entry->lnh, show_lnh(__entry->lnh), - __entry->lver, - __entry->sl, - /* BTH */ - __entry->opcode, show_ib_opcode(__entry->opcode), - __entry->se, - __entry->mig, - __entry->pad, - __entry->tver, - __entry->pkey, - __entry->fecn, - __entry->becn, - __entry->qpn, - __entry->ack, - __entry->psn, + hfi1_trace_get_packet_type_str(__entry->l4), + hfi1_trace_fmt_lrh(p, + __entry->bypass, + __entry->age, + __entry->becn, + __entry->fecn, + __entry->l4, + __entry->lnh, + show_lnh(__entry->lnh), + __entry->lver, + __entry->rc, + __entry->sc, + __entry->sl, + __entry->entropy, + __entry->len, + __entry->pkey, + __entry->dlid, + __entry->slid), + hfi1_trace_fmt_bth(p, + __entry->bypass, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( -- cgit v1.2.3 From 51e658f5dd362cc8666f3f5ec1986660e3e51047 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Fri, 4 Aug 2017 13:54:35 -0700 Subject: IB/rdmavt, hfi1, qib: Enhance rdmavt and hfi1 to use 32 bit lids Increase lid used in hfi1 driver to 32 bits. qib continues to use 16 bit lids. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 12 +++-- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/mad.c | 91 ++++++++++++++++++++++++++++++++----- drivers/infiniband/hw/hfi1/verbs.c | 23 +--------- drivers/infiniband/hw/hfi1/verbs.h | 2 - drivers/infiniband/hw/qib/qib_mad.c | 4 +- include/rdma/rdma_vt.h | 2 +- 7 files changed, 93 insertions(+), 43 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ee1324cce25a..cbda37386982 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -10067,10 +10067,16 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) struct hfi1_devdata *dd = ppd->dd; u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); + u32 lid; + /* + * Program 0 in CSR if port lid is extended. This prevents + * 9B packets being sent out for large lids. + */ + lid = (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ? 0 : ppd->lid; c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); - c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) + c1 |= ((lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) | ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK) << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT); @@ -10081,7 +10087,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) */ sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) << SEND_CTXT_CHECK_SLID_MASK_SHIFT) | - (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << + (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << SEND_CTXT_CHECK_SLID_VALUE_SHIFT); for (i = 0; i < dd->chip_send_contexts; i++) { @@ -10091,7 +10097,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) } /* Now we have to do the same thing for the sdma engines */ - sdma_update_lmc(dd, mask, ppd->lid); + sdma_update_lmc(dd, mask, lid); } static const char *state_completed_string(u32 completed) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b07f42cfa5bf..52cae1146b80 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -718,7 +718,7 @@ struct hfi1_pportdata { u32 ibmaxlen; u32 current_egress_rate; /* units [10^6 bits/sec] */ /* LID programmed for this instance */ - u16 lid; + u32 lid; /* list of pkeys programmed; 0 if not set */ u16 pkeys[MAX_PKEY_VALUES]; u16 link_width_supported; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1509bc6b76d8..cdcb4d021480 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -234,6 +234,61 @@ static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp, spin_unlock_irqrestore(&ibp->rvp.lock, flags); } +static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp, + struct rdma_ah_attr *attr, u32 dlid) +{ + rdma_ah_set_dlid(attr, dlid); + rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port); + if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + struct ib_global_route *grh = rdma_ah_retrieve_grh(attr); + + rdma_ah_set_ah_flags(attr, IB_AH_GRH); + grh->sgid_index = 0; + grh->hop_limit = 1; + grh->dgid.global.subnet_prefix = + ibp->rvp.gid_prefix; + grh->dgid.global.interface_id = OPA_MAKE_ID(dlid); + } +} + +static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp, + struct rvt_ah *ah, u32 dlid) +{ + struct rdma_ah_attr attr; + struct rvt_qp *qp0; + int ret = -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.type = ah->ibah.type; + hfi1_update_sm_ah_attr(ibp, &attr, dlid); + rcu_read_lock(); + qp0 = rcu_dereference(ibp->rvp.qp[0]); + if (qp0) + ret = rdma_modify_ah(&ah->ibah, &attr); + rcu_read_unlock(); + return ret; +} + +static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) +{ + struct rdma_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct rvt_qp *qp0; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ppd(ppd); + u8 port_num = ppd->port; + + memset(&attr, 0, sizeof(attr)); + attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); + hfi1_update_sm_ah_attr(ibp, &attr, dlid); + rcu_read_lock(); + qp0 = rcu_dereference(ibp->rvp.qp[0]); + if (qp0) + ah = rdma_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) { struct ib_mad_send_buf *send_buf; @@ -1283,8 +1338,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct hfi1_ibport *ibp; u8 clientrereg; unsigned long flags; - u32 smlid, opa_lid; /* tmp vars to hold LID values */ - u16 lid; + u32 smlid; + u32 lid; u8 ls_old, ls_new, ps_new; u8 vls; u8 msl; @@ -1301,22 +1356,20 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } - opa_lid = be32_to_cpu(pi->lid); - if (opa_lid & 0xFFFF0000) { - pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid); + lid = be32_to_cpu(pi->lid); + if (lid & 0xFF000000) { + pr_warn("OPA_PortInfo lid out of range: %X\n", lid); smp->status |= IB_SMP_INVALID_FIELD; goto get_only; } - lid = (u16)(opa_lid & 0x0000FFFF); smlid = be32_to_cpu(pi->sm_lid); - if (smlid & 0xFFFF0000) { + if (smlid & 0xFF000000) { pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid); smp->status |= IB_SMP_INVALID_FIELD; goto get_only; } - smlid &= 0x0000FFFF; clientrereg = (pi->clientrereg_subnettimeout & OPA_PI_MASK_CLIENT_REREGISTER); @@ -1331,12 +1384,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, ls_old = driver_lstate(ppd); ibp->rvp.mkey = pi->mkey; - ibp->rvp.gid_prefix = pi->subnet_prefix; + if (ibp->rvp.gid_prefix != pi->subnet_prefix) { + ibp->rvp.gid_prefix = pi->subnet_prefix; + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); + } ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period); /* Must be a valid unicast LID address. */ if ((lid == 0 && ls_old > IB_PORT_INIT) || - lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + (hfi1_is_16B_mcast(lid))) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n", lid); @@ -1349,6 +1406,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC); event.event = IB_EVENT_LID_CHANGE; ib_dispatch_event(&event); + + if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) { + /* Manufacture GID from LID to support extended + * addresses + */ + ppd->guids[HFI1_PORT_GUID_INDEX + 1] = + be64_to_cpu(OPA_MAKE_ID(lid)); + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); + } } msl = pi->smsl & OPA_PI_MASK_SMSL; @@ -1359,7 +1426,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, /* Must be a valid unicast LID address. */ if ((smlid == 0 && ls_old > IB_PORT_INIT) || - smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) { + (hfi1_is_16B_mcast(smlid))) { smp->status |= IB_SMP_INVALID_FIELD; pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid); } else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) { @@ -1367,7 +1434,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, spin_lock_irqsave(&ibp->rvp.lock, flags); if (ibp->rvp.sm_ah) { if (smlid != ibp->rvp.sm_lid) - rdma_ah_set_dlid(&ibp->rvp.sm_ah->attr, smlid); + hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid); if (msl != ibp->rvp.sm_sl) rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl); } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 18b27276f202..83565e5f46d0 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1394,7 +1394,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; - u16 lid = ppd->lid; + u32 lid = ppd->lid; /* props being zeroed by the caller, avoid zeroing it here */ props->lid = lid ? lid : 0; @@ -1555,27 +1555,6 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev, ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); } -struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) -{ - struct rdma_ah_attr attr; - struct ib_ah *ah = ERR_PTR(-EINVAL); - struct rvt_qp *qp0; - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ppd(ppd); - u8 port_num = ppd->port; - - memset(&attr, 0, sizeof(attr)); - attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num); - rdma_ah_set_dlid(&attr, dlid); - rdma_ah_set_port_num(&attr, ppd_from_ibp(ibp)->port); - rcu_read_lock(); - qp0 = rcu_dereference(ibp->rvp.qp[0]); - if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); - rcu_read_unlock(); - return ah; -} - /** * hfi1_get_npkeys - return the size of the PKEY table for context 0 * @dd: the hfi1_ib device diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 4928ee4f92c1..ab1618e32d9c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -334,8 +334,6 @@ void hfi1_rc_hdrerr( u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); -struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); - void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah); void hfi1_ud_rcv(struct hfi1_packet *packet); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 6b9b43b944e3..549c71971a1f 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -105,7 +105,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { struct ib_ah *ah; - ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid); + ah = qib_create_qp0_ah(ibp, (u16)ibp->rvp.sm_lid); if (IS_ERR(ah)) ret = PTR_ERR(ah); else { @@ -496,7 +496,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, pip->mkey = ibp->rvp.mkey; pip->gid_prefix = ibp->rvp.gid_prefix; pip->lid = cpu_to_be16(ppd->lid); - pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid); + pip->sm_lid = cpu_to_be16((u16)ibp->rvp.sm_lid); pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags); /* pip->diag_code; */ pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index fdfac0fd2f82..1d94f3c264ba 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -91,7 +91,7 @@ struct rvt_ibport { __be16 pma_counter_select[5]; u16 pma_tag; u16 mkey_lease_period; - u16 sm_lid; + u32 sm_lid; u8 sm_sl; u8 mkeyprot; u8 subnet_timeout; -- cgit v1.2.3 From 5b6cabb0db772042906cdc0fc235fe2a4f5a6000 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:41 -0700 Subject: IB/hfi1: Add 16B RC/UC support Add 16B bypass packet support for RC/UC traffic types. Reviewed-by: Dennis Dalessandro Signed-off-by: Don Hiatt Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 4 +- drivers/infiniband/hw/hfi1/rc.c | 390 ++++++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/ruc.c | 201 ++++++++++++++----- drivers/infiniband/hw/hfi1/uc.c | 40 ++-- drivers/infiniband/hw/hfi1/verbs.h | 3 +- 5 files changed, 445 insertions(+), 193 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 52cae1146b80..1dfbf16c2ca9 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -459,6 +459,8 @@ static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr) return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK); } +#define OPA_16B_MAKE_QW(low_dw, high_dw) (((u64)(high_dw) << 32) | (low_dw)) + /* * BTH */ @@ -1538,7 +1540,7 @@ static inline u32 egress_cycles(u32 len, u32 rate) } void set_link_ipg(struct hfi1_pportdata *ppd); -void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, +void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type); void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e3dbf6d45afe..99defcc0ce45 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -100,8 +100,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - hwords = 5; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + else + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; switch (qp->s_ack_state) { case OP(RDMA_READ_RESPONSE_LAST): @@ -258,8 +262,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; - /* header size in 32-bit words LRH+BTH = (8+12)/4. */ - u32 hwords = 5; + u32 hwords; u32 len; u32 bth0 = 0; u32 bth2; @@ -273,9 +276,23 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } else { + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr)))) + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + } /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && @@ -703,6 +720,154 @@ bail_no_tx: return 0; } +static inline void hfi1_make_bth_aeth(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth1) +{ + if (qp->r_nak_state) + ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | + (qp->r_nak_state << + IB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = rvt_compute_aeth(qp); + + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); +} + +static inline void hfi1_queue_rc_ack(struct rvt_qp *qp, bool is_fecn) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); + qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + if (is_fecn) + qp->s_flags |= RVT_S_ECN; + + /* Schedule the send tasklet. */ + hfi1_schedule_send(qp); +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +} + +static inline void hfi1_make_rc_ack_9B(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_header *hdr = &opa_hdr->ibh; + struct ib_other_headers *ohdr; + u16 lrh0 = HFI1_LRH_BTH; + u16 pkey; + u32 bth0, bth1; + + opa_hdr->hdr_type = HFI1_PKT_TYPE_9B; + ohdr = &hdr->u.oth; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ + *hwords = 6; + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { + *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + *hwords - 2, SIZE_OF_CRC); + ohdr = &hdr->u.l.oth; + lrh0 = HFI1_LRH_GRH; + } + /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ + *pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); + + /* read pkey_index w/o lock (its atomic) */ + pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + + lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT | + (rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) << + IB_SL_SHIFT; + + hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), + ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr)); + + bth0 = pkey | (OP(ACKNOWLEDGE) << 24); + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + bth1 = (!!is_fecn) << IB_BECN_SHIFT; + hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); +} + +static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords) +{ + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_16b_header *hdr = &opa_hdr->opah; + struct ib_other_headers *ohdr; + u32 bth0, bth1; + u16 len, pkey; + u8 becn = !!is_fecn; + u8 l4 = OPA_16B_L4_IB_LOCAL; + u8 extra_bytes; + + opa_hdr->hdr_type = HFI1_PKT_TYPE_16B; + ohdr = &hdr->u.oth; + /* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */ + *hwords = 8; + extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0); + *nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2); + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { + *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + *hwords - 4, *nwords); + ohdr = &hdr->u.l.oth; + l4 = OPA_16B_L4_IB_GLOBAL; + } + *pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + + /* read pkey_index w/o lock (its atomic) */ + pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + + /* Convert dwords to flits */ + len = (*hwords + *nwords) >> 1; + + hfi1_make_16b_hdr(hdr, + ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr), + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), + 16B), + len, pkey, becn, 0, l4, sc5); + + bth0 = pkey | (OP(ACKNOWLEDGE) << 24); + bth0 |= extra_bytes << 20; + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth1 = OPA_BTH_MIG_REQ; + hfi1_make_bth_aeth(qp, ohdr, bth0, bth1); +} + +typedef void (*hfi1_make_rc_ack)(struct rvt_qp *qp, + struct hfi1_opa_header *opa_hdr, + u8 sc5, bool is_fecn, + u64 *pbc_flags, u32 *hwords, + u32 *nwords); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B +}; + /** * hfi1_send_rc_ack - Construct an ACK packet and send it * @qp: a pointer to the QP @@ -711,87 +876,48 @@ bail_no_tx: * Note that RDMA reads and atomics are handled in the * send side QP state and send engine. */ -void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, - int is_fecn) +void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, + struct rvt_qp *qp, bool is_fecn) { struct hfi1_ibport *ibp = rcd_to_iport(rcd); + struct hfi1_qp_priv *priv = qp->priv; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; u64 pbc, pbc_flags = 0; - u16 lrh0; - u16 sc5; - u32 bth0; - u32 hwords; - u32 vl, plen; - struct send_context *sc; + u32 hwords = 0; + u32 nwords = 0; + u32 plen; struct pio_buf *pbuf; - struct hfi1_opa_header opah; - struct ib_header *hdr; - struct ib_other_headers *ohdr; - unsigned long flags; + struct hfi1_opa_header opa_hdr; /* clear the defer count */ qp->r_adefered = 0; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ - if (qp->s_flags & RVT_S_RESP_PENDING) - goto queue_ack; + if (qp->s_flags & RVT_S_RESP_PENDING) { + hfi1_queue_rc_ack(qp, is_fecn); + return; + } /* Ensure s_rdma_ack_cnt changes are committed */ smp_read_barrier_depends(); - if (qp->s_rdma_ack_cnt) - goto queue_ack; - - /* Construct the header */ - opah.hdr_type = 0; - hdr = &opah.ibh; - - /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ - hwords = 6; - if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - hwords += hfi1_make_grh(ibp, &hdr->u.l.grh, - rdma_ah_read_grh(&qp->remote_ah_attr), - hwords, 0); - ohdr = &hdr->u.l.oth; - lrh0 = HFI1_LRH_GRH; - } else { - ohdr = &hdr->u.oth; - lrh0 = HFI1_LRH_BTH; + if (qp->s_rdma_ack_cnt) { + hfi1_queue_rc_ack(qp, is_fecn); + return; } - /* read pkey_index w/o lock (its atomic) */ - bth0 = hfi1_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24); - if (qp->s_mig_state == IB_MIG_MIGRATED) - bth0 |= IB_BTH_MIG_REQ; - if (qp->r_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | - (qp->r_nak_state << - IB_AETH_CREDIT_SHIFT)); - else - ohdr->u.aeth = rvt_compute_aeth(qp); - sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) - & 0xf) << 4; - hdr->lrh[0] = cpu_to_be16(lrh0); - hdr->lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - hdr->lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr->lrh[3] = cpu_to_be16(ppd->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); - ohdr->bth[0] = cpu_to_be32(bth0); - ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); - ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); - ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); /* Don't try to send ACKs if the link isn't ACTIVE */ if (driver_lstate(ppd) != IB_PORT_ACTIVE) return; - sc = rcd->sc; - plen = 2 /* PBC */ + hwords; - vl = sc_to_vlt(ppd->dd, sc5); - pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); + /* Make the appropriate header */ + hfi1_make_rc_ack_tbl[priv->hdr_type](qp, &opa_hdr, sc5, is_fecn, + &pbc_flags, &hwords, &nwords); - pbuf = sc_buffer_alloc(sc, plen, NULL, NULL); + plen = 2 /* PBC */ + hwords + nwords; + pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, + sc_to_vlt(ppd->dd, sc5), plen); + pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL); if (!pbuf) { /* * We have no room to send at the moment. Pass @@ -799,32 +925,18 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ - goto queue_ack; + hfi1_queue_rc_ack(qp, is_fecn); + return; } - trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), - &opah, ib_is_sc5(sc5)); + &opa_hdr, ib_is_sc5(sc5)); /* write the pbc and data */ - ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, hdr, hwords); - + ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, + (priv->hdr_type == HFI1_PKT_TYPE_9B ? + (void *)&opa_hdr.ibh : + (void *)&opa_hdr.opah), hwords); return; - -queue_ack: - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) - goto unlock; - this_cpu_inc(*ibp->rvp.rc_qacks); - qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - if (is_fecn) - qp->s_flags |= RVT_S_ECN; - - /* Schedule the send engine. */ - hfi1_schedule_send(qp); -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); } /** @@ -992,8 +1104,10 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) { struct ib_other_headers *ohdr; - struct ib_header *hdr = &opah->ibh; + struct hfi1_qp_priv *priv = qp->priv; struct rvt_swqe *wqe; + struct ib_header *hdr = NULL; + struct hfi1_16b_header *hdr_16b = NULL; u32 opcode; u32 psn; @@ -1002,10 +1116,22 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) return; /* Find out where the BTH is */ - if (ib_get_lnh(hdr) == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + hdr = &opah->ibh; + if (ib_get_lnh(hdr) == HFI1_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + } else { + u8 l4; + + hdr_16b = &opah->opah; + l4 = hfi1_16B_get_l4(hdr_16b); + if (l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &hdr_16b->u.oth; + else + ohdr = &hdr_16b->u.l.oth; + } opcode = ib_bth_get_opcode(ohdr); if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && @@ -1405,36 +1531,34 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, /** * rc_rcv_resp - process an incoming RC response packet - * @ibp: the port this packet came in on - * @ohdr: the other headers for this packet - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet - * @opcode: the opcode for this packet - * @psn: the packet sequence number for this packet - * @hdrsize: the header length - * @pmtu: the path MTU + * @packet: data packet information * * This is called from hfi1_rc_rcv() to process an incoming RC response * packet for the given QP. * Called at interrupt level. */ -static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct ib_other_headers *ohdr, - void *data, u32 tlen, struct rvt_qp *qp, - u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, - struct hfi1_ctxtdata *rcd) +static void rc_rcv_resp(struct hfi1_packet *packet) { + struct hfi1_ctxtdata *rcd = packet->rcd; + void *data = packet->payload; + u32 tlen = packet->tlen; + struct rvt_qp *qp = packet->qp; + struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct ib_other_headers *ohdr = packet->ohdr; struct rvt_swqe *wqe; enum ib_wc_status status; unsigned long flags; int diff; - u32 pad; - u32 aeth; u64 val; + u32 aeth; + u32 psn = ib_bth_get_psn(packet->ohdr); + u32 pmtu = qp->pmtu; + u16 hdrsize = packet->hlen; + u8 opcode = packet->opcode; + u8 pad = packet->pad; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); spin_lock_irqsave(&qp->s_lock, flags); - trace_hfi1_ack(qp, psn); /* Ignore invalid responses. */ @@ -1500,7 +1624,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; read_middle: - if (unlikely(tlen != (hdrsize + pmtu + 4))) + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto ack_len_err; if (unlikely(pmtu >= qp->s_rdma_read_len)) goto ack_len_err; @@ -1532,13 +1656,11 @@ read_middle: aeth = be32_to_cpu(ohdr->u.aeth); if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) goto ack_done; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 0 && <= pmtu. * Remember to account for ICRC (4). */ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto ack_len_err; /* * If this is a response to a resent RDMA read, we @@ -1556,16 +1678,14 @@ read_middle: goto ack_seq_err; if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) goto ack_op_err; - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for ICRC (4). */ - if (unlikely(tlen <= (hdrsize + pad + 4))) + if (unlikely(tlen <= (hdrsize + extra_bytes))) goto ack_len_err; read_last: - tlen -= hdrsize + pad + 4; + tlen -= hdrsize + extra_bytes; if (unlikely(tlen != qp->s_rdma_read_len)) goto ack_len_err; aeth = be32_to_cpu(ohdr->u.aeth); @@ -1850,7 +1970,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, spin_unlock_irqrestore(&ppd->cc_log_lock, flags); } -void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, +void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, u32 rqpn, u8 svc_type) { struct cca_timer *cca_timer; @@ -1907,12 +2027,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, /** * hfi1_rc_rcv - process an incoming RC packet - * @rcd: the context pointer - * @hdr: the header of this packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP for this packet + * @packet: data packet information * * This is called from qp_rcv() to process an incoming RC packet * for the given QP. @@ -1926,10 +2041,10 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = rcd_to_iport(rcd); struct ib_other_headers *ohdr = packet->ohdr; - u32 bth0; + u32 bth0 = be32_to_cpu(ohdr->bth[0]); u32 opcode = packet->opcode; u32 hdrsize = packet->hlen; - u32 psn; + u32 psn = ib_bth_get_psn(packet->ohdr); u32 pad = packet->pad; struct ib_wc wc; u32 pmtu = qp->pmtu; @@ -1940,15 +2055,14 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) bool is_fecn = false; bool copy_last = false; u32 rkey; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); lockdep_assert_held(&qp->r_lock); - bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, packet)) return; is_fecn = process_ecn(qp, packet, false); - psn = ib_bth_get_psn(ohdr); /* * Process responses (ACKs) before anything else. Note that the @@ -1958,8 +2072,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) */ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { - rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, - hdrsize, pmtu, rcd); + rc_rcv_resp(packet); if (is_fecn) goto send_ack; return; @@ -2026,7 +2139,12 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) case OP(RDMA_WRITE_MIDDLE): send_middle: /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) + /* + * There will be no padding for 9B packet but 16B packets + * will come in with some padding since we always add + * CRC and LT bytes which will need to be flit aligned + */ + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto nack_inv; qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) @@ -2080,10 +2198,10 @@ no_immediate_data: send_last: /* Check for invalid length. */ /* LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto nack_inv; - /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + /* Don't count the CRC(and padding and LT byte for 16B). */ + tlen -= (hdrsize + extra_bytes); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 6839bfae933c..b3291f0fde9a 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -735,73 +735,186 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, - u32 bth0, u32 bth2, int middle, - struct hfi1_pkt_state *ps) +static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth1, u32 bth2) +{ + bth1 |= qp->remote_qpn; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(bth1); + ohdr->bth[2] = cpu_to_be32(bth2); +} + +static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = ps->ibp; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 bth1 = 0; + u32 slid; + u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + u8 l4 = OPA_16B_L4_IB_LOCAL; + u8 extra_bytes = hfi1_get_16b_padding((qp->s_hdrwords << 2), + ps->s_txreq->s_cur_size); + u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + + extra_bytes + SIZE_OF_LT) >> 2); + u8 becn = 0; + + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { + struct ib_grh *grh; + struct ib_global_route *grd = + rdma_ah_retrieve_grh(&qp->remote_ah_attr); + int hdrwords; + + /* + * Ensure OPA GIDs are transformed to IB gids + * before creating the GRH. + */ + if (grd->sgid_index == OPA_GID_INDEX) + grd->sgid_index = 0; + grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; + l4 = OPA_16B_L4_IB_GLOBAL; + hdrwords = qp->s_hdrwords - 4; + qp->s_hdrwords += hfi1_make_grh(ibp, grh, grd, + hdrwords, nwords); + middle = 0; + } + + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth1 |= OPA_BTH_MIG_REQ; + else + middle = 0; + + if (middle) + build_ahg(qp, bth2); + else + qp->s_flags &= ~RVT_S_AHG_VALID; + + bth0 |= pkey; + bth0 |= extra_bytes << 20; + if (qp->s_flags & RVT_S_ECN) { + qp->s_flags &= ~RVT_S_ECN; + /* we recently received a FECN, so return a BECN */ + becn = 1; + } + hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); + + if (!ppd->lid) + slid = be32_to_cpu(OPA_LID_PERMISSIVE); + else + slid = ppd->lid | + (rdma_ah_get_path_bits(&qp->remote_ah_attr) & + ((1 << ppd->lmc) - 1)); + + hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah, + slid, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), + 16B), + (qp->s_hdrwords + nwords) >> 1, + pkey, becn, 0, l4, priv->s_sc); +} + +static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; - u16 lrh0; - u32 nwords; - u32 extra_bytes; - u32 bth1; - - /* Construct the header. */ - extra_bytes = -ps->s_txreq->s_cur_size & 3; - nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2; - lrh0 = HFI1_LRH_BTH; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u32 bth1 = 0; + u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); + u16 lrh0 = HFI1_LRH_BTH; + u16 slid; + u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; + u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + + extra_bytes) >> 2); + if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { - qp->s_hdrwords += - hfi1_make_grh(ibp, - &ps->s_txreq->phdr.hdr.ibh.u.l.grh, - &qp->remote_ah_attr.grh, - qp->s_hdrwords, nwords); + struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; + int hdrwords = qp->s_hdrwords - 2; + lrh0 = HFI1_LRH_GRH; + qp->s_hdrwords += + hfi1_make_grh(ibp, grh, + rdma_ah_read_grh(&qp->remote_ah_attr), + hdrwords, nwords); middle = 0; } lrh0 |= (priv->s_sc & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; - /* - * reset s_ahg/AHG fields - * - * This insures that the ahgentry/ahgcount - * are at a non-AHG default to protect - * build_verbs_tx_desc() from using - * an include ahgidx. - * - * build_ahg() will modify as appropriate - * to use the AHG feature. - */ - priv->s_ahg->tx_flags = 0; - priv->s_ahg->ahgcount = 0; - priv->s_ahg->ahgidx = 0; + if (qp->s_mig_state == IB_MIG_MIGRATED) bth0 |= IB_BTH_MIG_REQ; else middle = 0; + if (middle) build_ahg(qp, bth2); else qp->s_flags &= ~RVT_S_AHG_VALID; - ps->s_txreq->phdr.hdr.ibh.lrh[0] = cpu_to_be16(lrh0); - ps->s_txreq->phdr.hdr.ibh.lrh[1] = - cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); - ps->s_txreq->phdr.hdr.ibh.lrh[2] = - cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - ps->s_txreq->phdr.hdr.ibh.lrh[3] = - cpu_to_be16(ppd_from_ibp(ibp)->lid | - rdma_ah_get_path_bits(&qp->remote_ah_attr)); - bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); + + bth0 |= pkey; bth0 |= extra_bytes << 20; - ohdr->bth[0] = cpu_to_be32(bth0); - bth1 = qp->remote_qpn; if (qp->s_flags & RVT_S_ECN) { qp->s_flags &= ~RVT_S_ECN; /* we recently received a FECN, so return a BECN */ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } - ohdr->bth[1] = cpu_to_be32(bth1); - ohdr->bth[2] = cpu_to_be32(bth2); + hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); + + if (!ppd->lid) + slid = be16_to_cpu(IB_LID_PERMISSIVE); + else + slid = ppd->lid | + (rdma_ah_get_path_bits(&qp->remote_ah_attr) & + ((1 << ppd->lmc) - 1)); + hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, + lrh0, + qp->s_hdrwords + nwords, + opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), + ppd_from_ibp(ibp)->lid | + rdma_ah_get_path_bits(&qp->remote_ah_attr)); +} + +typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp, + struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps); + +/* We support only two types - 9B and 16B for now */ +static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B, + [HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B +}; + +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, + u32 bth0, u32 bth2, int middle, + struct hfi1_pkt_state *ps) +{ + struct hfi1_qp_priv *priv = qp->priv; + + /* + * reset s_ahg/AHG fields + * + * This insures that the ahgentry/ahgcount + * are at a non-AHG default to protect + * build_verbs_tx_desc() from using + * an include ahgidx. + * + * build_ahg() will modify as appropriate + * to use the AHG feature. + */ + priv->s_ahg->tx_flags = 0; + priv->s_ahg->ahgcount = 0; + priv->s_ahg->ahgidx = 0; + + /* Make the appropriate header */ + hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps); } /* when sending, force a reschedule every one of these periods */ diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index e0bb766ae36c..0b646173ca22 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -65,7 +65,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; struct rvt_swqe *wqe; - u32 hwords = 5; + u32 hwords; u32 bth0 = 0; u32 len; u32 pmtu = qp->pmtu; @@ -93,9 +93,23 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; - if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) - ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; + if (priv->hdr_type == HFI1_PKT_TYPE_9B) { + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth; + } else { + /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */ + hwords = 7; + if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && + (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr)))) + ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth; + else + ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth; + } /* Get the next send request. */ wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -309,6 +323,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) u32 pmtu = qp->pmtu; struct ib_reth *reth; int ret; + u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); if (hfi1_ruc_check_hdr(ibp, packet)) return; @@ -408,7 +423,12 @@ send_first: /* FALLTHROUGH */ case OP(SEND_MIDDLE): /* Check for invalid length PMTU or posted rwqe len. */ - if (unlikely(tlen != (hdrsize + pmtu + 4))) + /* + * There will be no padding for 9B packet but 16B packets + * will come in with some padding since we always add + * CRC and LT bytes which will need to be flit aligned + */ + if (unlikely(tlen != (hdrsize + pmtu + extra_bytes))) goto rewind; qp->r_rcv_len += pmtu; if (unlikely(qp->r_rcv_len > qp->r_len)) @@ -428,10 +448,10 @@ no_immediate_data: send_last: /* Check for invalid length. */ /* LAST len should be >= 1 */ - if (unlikely(tlen < (hdrsize + pad + 4))) + if (unlikely(tlen < (hdrsize + extra_bytes))) goto rewind; /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); wc.byte_len = tlen + qp->r_rcv_len; if (unlikely(wc.byte_len > qp->r_len)) goto rewind; @@ -524,7 +544,7 @@ rdma_last_imm: if (unlikely(tlen < (hdrsize + pad + 4))) goto drop; /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { @@ -544,14 +564,12 @@ rdma_last_imm: case OP(RDMA_WRITE_LAST): rdma_last: - /* Get the number of bytes the message was padded by. */ - pad = ib_bth_get_pad(ohdr); /* Check for invalid length. */ /* LAST len should be >= 1 */ if (unlikely(tlen < (hdrsize + pad + 4))) goto drop; /* Don't count the CRC. */ - tlen -= (hdrsize + pad + 4); + tlen -= (hdrsize + extra_bytes); if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ab1618e32d9c..b164298a51c6 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -373,7 +373,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status); -void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn); +void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, + bool is_fecn); int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); -- cgit v1.2.3 From 566d53a8264452ee75aa6eb1f2f1970391c1a271 Mon Sep 17 00:00:00 2001 From: Don Hiatt Date: Fri, 4 Aug 2017 13:54:47 -0700 Subject: IB/hfi1: Enhance PIO/SDMA send for 16B PIO/SDMA send logic now uses the hdr_type field to determine the type of packet that has been constructed. Based on the hdr_type, certain things such as PBC flags, padding count and the LT extra trailing bytes are determined. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 +- drivers/infiniband/hw/hfi1/user_sdma.c | 7 +- drivers/infiniband/hw/hfi1/verbs.c | 197 +++++++++++++++++++++------------ drivers/infiniband/hw/hfi1/verbs.h | 1 + 4 files changed, 135 insertions(+), 72 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1dfbf16c2ca9..dff3d3f02b2d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1558,7 +1558,7 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; #define PKEY_CHECK_INVALID -1 -int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, +int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index); #define PACKET_EGRESS_TIMEOUT 350 diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8f02707e9c95..aae1f40016e4 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -502,6 +502,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct sdma_req_info info; struct user_sdma_request *req; u8 opcode, sc, vl; + u16 pkey; + u32 slid; int req_queued = 0; u16 dlid; u32 selector; @@ -635,8 +637,9 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } /* Checking P_KEY for requests from user-space */ - if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc, - PKEY_CHECK_INVALID)) { + pkey = (u16)be32_to_cpu(req->hdr.bth[0]); + slid = be16_to_cpu(req->hdr.lrh[3]); + if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) { ret = -EINVAL; goto free_req; } diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 83565e5f46d0..d279f53fc66e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -506,24 +506,6 @@ again: } } -static u8 get_opcode(struct hfi1_opa_header *hdr) -{ - struct ib_other_headers *ohdr; - - if (hdr->hdr_type) { - if (hfi1_16B_get_l4(&hdr->opah) == OPA_16B_L4_IB_LOCAL) - ohdr = &hdr->opah.u.oth; - else - ohdr = &hdr->opah.u.l.oth; - } else { - if (ib_get_lnh(&hdr->ibh) == HFI1_LRH_BTH) - ohdr = &hdr->ibh.u.oth; - else - ohdr = &hdr->ibh.u.l.oth; - } - return ib_bth_get_opcode(ohdr); -} - /* * Make sure the QP is ready and able to accept the given opcode. */ @@ -830,12 +812,27 @@ static int build_verbs_tx_desc( int ret = 0; struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; + u32 *hdr; + u8 extra_bytes = 0; + static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ + if (tx->phdr.hdr.hdr_type) { + /* + * hdrbytes accounts for PBC. Need to subtract 8 bytes + * before calculating padding. + */ + extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) + + (SIZE_OF_CRC << 2) + SIZE_OF_LT; + hdr = (u32 *)&phdr->hdr.opah; + } else { + hdr = (u32 *)&phdr->hdr.ibh; + } if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( &tx->txreq, ahg_info->tx_flags, - hdrbytes + length, + hdrbytes + length + + extra_bytes, ahg_info->ahgidx, 0, NULL, @@ -865,8 +862,17 @@ static int build_verbs_tx_desc( goto bail_txadd; } /* add the ulp payload - if any. tx->ss can be NULL for acks */ - if (tx->ss) + if (tx->ss) { ret = build_verbs_ulp_payload(sde, length, tx); + if (ret) + goto bail_txadd; + } + + /* add icrc, lt byte, and padding to flit */ + if (extra_bytes != 0) + ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, + trail_buf, extra_bytes); + bail_txadd: return ret; } @@ -878,26 +884,42 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_ahg_info *ahg_info = priv->s_ahg; u32 hdrwords = qp->s_hdrwords; u32 len = ps->s_txreq->s_cur_size; - u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ + u32 plen; struct hfi1_ibdev *dev = ps->dev; struct hfi1_pportdata *ppd = ps->ppd; struct verbs_txreq *tx; u8 sc5 = priv->s_sc; - int ret; + u32 dwords; + bool bypass = false; + + if (ps->s_txreq->phdr.hdr.hdr_type) { + u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len); + + dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) + + SIZE_OF_LT) >> 2; + bypass = true; + } else { + dwords = (len + 3) >> 2; + } + plen = hdrwords + dwords + 2; tx = ps->s_txreq; if (!sdma_txreq_built(&tx->txreq)) { if (likely(pbc == 0)) { u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); - u8 opcode = get_opcode(&tx->phdr.hdr); /* No vl15 here */ - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ + if (ps->s_txreq->phdr.hdr.hdr_type) + pbc |= PBC_PACKET_BYPASS | + PBC_INSERT_BYPASS_ICRC; + else + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) - pbc = hfi1_fault_tx(qp, opcode, pbc); + if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, + false))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, @@ -1000,10 +1022,10 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u32 hdrwords = qp->s_hdrwords; struct rvt_sge_state *ss = ps->s_txreq->ss; u32 len = ps->s_txreq->s_cur_size; - u32 dwords = (len + 3) >> 2; - u32 plen = hdrwords + dwords + 2; /* includes pbc */ + u32 dwords; + u32 plen; struct hfi1_pportdata *ppd = ps->ppd; - u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr; + u32 *hdr; u8 sc5; unsigned long flags = 0; struct send_context *sc; @@ -1011,6 +1033,23 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int wc_status = IB_WC_SUCCESS; int ret = 0; pio_release_cb cb = NULL; + u32 lrh0_16b; + bool bypass = false; + u8 extra_bytes = 0; + + if (ps->s_txreq->phdr.hdr.hdr_type) { + u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len); + + extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT; + dwords = (len + extra_bytes) >> 2; + hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah; + lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0]; + bypass = true; + } else { + dwords = (len + 3) >> 2; + hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; + } + plen = hdrwords + dwords + 2; /* only RC/UC use complete */ switch (qp->ibqp.qp_type) { @@ -1028,13 +1067,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (likely(pbc == 0)) { u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); - struct verbs_txreq *tx = ps->s_txreq; - u8 opcode = get_opcode(&tx->phdr.hdr); - /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ - pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false))) - pbc = hfi1_fault_tx(qp, opcode, pbc); + /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ + if (ps->s_txreq->phdr.hdr.hdr_type) + pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; + else + pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } if (cb) @@ -1071,11 +1111,12 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, } } - if (len == 0) { + if (dwords == 0) { pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords); } else { + seg_pio_copy_start(pbuf, pbc, + hdr, hdrwords * 4); if (ss) { - seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4); while (len) { void *addr = ss->sge.vaddr; u32 slen = ss->sge.length; @@ -1086,8 +1127,20 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, seg_pio_copy_mid(pbuf, addr, slen); len -= slen; } - seg_pio_copy_end(pbuf); } + /* + * Bypass packet will need to copy additional + * bytes to accommodate for CRC and LT bytes + */ + if (extra_bytes) { + u8 *empty_buf; + + empty_buf = kcalloc(extra_bytes, sizeof(u8), + GFP_KERNEL); + seg_pio_copy_mid(pbuf, empty_buf, extra_bytes); + kfree(empty_buf); + } + seg_pio_copy_end(pbuf); } trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), @@ -1137,10 +1190,10 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) /** * egress_pkey_check - check P_KEY of a packet - * @ppd: Physical IB port data - * @lrh: Local route header - * @bth: Base transport header - * @sc5: SC for packet + * @ppd: Physical IB port data + * @slid: SLID for packet + * @bkey: PKEY for header + * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. If it is negative value, then it means user contexts is calling this * function. @@ -1149,19 +1202,16 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * * Return: 0 on success, otherwise, 1 */ -int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, +int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index) { struct hfi1_devdata *dd; int i; - u16 pkey; int is_user_ctxt_mechanism = (s_pkey_index < 0); if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) return 0; - pkey = (u16)be32_to_cpu(bth[0]); - /* If SC15, pkey[0:14] must be 0x7fff */ if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) goto bad; @@ -1194,8 +1244,6 @@ bad: dd = ppd->dd; if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) { - u16 slid = be16_to_cpu(lrh[3]); - dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK; dd->err_info_xmit_constraint.slid = slid; @@ -1212,11 +1260,11 @@ bad: * and size */ static inline send_routine get_send_routine(struct rvt_qp *qp, - struct verbs_txreq *tx) + struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_opa_header *h = &tx->phdr.hdr; + struct verbs_txreq *tx = ps->s_txreq; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1228,11 +1276,9 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, break; case IB_QPT_UC: case IB_QPT_RC: { - u8 op = get_opcode(h); - if (piothreshold && tx->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(op & OPMASK) & pio_opmask[op >> 5]) && + (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; @@ -1257,25 +1303,38 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; struct ib_other_headers *ohdr; - struct ib_header *hdr; send_routine sr; int ret; - u8 lnh; + u16 pkey; + u32 slid; - hdr = &ps->s_txreq->phdr.hdr.ibh; /* locate the pkey within the headers */ - lnh = ib_get_lnh(hdr); - if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else - ohdr = &hdr->u.oth; - - sr = get_send_routine(qp, ps->s_txreq); - ret = egress_pkey_check(dd->pport, - hdr->lrh, - ohdr->bth, - priv->s_sc, - qp->s_pkey_index); + if (ps->s_txreq->phdr.hdr.hdr_type) { + struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; + u8 l4 = hfi1_16B_get_l4(hdr); + + if (l4 == OPA_16B_L4_IB_GLOBAL) + ohdr = &hdr->u.l.oth; + else + ohdr = &hdr->u.oth; + slid = hfi1_16B_get_slid(hdr); + pkey = hfi1_16B_get_pkey(hdr); + } else { + struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh; + u8 lnh = ib_get_lnh(hdr); + + if (lnh == HFI1_LRH_GRH) + ohdr = &hdr->u.l.oth; + else + ohdr = &hdr->u.oth; + slid = ib_get_slid(hdr); + pkey = ib_bth_get_pkey(ohdr); + } + + ps->opcode = ib_bth_get_opcode(ohdr); + sr = get_send_routine(qp, ps); + ret = egress_pkey_check(dd->pport, slid, pkey, + priv->s_sc, qp->s_pkey_index); if (unlikely(ret)) { /* * The value we are returning here does not get propagated to diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index b164298a51c6..56ead87a7f65 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -163,6 +163,7 @@ struct hfi1_pkt_state { unsigned long timeout; unsigned long timeout_int; int cpu; + u8 opcode; bool in_thread; bool pkts_sent; }; -- cgit v1.2.3 From 7221403dc958456c77f39b8a7ce6f95d8e65ffa3 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Fri, 4 Aug 2017 13:54:53 -0700 Subject: IB/hfi1: Enable RDMA_CAP_OPA_AH in hfi driver to support extended LIDs Enabling this bit helps core components query for extended address support using the rdma_cap_opa_ah interface. Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Don Hiatt Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index d279f53fc66e..e232f3c608b4 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1911,7 +1911,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; - dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; + dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA | + RDMA_CORE_CAP_OPA_AH; dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; -- cgit v1.2.3 From 028e0a67ea679ff958176eac97d1319a8d685fad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 5 Aug 2017 14:11:50 +0100 Subject: IB/hfi1: fix spelling mistake: "Maximim" -> "Maximum" Trivial fix to spelling mistake in pr_warn_ratelimited warning message Signed-off-by: Colin Ian King Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index cdcb4d021480..37b19bfae02a 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -173,7 +173,7 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, trap_list->list_len++; list_add_tail(&trap->list, &trap_list->list); } else { - pr_warn_ratelimited("hfi1: Maximim trap limit reached for 0x%0x traps\n", + pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n", trap->data.generic_type); kfree(trap); } -- cgit v1.2.3 From b6422bc01259aea7e4e11f1cd052e80acdf1275b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Sun, 13 Aug 2017 08:08:22 -0700 Subject: IB/hfi1: Check xchg returned value for queuing link down entry Check xchg returned value for queuing link down entry to guarantee proper atomic value reads. Fixes: 626c077c025f ("IB/hfi1: Prevent link down request double queuing") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index cbda37386982..bfb50a4e7e5f 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7810,16 +7810,22 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg) if (queue_link_down) { /* * if the link is already going down or disabled, do not - * queue another + * queue another. If there's a link down entry already + * queued, don't queue another one. */ if ((ppd->host_link_state & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) || - ppd->link_enabled == 0 || ppd->is_link_down_queued) { - dd_dev_info(dd, "%s: not queuing link down\n", - __func__); + ppd->link_enabled == 0) { + dd_dev_info(dd, "%s: not queuing link down. host_link_state %x, link_enabled %x\n", + __func__, ppd->host_link_state, + ppd->link_enabled); } else { - xchg(&ppd->is_link_down_queued, 1); - queue_work(ppd->link_wq, &ppd->link_down_work); + if (xchg(&ppd->is_link_down_queued, 1) == 1) + dd_dev_info(dd, + "%s: link down request already queued\n", + __func__); + else + queue_work(ppd->link_wq, &ppd->link_down_work); } } } -- cgit v1.2.3 From 55774d09b7a1f0d2bfec3320ed71d30fe05466a1 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 13 Aug 2017 08:08:28 -0700 Subject: IB/hfi1: Document phys port state bits not used in IB A couple bits are used by OPA for link physical state that are not present as part of InfiniBand. Add a short blurb what those states mean and removed an unused state. Cc: Leon Romanovsky Reviewed-by: Todd Rimmer Reviewed-by: Brent Rothermel Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/opa_compat.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h index 6ef3c1cbdcd7..774215b95df5 100644 --- a/drivers/infiniband/hw/hfi1/opa_compat.h +++ b/drivers/infiniband/hw/hfi1/opa_compat.h @@ -84,7 +84,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps) /* * OPA port physical states * IB Volume 1, Table 146 PortInfo/IB Volume 2 Section 5.4.2(1) PortPhysState - * values. + * values are the same in OmniPath Architecture. OPA leverages some of the same + * concepts as InfiniBand, but has a few other states as well. * * When writing, only values 0-3 are valid, other values are ignored. * When reading, 0 is reserved. @@ -92,6 +93,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps) * Returned by the ibphys_portstate() routine. */ enum opa_port_phys_state { + /* Values 0-7 have the same meaning in OPA as in InfiniBand. */ + IB_PORTPHYSSTATE_NOP = 0, /* 1 is reserved */ IB_PORTPHYSSTATE_POLLING = 2, @@ -101,9 +104,23 @@ enum opa_port_phys_state { IB_PORTPHYSSTATE_LINK_ERROR_RECOVERY = 6, IB_PORTPHYSSTATE_PHY_TEST = 7, /* 8 is reserved */ + + /* + * Offline: Port is quiet (transmitters disabled) due to lack of + * physical media, unsupported media, or transition between link up + * and next link up attempt + */ OPA_PORTPHYSSTATE_OFFLINE = 9, - OPA_PORTPHYSSTATE_GANGED = 10, + + /* 10 is reserved */ + + /* + * Phy_Test: Specific test patterns are transmitted, and receiver BER + * can be monitored. This facilitates signal integrity testing for the + * physical layer of the port. + */ OPA_PORTPHYSSTATE_TEST = 11, + OPA_PORTPHYSSTATE_MAX = 11, /* values 12-15 are reserved/ignored */ }; -- cgit v1.2.3 From 9161860463e38e1046a5fd57130be150cc0cac5d Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:34 -0700 Subject: IB/hfi1: Add flag for platform config scratch register read Add flag in pport data structure to determine when platform config was read from scratch registers. Change conditions in parse_platform_config and get_platform_config_field to use the new flag. Reviewed-by: Easwar Hariharan Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/firmware.c | 6 ++++-- drivers/infiniband/hw/hfi1/hfi.h | 3 +++ drivers/infiniband/hw/hfi1/platform.c | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 885714b93557..fb095e69a554 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1789,6 +1789,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table) int parse_platform_config(struct hfi1_devdata *dd) { struct platform_config_cache *pcfgcache = &dd->pcfg_cache; + struct hfi1_pportdata *ppd = dd->pport; u32 *ptr = NULL; u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0; u32 record_idx = 0, table_type = 0, table_length_dwords = 0; @@ -1800,7 +1801,7 @@ int parse_platform_config(struct hfi1_devdata *dd) * scratch register bitmap, thus there is no platform config to parse. * Skip parsing in these situations. */ - if (is_integrated(dd) && !platform_config_load) + if (ppd->config_from_scratch) return 0; if (!dd->platform_config.data) { @@ -2089,13 +2090,14 @@ int get_platform_config_field(struct hfi1_devdata *dd, int ret = 0, wlen = 0, seek = 0; u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL; struct platform_config_cache *pcfgcache = &dd->pcfg_cache; + struct hfi1_pportdata *ppd = dd->pport; if (data) memset(data, 0, len); else return -EINVAL; - if (is_integrated(dd) && !platform_config_load) { + if (ppd->config_from_scratch) { /* * Use saved configuration from ppd for integrated platforms */ diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index dff3d3f02b2d..181feca3cb6f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -677,6 +677,9 @@ struct hfi1_pportdata { u8 default_atten; u8 max_power_class; + /* did we read platform config from scratch registers? */ + bool config_from_scratch; + /* GUIDs for this interface, in host order, guids[0] is a port guid */ u64 guids[HFI1_GUIDS_PER_PORT]; diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 5c38a24484ab..ab221b05d553 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -138,6 +138,8 @@ static void save_platform_config_fields(struct hfi1_devdata *dd) ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >> QSFP_MAX_POWER_SHIFT; + + ppd->config_from_scratch = true; } void get_platform_config(struct hfi1_devdata *dd) -- cgit v1.2.3 From 76ae6222a4a37098610c0601a814c9caba94ba0b Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:40 -0700 Subject: IB/hfi1: Load fallback platform configuration per HFI device Currently fallback configuration is loaded once per driver instance. With multiple HFI devices in the same system the current code may not load the platform config data for the device. Change fallback platform config data loading to be per device. Reviewed-by: Easwar Hariharan Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 1 - drivers/infiniband/hw/hfi1/firmware.c | 51 +++-------------------------------- drivers/infiniband/hw/hfi1/platform.c | 41 +++++++++++++++++++--------- 3 files changed, 31 insertions(+), 62 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index f7083025fb04..fc21097e7a56 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -648,7 +648,6 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, #define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */ extern const u8 pcie_serdes_broadcast[]; extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES]; -extern uint platform_config_load; /* SBus commands */ #define RESET_SBUS_RECEIVER 0x20 diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index fb095e69a554..5aea8f47e670 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -64,7 +64,6 @@ #define DEFAULT_FW_FABRIC_NAME "hfi1_fabric.fw" #define DEFAULT_FW_SBUS_NAME "hfi1_sbus.fw" #define DEFAULT_FW_PCIE_NAME "hfi1_pcie.fw" -#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat" #define ALT_FW_8051_NAME_ASIC "hfi1_dc8051_d.fw" #define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" #define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" @@ -76,19 +75,11 @@ static uint fw_fabric_serdes_load = 1; static uint fw_pcie_serdes_load = 1; static uint fw_sbus_load = 1; -/* - * Access required in platform.c - * Maintains state of whether the platform config was fetched via the - * fallback option - */ -uint platform_config_load; - /* Firmware file names get set in hfi1_firmware_init() based on the above */ static char *fw_8051_name; static char *fw_fabric_serdes_name; static char *fw_sbus_name; static char *fw_pcie_serdes_name; -static char *platform_config_name; #define SBUS_MAX_POLL_COUNT 100 #define SBUS_COUNTER(reg, name) \ @@ -178,7 +169,6 @@ static struct firmware_details fw_8051; static struct firmware_details fw_fabric; static struct firmware_details fw_pcie; static struct firmware_details fw_sbus; -static const struct firmware *platform_config; /* flags for turn_off_spicos() */ #define SPICO_SBUS 0x1 @@ -684,7 +674,6 @@ done: static int obtain_firmware(struct hfi1_devdata *dd) { unsigned long timeout; - int err = 0; mutex_lock(&fw_mutex); @@ -708,38 +697,11 @@ static int obtain_firmware(struct hfi1_devdata *dd) } /* not in FW_TRY state */ - if (fw_state == FW_FINAL) { - if (platform_config) { - dd->platform_config.data = platform_config->data; - dd->platform_config.size = platform_config->size; - } - goto done; /* already acquired */ - } else if (fw_state == FW_ERR) { - goto done; /* already tried and failed */ - } - /* fw_state is FW_EMPTY */ - /* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */ - __obtain_firmware(dd); - - if (platform_config_load) { - platform_config = NULL; - err = request_firmware(&platform_config, platform_config_name, - &dd->pcidev->dev); - if (err) { - platform_config = NULL; - dd_dev_err(dd, - "%s: No default platform config file found\n", - __func__); - goto done; - } - dd->platform_config.data = platform_config->data; - dd->platform_config.size = platform_config->size; - } + if (fw_state == FW_EMPTY) + __obtain_firmware(dd); -done: mutex_unlock(&fw_mutex); - return fw_err; } @@ -761,9 +723,6 @@ void dispose_firmware(void) dispose_one_firmware(&fw_pcie); dispose_one_firmware(&fw_sbus); - release_firmware(platform_config); - platform_config = NULL; - /* retain the error state, otherwise revert to empty */ if (fw_state != FW_ERR) fw_state = FW_EMPTY; @@ -1725,10 +1684,8 @@ int hfi1_firmware_init(struct hfi1_devdata *dd) } /* no 8051 or QSFP on simulator */ - if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { + if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) fw_8051_load = 0; - platform_config_load = 0; - } if (!fw_8051_name) { if (dd->icode == ICODE_RTL_SILICON) @@ -1742,8 +1699,6 @@ int hfi1_firmware_init(struct hfi1_devdata *dd) fw_sbus_name = DEFAULT_FW_SBUS_NAME; if (!fw_pcie_serdes_name) fw_pcie_serdes_name = DEFAULT_FW_PCIE_NAME; - if (!platform_config_name) - platform_config_name = DEFAULT_PLATFORM_CONFIG_NAME; return obtain_firmware(dd); } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index ab221b05d553..8004f1da7548 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -45,10 +45,14 @@ * */ +#include + #include "hfi.h" #include "efivar.h" #include "eprom.h" +#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat" + static int validate_scratch_checksum(struct hfi1_devdata *dd) { u64 checksum = 0, temp_scratch = 0; @@ -147,6 +151,7 @@ void get_platform_config(struct hfi1_devdata *dd) int ret = 0; u8 *temp_platform_config = NULL; u32 esize; + const struct firmware *platform_config_file = NULL; if (is_integrated(dd)) { if (validate_scratch_checksum(dd)) { @@ -167,23 +172,33 @@ void get_platform_config(struct hfi1_devdata *dd) dd_dev_err(dd, "%s: Failed to get platform config, falling back to sub-optimal default file\n", __func__); - /* fall back to request firmware */ - platform_config_load = 1; -} -void free_platform_config(struct hfi1_devdata *dd) -{ - if (!platform_config_load) { - /* - * was loaded from EFI or the EPROM, release memory - * allocated by read_efi_var/eprom_read_platform_config - */ - kfree(dd->platform_config.data); + ret = request_firmware(&platform_config_file, + DEFAULT_PLATFORM_CONFIG_NAME, + &dd->pcidev->dev); + if (ret) { + dd_dev_err(dd, + "%s: No default platform config file found\n", + __func__); + return; } + /* - * else do nothing, dispose_firmware will release - * struct firmware platform_config on driver exit + * Allocate separate memory block to store data and free firmware + * structure. This allows free_platform_config to treat EPROM and + * fallback configs in the same manner. */ + dd->platform_config.data = kmemdup(platform_config_file->data, + platform_config_file->size, + GFP_KERNEL); + dd->platform_config.size = platform_config_file->size; + release_firmware(platform_config_file); +} + +void free_platform_config(struct hfi1_devdata *dd) +{ + /* Release memory allocated for eprom or fallback file read. */ + kfree(dd->platform_config.data); } void get_port_type(struct hfi1_pportdata *ppd) -- cgit v1.2.3 From ec0d8b8a63ee760bca1bccc6769d6210e05ded29 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugame Date: Sun, 13 Aug 2017 08:08:46 -0700 Subject: IB/hfi1: Stricter bounds checking of MAD trap index The macro size is valid. This change makes it less ambiguous. Bounds check trap type for better security. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 13 ++++++++++++- include/rdma/rdma_vt.h | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 37b19bfae02a..661ba707fc60 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -151,13 +151,24 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, unsigned long flags; unsigned long timeout; int found = 0; + unsigned int queue_id; + static int trap_count; + + queue_id = trap->data.generic_type & 0x0F; + if (queue_id >= RVT_MAX_TRAP_LISTS) { + trap_count++; + pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n", + trap->data.generic_type, trap_count); + kfree(trap); + return NULL; + } /* * Since the retry (handle timeout) does not remove a trap request * from the list, all we have to do is compare the node. */ spin_lock_irqsave(&ibp->rvp.lock, flags); - trap_list = &ibp->rvp.trap_lists[trap->data.generic_type & 0x0F]; + trap_list = &ibp->rvp.trap_lists[queue_id]; list_for_each_entry(node, &trap_list->list, list) { if (node == trap) { diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1d94f3c264ba..1ba84a78f1c5 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -64,7 +64,7 @@ #define RVT_MAX_PKEY_VALUES 16 #define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */ -#define RVT_MAX_TRAP_LISTS ((IB_NOTICE_TYPE_INFO & 0x0F) + 1) +#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/ #define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */ struct trap_list { -- cgit v1.2.3 From d392a673e718767049824f99c76bb483d893b881 Mon Sep 17 00:00:00 2001 From: Jakub Byczkowski Date: Sun, 13 Aug 2017 08:08:52 -0700 Subject: IB/hfi1: Remove pstate from hfi1_pportdata Do not track physical state separately from host_link_state. Deduce physical state from host_link_state when required. Change cache_physical_state to log_physical_state to make sure host_link_state reflects hardwares physical state properly. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 54 ++++++++++++++++++++++----------------- drivers/infiniband/hw/hfi1/chip.h | 3 +-- drivers/infiniband/hw/hfi1/hfi.h | 24 ----------------- drivers/infiniband/hw/hfi1/mad.c | 2 +- 4 files changed, 33 insertions(+), 50 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index bfb50a4e7e5f..eb27c7fbfebd 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1068,6 +1068,8 @@ static int thermal_init(struct hfi1_devdata *dd); static void update_statusp(struct hfi1_pportdata *ppd, u32 state); static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); +static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); @@ -10450,11 +10452,11 @@ static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state) } /* - * driver_physical_state - convert the driver's notion of a port's + * driver_pstate - convert the driver's notion of a port's * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*). * Return -1 (converted to a u32) to indicate error. */ -u32 driver_physical_state(struct hfi1_pportdata *ppd) +u32 driver_pstate(struct hfi1_pportdata *ppd) { switch (ppd->host_link_state) { case HLS_UP_INIT: @@ -10720,7 +10722,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ret) goto_offline(ppd, 0); else - cache_physical_state(ppd); + log_physical_state(ppd, PLS_POLLING); break; case HLS_DN_DISABLE: /* link is disabled */ @@ -10769,7 +10771,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (ppd->host_link_state != HLS_DN_POLL) goto unexpected; ppd->host_link_state = HLS_VERIFY_CAP; - cache_physical_state(ppd); + log_physical_state(ppd, PLS_CONFIGPHY_VERIFYCAP); break; case HLS_GOING_UP: if (ppd->host_link_state != HLS_VERIFY_CAP) @@ -12743,25 +12745,30 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, return 0; } +static void log_state_transition(struct hfi1_pportdata *ppd, u32 state) +{ + u32 ib_pstate = chip_to_opa_pstate(ppd->dd, state); + + dd_dev_info(ppd->dd, + "physical state changed to %s (0x%x), phy 0x%x\n", + opa_pstate_name(ib_pstate), ib_pstate, state); +} + /* - * Read the physical hardware link state and set the driver's cached value - * of it. + * Read the physical hardware link state and check if it matches host + * drivers anticipated state. */ -void cache_physical_state(struct hfi1_pportdata *ppd) +static void log_physical_state(struct hfi1_pportdata *ppd, u32 state) { - u32 read_pstate; - u32 ib_pstate; + u32 read_state = read_physical_state(ppd->dd); - read_pstate = read_physical_state(ppd->dd); - ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate); - /* check if OPA pstate changed */ - if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) { - dd_dev_info(ppd->dd, - "%s: physical state changed to %s (0x%x), phy 0x%x\n", - __func__, opa_pstate_name(ib_pstate), ib_pstate, - read_pstate); + if (read_state == state) { + log_state_transition(ppd, state); + } else { + dd_dev_err(ppd->dd, + "anticipated phy link state 0x%x, read 0x%x\n", + state, read_state); } - ppd->pstate = read_pstate; } /* @@ -12776,22 +12783,24 @@ void cache_physical_state(struct hfi1_pportdata *ppd) static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs) { + u32 read_state; unsigned long timeout; timeout = jiffies + msecs_to_jiffies(msecs); while (1) { - cache_physical_state(ppd); - if (ppd->pstate == state) + read_state = read_physical_state(ppd->dd); + if (read_state == state) break; if (time_after(jiffies, timeout)) { dd_dev_err(ppd->dd, - "timeout waiting for phy link state 0x%x, current state is 0x%x\n", - state, ppd->pstate); + "timeout waiting for phy link state 0x%x\n", + state); return -ETIMEDOUT; } usleep_range(1950, 2050); /* sleep 2ms-ish */ } + log_state_transition(ppd, state); return 0; } @@ -14896,7 +14905,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, /* start in offline */ ppd->host_link_state = HLS_DN_OFFLINE; init_vl_arb_caches(ppd); - ppd->pstate = PLS_OFFLINE; } dd->link_default = HLS_DN_POLL; diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index fc21097e7a56..b8345a60a0fb 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -747,10 +747,9 @@ int is_ax(struct hfi1_devdata *dd); int is_bx(struct hfi1_devdata *dd); u32 read_physical_state(struct hfi1_devdata *dd); u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate); -void cache_physical_state(struct hfi1_pportdata *ppd); const char *opa_lstate_name(u32 lstate); const char *opa_pstate_name(u32 pstate); -u32 driver_physical_state(struct hfi1_pportdata *ppd); +u32 driver_pstate(struct hfi1_pportdata *ppd); u32 driver_lstate(struct hfi1_pportdata *ppd); int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 181feca3cb6f..b15749eab921 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -758,7 +758,6 @@ struct hfi1_pportdata { u8 link_enabled; /* link enabled? */ u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ - u8 pstate; /* info only */ u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ @@ -1428,29 +1427,6 @@ static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) int hfi1_reset_device(int); -/* return the driver's idea of the physical OPA port state */ -static inline u32 driver_pstate(struct hfi1_pportdata *ppd) -{ - /* - * When DC is shut down and state is changed, its CSRs are not - * impacted, therefore host_link_state should be used to get - * current physical state. - */ - if (ppd->dd->dc_shutdown) - return driver_physical_state(ppd); - /* - * The driver does some processing from the time the physical - * link state is at LINKUP to the time the SM can be notified - * as such. Return IB_PORTPHYSSTATE_TRAINING until the software - * state is ready. - */ - if (ppd->pstate == PLS_LINKUP && - !(ppd->host_link_state & HLS_UP)) - return IB_PORTPHYSSTATE_TRAINING; - else - return chip_to_opa_pstate(ppd->dd, ppd->pstate); -} - void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 661ba707fc60..4849130d49bc 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1181,7 +1181,7 @@ static int physical_transition_allowed(int old, int new) static int port_states_transition_allowed(struct hfi1_pportdata *ppd, u32 logical_new, u32 physical_new) { - u32 physical_old = driver_physical_state(ppd); + u32 physical_old = driver_pstate(ppd); u32 logical_old = driver_lstate(ppd); int ret, logical_allowed, physical_allowed; -- cgit v1.2.3 From 6165467921a891bc79675cdb7f2791d8331f592c Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Sun, 13 Aug 2017 08:08:58 -0700 Subject: IB/hfi1: Remove HFI1_VERBS_31BIT_PSN option Remove HFI1_VERBS_31BIT_PSN Kconfig option leaving only 31-bit PSNs available. The option was implemented in the early days of the driver and is no longer needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/Kconfig | 7 ------- drivers/infiniband/hw/hfi1/verbs.h | 5 ----- 2 files changed, 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig index f6ea0881765a..7b146b67a80f 100644 --- a/drivers/infiniband/hw/hfi1/Kconfig +++ b/drivers/infiniband/hw/hfi1/Kconfig @@ -13,13 +13,6 @@ config HFI1_DEBUG_SDMA_ORDER ---help--- This is a debug flag to test for out of order sdma completions for unit testing -config HFI1_VERBS_31BIT_PSN - bool "HFI1 enable 31 bit PSN" - depends on INFINIBAND_HFI1 - default y - ---help--- - Setting this enables 31 BIT PSN - For verbs RC/UC config SDMA_VERBOSITY bool "Config SDMA Verbosity" depends on INFINIBAND_HFI1 diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 56ead87a7f65..87d1285a3340 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -280,13 +280,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, * necessarily be at least one bit less than * the container holding the PSN. */ -#ifndef CONFIG_HFI1_VERBS_31BIT_PSN -#define PSN_MASK 0xFFFFFF -#define PSN_SHIFT 8 -#else #define PSN_MASK 0x7FFFFFFF #define PSN_SHIFT 1 -#endif #define PSN_MODIFY_MASK 0xFFFFFF /* -- cgit v1.2.3 From bf808b5039c66f9843cdc30f18c0608dbbf11374 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sun, 13 Aug 2017 08:09:04 -0700 Subject: IB/hfi1: Add kernel receive context info to debugfs Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 47 ++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/driver.c | 59 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/hfi1/hfi.h | 1 + 3 files changed, 107 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 550119c6f1ee..c6a472f01025 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -368,6 +368,52 @@ DEBUGFS_SEQ_FILE_OPS(sdes); DEBUGFS_SEQ_FILE_OPEN(sdes) DEBUGFS_FILE_OPS(sdes); +static void *_rcds_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->rcd || *pos >= dd->n_krcv_queues) + return NULL; + return pos; +} + +static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->rcd || *pos >= dd->n_krcv_queues) + return NULL; + return pos; +} + +static void _rcds_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _rcds_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; + loff_t *spos = v; + loff_t i = *spos; + + rcd = hfi1_rcd_get_by_index(dd, i); + if (rcd) + seqfile_dump_rcd(s, rcd); + hfi1_rcd_put(rcd); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(rcds); +DEBUGFS_SEQ_FILE_OPEN(rcds) +DEBUGFS_FILE_OPS(rcds); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1321,6 +1367,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index fc7085d6cf3f..7372cc00cb2d 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1689,3 +1689,62 @@ int process_receive_invalid(struct hfi1_packet *packet) rhf_rcv_type(packet->rhf)); return RHF_RCV_CONTINUE; } + +void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd) +{ + struct hfi1_packet packet; + struct ps_mdata mdata; + + seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n", + rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize, + HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? + "dma_rtail" : "nodma_rtail", + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) & + RCV_HDR_HEAD_HEAD_MASK, + read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL)); + + init_packet(rcd, &packet); + init_ps_mdata(&mdata, &packet); + + while (1) { + struct hfi1_devdata *dd = rcd->dd; + __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + + dd->rhf_offset; + struct ib_header *hdr; + u64 rhf = rhf_to_cpu(rhf_addr); + u32 etype = rhf_rcv_type(rhf), qpn; + u8 opcode; + u32 psn; + u8 lnh; + + if (ps_done(&mdata, rhf, rcd)) + break; + + if (ps_skip(&mdata, rhf, rcd)) + goto next; + + if (etype > RHF_RCV_TYPE_IB) + goto next; + + packet.hdr = hfi1_get_msgheader(dd, rhf_addr); + hdr = packet.hdr; + + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + + if (lnh == HFI1_LRH_BTH) + packet.ohdr = &hdr->u.oth; + else if (lnh == HFI1_LRH_GRH) + packet.ohdr = &hdr->u.l.oth; + else + goto next; /* just in case */ + + opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24); + qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK; + psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2])); + + seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n", + mdata.ps_head, opcode, qpn, psn); +next: + update_ps_mdata(&mdata, rcd); + } +} diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index b15749eab921..eadb735d958c 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1987,6 +1987,7 @@ int process_receive_error(struct hfi1_packet *packet); int kdeth_process_expected(struct hfi1_packet *packet); int kdeth_process_eager(struct hfi1_packet *packet); int process_receive_invalid(struct hfi1_packet *packet); +void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); /* global module parameter variables */ extern unsigned int hfi1_max_mtu; -- cgit v1.2.3 From d26875b43d45644e87f4c0b6bb2d7abf3c61d529 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 8 Aug 2017 20:38:45 -0500 Subject: i40iw: Improve CQP timeout logic The current timeout logic for Control Queue-Pair (CQP) OPs does not take into account whether CQP makes progress but rather blindly waits for a large timeout value, 100000 jiffies for the completion event. Improve this by setting the timeout based on whether the CQP is making progress or not. If the CQP is hung, the timeout will happen sooner, in 5000 jiffies. Each time the CQP progress is detetcted, the timeout extends by 5000 jiffies. Signed-off-by: Shiraz Saleem Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 11 +++++++++++ drivers/infiniband/hw/i40iw/i40iw_p.h | 14 +++++++++----- drivers/infiniband/hw/i40iw/i40iw_type.h | 5 +++++ drivers/infiniband/hw/i40iw/i40iw_utils.c | 22 ++++++++++++++-------- 4 files changed, 39 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index a49ff2eb6fb3..d1f5345f04f0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -54,6 +54,17 @@ static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header) set_64bit_val(wqe, 24, header); } +void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev) +{ + if (cqp_timeout->compl_cqp_cmds != dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]) { + cqp_timeout->compl_cqp_cmds = dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]; + cqp_timeout->count = 0; + } else { + if (dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] != cqp_timeout->compl_cqp_cmds) + cqp_timeout->count++; + } +} + /** * i40iw_get_cqp_reg_info - get head and tail for cqp using registers * @cqp: struct for cqp hw diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h index 28a92fee0822..e217a1259f57 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_p.h +++ b/drivers/infiniband/hw/i40iw/i40iw_p.h @@ -35,11 +35,13 @@ #ifndef I40IW_P_H #define I40IW_P_H -#define PAUSE_TIMER_VALUE 0xFFFF -#define REFRESH_THRESHOLD 0x7FFF -#define HIGH_THRESHOLD 0x800 -#define LOW_THRESHOLD 0x200 -#define ALL_TC2PFC 0xFF +#define PAUSE_TIMER_VALUE 0xFFFF +#define REFRESH_THRESHOLD 0x7FFF +#define HIGH_THRESHOLD 0x800 +#define LOW_THRESHOLD 0x200 +#define ALL_TC2PFC 0xFF +#define CQP_COMPL_WAIT_TIME 0x3E8 +#define CQP_TIMEOUT_THRESHOLD 5 void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask, char *desc, u64 *buf, u32 size); @@ -51,6 +53,8 @@ void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp); u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch); +void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev); + enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp, struct i40iw_fast_reg_stag_info *info, bool post_sq); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 959ec81fba99..63118f6d5ab4 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -1345,4 +1345,9 @@ struct i40iw_virtchnl_work_info { void *worker_vf_dev; }; +struct i40iw_cqp_timeout { + u64 compl_cqp_cmds; + u8 count; +}; + #endif diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e311ec559f4e..62f1f45b8737 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -445,23 +445,29 @@ static int i40iw_wait_event(struct i40iw_device *iwdev, { struct cqp_commands_info *info = &cqp_request->info; struct i40iw_cqp *iwcqp = &iwdev->cqp; + struct i40iw_cqp_timeout cqp_timeout; bool cqp_error = false; int err_code = 0; - int timeout_ret = 0; + memset(&cqp_timeout, 0, sizeof(cqp_timeout)); + cqp_timeout.compl_cqp_cmds = iwdev->sc_dev.cqp_cmd_stats[OP_COMPLETED_COMMANDS]; + do { + if (wait_event_timeout(cqp_request->waitq, + cqp_request->request_done, CQP_COMPL_WAIT_TIME)) + break; - timeout_ret = wait_event_timeout(cqp_request->waitq, - cqp_request->request_done, - I40IW_EVENT_TIMEOUT); - if (!timeout_ret) { - i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n", - info->cqp_cmd, timeout_ret); + i40iw_check_cqp_progress(&cqp_timeout, &iwdev->sc_dev); + + if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD) + continue; + + i40iw_pr_err("error cqp command 0x%x timed out", info->cqp_cmd); err_code = -ETIME; if (!iwdev->reset) { iwdev->reset = true; i40iw_request_reset(iwdev); } goto done; - } + } while (1); cqp_error = cqp_request->compl_info.error; if (cqp_error) { i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n", -- cgit v1.2.3 From 0c98568c1f00eed16c4b452557ec40f5ac64784d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Fri, 11 Aug 2017 00:12:11 +0300 Subject: IB/pvrdma: Remove unused function The function pvrdma_idx_ring_is_valid_idx is not in used so let's remove it. Signed-off-by: Yuval Shaia Acked-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h index ed9022a91a1d..8b558ae234c8 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h @@ -111,21 +111,4 @@ static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r, return PVRDMA_INVALID_IDX; } -static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r, - __u32 max_elems, __u32 *idx) -{ - const __u32 tail = atomic_read(&r->prod_tail); - const __u32 head = atomic_read(&r->cons_head); - - if (pvrdma_idx_valid(tail, max_elems) && - pvrdma_idx_valid(head, max_elems) && - pvrdma_idx_valid(*idx, max_elems)) { - if (tail > head && (*idx < tail && *idx >= head)) - return true; - else if (head > tail && (*idx >= head || *idx < tail)) - return true; - } - return false; -} - #endif /* __PVRDMA_RING_H__ */ -- cgit v1.2.3 From 7be05753ccc27ce056d45f06a50d150927a88ed7 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 17 Aug 2017 07:58:07 -0700 Subject: RDMA: Fix return value check for ib_get_eth_speed() ib_get_eth_speed() return 0 on success. Fixing the condition checking and prevent reporting failure for query_port verb. Fixes: d41861942fc5 ("Add generic function to extract IB speed from netdev") Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++-- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 24e2785774b8..43b3dee4b6ba 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -264,8 +264,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, * IB stack to avoid race in the NETDEV_UNREG path */ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - if (!ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, - &port_attr->active_width)) + if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width)) return -EINVAL; return 0; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index e5f57dd49980..97dd79ebb590 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -309,8 +309,8 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); mutex_lock(&us_ibdev->usdev_lock); - if (!ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { + if (ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width)) { mutex_unlock(&us_ibdev->usdev_lock); return -EINVAL; } -- cgit v1.2.3 From b588300801f3502a7de5ca897af68019fbb3bc79 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 16 Aug 2017 23:31:22 +1000 Subject: IB/mlx5: use kvmalloc_array for mlx5_ib_wq We observed multiple times on our Lustre OSS servers that when the system memory is fragmented, kmalloc() in create_kernel_qp() could fail order 4/5 allocations while we still have many free pages. Switch to kvmalloc_array() to allow the operation to contine. Signed-off-by: Li Dongyang Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 35 ++++++++++++++++++++--------------- drivers/infiniband/hw/mlx5/srq.c | 4 ++-- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5c7ce9bd466e..e098c97e027a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -965,11 +965,16 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, goto err_free; } - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); - qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); - qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); - qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wr_data), GFP_KERNEL); + qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt, + sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(*qp->sq.wqe_head), GFP_KERNEL); if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || !qp->sq.w_list || !qp->sq.wqe_head) { @@ -981,11 +986,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, return 0; err_wrid: - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); err_free: @@ -998,11 +1003,11 @@ err_buf: static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { - kfree(qp->sq.wqe_head); - kfree(qp->sq.w_list); - kfree(qp->sq.wrid); - kfree(qp->sq.wr_data); - kfree(qp->rq.wrid); + kvfree(qp->sq.wqe_head); + kvfree(qp->sq.w_list); + kvfree(qp->sq.wrid); + kvfree(qp->sq.wr_data); + kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); mlx5_buf_free(dev->mdev, &qp->buf); } diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 43707b101f47..30b3ddd8e1ab 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -196,7 +196,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } mlx5_fill_page_array(&srq->buf, in->pas); - srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { err = -ENOMEM; goto err_in; @@ -230,7 +230,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); mlx5_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } -- cgit v1.2.3 From e9105cdefbf64cd7aea300f934c92051e7cb7cff Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Wed, 16 Aug 2017 23:31:23 +1000 Subject: IB/mlx4: use kvmalloc_array to allocate wrid We could use kvmalloc_array instead of the kmalloc and __vmalloc combination. After this we don't need to include linux/vmalloc.h Signed-off-by: Li Dongyang Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 15 ++++----------- drivers/infiniband/hw/mlx4/srq.c | 13 ++++--------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e42acfb20588..b40d50c643e1 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -1174,16 +1173,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); - if (!qp->sq.wrid) - qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); - qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); - if (!qp->rq.wrid) - qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); + qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt, + sizeof(u64), GFP_KERNEL); + qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt, + sizeof(u64), GFP_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 0facaf5f6d23..dd7a2fce9df4 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -34,7 +34,6 @@ #include #include #include -#include #include "mlx4_ib.h" #include @@ -171,15 +170,11 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; - srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64), - GFP_KERNEL | __GFP_NOWARN); + srq->wrid = kvmalloc_array(srq->msrq.max, + sizeof(u64), GFP_KERNEL); if (!srq->wrid) { - srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64), - GFP_KERNEL, PAGE_KERNEL); - if (!srq->wrid) { - err = -ENOMEM; - goto err_mtt; - } + err = -ENOMEM; + goto err_mtt; } } -- cgit v1.2.3 From 1eb5be0ec79a7b21cd6b5b73d9de294dc1809e0f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Aug 2017 14:57:38 -0600 Subject: rdma: Allow demand loading of NETLINK_RDMA Provide a module alias so that if userspace opens a netlink socket for RDMA the kernel support is loaded automatically. Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 27352a352770..f782697cf4d8 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "core_priv.h" #include "core_priv.h" @@ -290,3 +291,5 @@ void rdma_nl_exit(void) netlink_kernel_release(nls); } + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA); -- cgit v1.2.3 From e3bf14bdc17a8e917f337760cc7cacf3232d7dbc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Aug 2017 14:57:39 -0600 Subject: rdma: Autoload netlink client modules If a message comes in and we do not have the client in the table, then try to load the module supplying that client using MODULE_ALIAS to find it. This duplicates the scheme seen in other netlink muxes (eg nfnetlink). Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 ++ drivers/infiniband/core/device.c | 2 ++ drivers/infiniband/core/iwcm.c | 2 ++ drivers/infiniband/core/netlink.c | 9 +++++++++ drivers/infiniband/core/nldev.c | 3 +++ include/rdma/rdma_netlink.h | 12 ++++++++++++ 6 files changed, 30 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d8edd8b11561..b76de2e2b209 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4537,5 +4537,7 @@ static void __exit cma_cleanup(void) destroy_workqueue(cma_wq); } +MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1); + module_init(cma_init); module_exit(cma_cleanup); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 91d7cea1a0b9..fc6be1175183 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1252,5 +1252,7 @@ static void __exit ib_core_cleanup(void) destroy_workqueue(ib_wq); } +MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4); + module_init(ib_core_init); module_exit(ib_core_cleanup); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index e33528e102f8..fcf42f6bb82a 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1200,5 +1200,7 @@ static void __exit iw_cm_cleanup(void) iwpm_exit(RDMA_NL_IWCM); } +MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); + module_init(iw_cm_init); module_exit(iw_cm_cleanup); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index f782697cf4d8..e685148dd3e6 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -84,6 +84,15 @@ static bool is_nl_valid(unsigned int type, unsigned int op) return false; cb_table = rdma_nl_types[type].cb_table; +#ifdef CONFIG_MODULES + if (!cb_table) { + mutex_unlock(&rdma_nl_mutex); + request_module("rdma-netlink-subsys-%d", type); + mutex_lock(&rdma_nl_mutex); + cb_table = rdma_nl_types[type].cb_table; + } +#endif + if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) return false; return true; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 474022274e09..3ba24c428c3b 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -30,6 +30,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include #include @@ -320,3 +321,5 @@ void __exit nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } + +MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5); diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index e25bf1988846..2d878596b1e0 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -17,6 +17,18 @@ enum rdma_nl_flags { RDMA_NL_ADMIN_PERM = 1 << 0, }; +/* Define this module as providing netlink services for NETLINK_RDMA, with + * index _index. Since the client indexes were setup in a uapi header as an + * enum and we do no want to change that, the user must supply the expanded + * constant as well and the compiler checks they are the same. + */ +#define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ + static inline void __chk_##_index(void) \ + { \ + BUILD_BUG_ON(_index != _val); \ + } \ + MODULE_ALIAS("rdma-netlink-subsys-" __stringify(_val)) + /** * Register client in RDMA netlink. * @index: Index of the added client -- cgit v1.2.3 From 5ab2d89b85a6c5c2d7604c9674f77a68f3d24f91 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:32 +0300 Subject: IB/cma: Fix erroneous validation of supported default GID type When rdma_cm is initializing a cma_device it checks if this device supports the preferred default GID type. This check was done in a wrong way and therefore sometimes rdma_cm is coming up with default GID type that is not supported by the device. Fix that by checking for supported GID type properly. Fixes: 3c7f67d1880d ("IB/cma: Fix default RoCE type setting") Signed-off-by: Moni Shoua Signed-off-by: Leon Romanovsky Reviewed-by: Selvin Xavier Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b76de2e2b209..852c8fec8088 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,7 +72,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 -#define CMA_PREFERRED_ROCE_GID_TYPE (1 << IB_GID_TYPE_ROCE_UDP_ENCAP) +#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP static const char * const cma_events[] = { [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", @@ -4282,7 +4282,7 @@ static void cma_add_one(struct ib_device *device) for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { supported_gids = roce_gid_type_mask_support(device, i); WARN_ON(!supported_gids); - if (supported_gids & CMA_PREFERRED_ROCE_GID_TYPE) + if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE)) cma_dev->default_gid_type[i - rdma_start_port(device)] = CMA_PREFERRED_ROCE_GID_TYPE; else -- cgit v1.2.3 From 89caa0538ecf2114e1badbb1e75120d25bec985e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 17 Aug 2017 15:50:33 +0300 Subject: IB/uverbs: Introduce and use helper functions to copy ah attributes This patch introduces two helper functions to copy ah attributes from uverbs to internal ib_ah_attr structure and the other way during modify qp and query qp respectively. Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 127 ++++++++++++++--------------------- 1 file changed, 51 insertions(+), 76 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 59696f568f53..8e9fea03dec4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1820,6 +1820,28 @@ err_put: return ret; } +static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr, + struct rdma_ah_attr *rdma_attr) +{ + const struct ib_global_route *grh; + + uverb_attr->dlid = rdma_ah_get_dlid(rdma_attr); + uverb_attr->sl = rdma_ah_get_sl(rdma_attr); + uverb_attr->src_path_bits = rdma_ah_get_path_bits(rdma_attr); + uverb_attr->static_rate = rdma_ah_get_static_rate(rdma_attr); + uverb_attr->is_global = !!(rdma_ah_get_ah_flags(rdma_attr) & + IB_AH_GRH); + if (uverb_attr->is_global) { + grh = rdma_ah_read_grh(rdma_attr); + memcpy(uverb_attr->dgid, grh->dgid.raw, 16); + uverb_attr->flow_label = grh->flow_label; + uverb_attr->sgid_index = grh->sgid_index; + uverb_attr->hop_limit = grh->hop_limit; + uverb_attr->traffic_class = grh->traffic_class; + } + uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr); +} + ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, @@ -1830,7 +1852,6 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_qp *qp; struct ib_qp_attr *attr; struct ib_qp_init_attr *init_attr; - const struct ib_global_route *grh; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) @@ -1880,39 +1901,8 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; - resp.dest.dlid = rdma_ah_get_dlid(&attr->ah_attr); - resp.dest.sl = rdma_ah_get_sl(&attr->ah_attr); - resp.dest.src_path_bits = rdma_ah_get_path_bits(&attr->ah_attr); - resp.dest.static_rate = rdma_ah_get_static_rate(&attr->ah_attr); - resp.dest.is_global = !!(rdma_ah_get_ah_flags(&attr->ah_attr) & - IB_AH_GRH); - if (resp.dest.is_global) { - grh = rdma_ah_read_grh(&attr->ah_attr); - memcpy(resp.dest.dgid, grh->dgid.raw, 16); - resp.dest.flow_label = grh->flow_label; - resp.dest.sgid_index = grh->sgid_index; - resp.dest.hop_limit = grh->hop_limit; - resp.dest.traffic_class = grh->traffic_class; - } - resp.dest.port_num = rdma_ah_get_port_num(&attr->ah_attr); - - resp.alt_dest.dlid = rdma_ah_get_dlid(&attr->alt_ah_attr); - resp.alt_dest.sl = rdma_ah_get_sl(&attr->alt_ah_attr); - resp.alt_dest.src_path_bits = rdma_ah_get_path_bits(&attr->alt_ah_attr); - resp.alt_dest.static_rate - = rdma_ah_get_static_rate(&attr->alt_ah_attr); - resp.alt_dest.is_global - = !!(rdma_ah_get_ah_flags(&attr->alt_ah_attr) & - IB_AH_GRH); - if (resp.alt_dest.is_global) { - grh = rdma_ah_read_grh(&attr->alt_ah_attr); - memcpy(resp.alt_dest.dgid, grh->dgid.raw, 16); - resp.alt_dest.flow_label = grh->flow_label; - resp.alt_dest.sgid_index = grh->sgid_index; - resp.alt_dest.hop_limit = grh->hop_limit; - resp.alt_dest.traffic_class = grh->traffic_class; - } - resp.alt_dest.port_num = rdma_ah_get_port_num(&attr->alt_ah_attr); + copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr); + copy_ah_attr_to_uverbs(&resp.alt_dest, &attr->alt_ah_attr); resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; @@ -1946,6 +1936,29 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) } } +static void copy_ah_attr_from_uverbs(struct ib_device *dev, + struct rdma_ah_attr *rdma_attr, + struct ib_uverbs_qp_dest *uverb_attr) +{ + rdma_attr->type = rdma_ah_find_type(dev, uverb_attr->port_num); + if (uverb_attr->is_global) { + rdma_ah_set_grh(rdma_attr, NULL, + uverb_attr->flow_label, + uverb_attr->sgid_index, + uverb_attr->hop_limit, + uverb_attr->traffic_class); + rdma_ah_set_dgid_raw(rdma_attr, uverb_attr->dgid); + } else { + rdma_ah_set_ah_flags(rdma_attr, 0); + } + rdma_ah_set_dlid(rdma_attr, uverb_attr->dlid); + rdma_ah_set_sl(rdma_attr, uverb_attr->sl); + rdma_ah_set_path_bits(rdma_attr, uverb_attr->src_path_bits); + rdma_ah_set_static_rate(rdma_attr, uverb_attr->static_rate); + rdma_ah_set_port_num(rdma_attr, uverb_attr->port_num); + rdma_ah_set_make_grd(rdma_attr, false); +} + static int modify_qp(struct ib_uverbs_file *file, struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) { @@ -1993,50 +2006,12 @@ static int modify_qp(struct ib_uverbs_file *file, attr->rate_limit = cmd->rate_limit; if (cmd->base.attr_mask & IB_QP_AV) - attr->ah_attr.type = rdma_ah_find_type(qp->device, - cmd->base.dest.port_num); - if (cmd->base.dest.is_global) { - rdma_ah_set_grh(&attr->ah_attr, NULL, - cmd->base.dest.flow_label, - cmd->base.dest.sgid_index, - cmd->base.dest.hop_limit, - cmd->base.dest.traffic_class); - rdma_ah_set_dgid_raw(&attr->ah_attr, cmd->base.dest.dgid); - } else { - rdma_ah_set_ah_flags(&attr->ah_attr, 0); - } - rdma_ah_set_dlid(&attr->ah_attr, cmd->base.dest.dlid); - rdma_ah_set_sl(&attr->ah_attr, cmd->base.dest.sl); - rdma_ah_set_path_bits(&attr->ah_attr, cmd->base.dest.src_path_bits); - rdma_ah_set_static_rate(&attr->ah_attr, cmd->base.dest.static_rate); - rdma_ah_set_port_num(&attr->ah_attr, - cmd->base.dest.port_num); - rdma_ah_set_make_grd(&attr->ah_attr, false); + copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr, + &cmd->base.dest); if (cmd->base.attr_mask & IB_QP_ALT_PATH) - attr->alt_ah_attr.type = - rdma_ah_find_type(qp->device, cmd->base.dest.port_num); - if (cmd->base.alt_dest.is_global) { - rdma_ah_set_grh(&attr->alt_ah_attr, NULL, - cmd->base.alt_dest.flow_label, - cmd->base.alt_dest.sgid_index, - cmd->base.alt_dest.hop_limit, - cmd->base.alt_dest.traffic_class); - rdma_ah_set_dgid_raw(&attr->alt_ah_attr, - cmd->base.alt_dest.dgid); - } else { - rdma_ah_set_ah_flags(&attr->alt_ah_attr, 0); - } - - rdma_ah_set_dlid(&attr->alt_ah_attr, cmd->base.alt_dest.dlid); - rdma_ah_set_sl(&attr->alt_ah_attr, cmd->base.alt_dest.sl); - rdma_ah_set_path_bits(&attr->alt_ah_attr, - cmd->base.alt_dest.src_path_bits); - rdma_ah_set_static_rate(&attr->alt_ah_attr, - cmd->base.alt_dest.static_rate); - rdma_ah_set_port_num(&attr->alt_ah_attr, - cmd->base.alt_dest.port_num); - rdma_ah_set_make_grd(&attr->alt_ah_attr, false); + copy_ah_attr_from_uverbs(qp->device, &attr->alt_ah_attr, + &cmd->base.alt_dest); ret = ib_modify_qp_with_udata(qp, attr, modify_qp_mask(qp->qp_type, -- cgit v1.2.3 From fba02e6cb775078bc0bdf207197e0a44b1f85aca Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:34 +0300 Subject: RDMA/mlx4: Don't use uninitialized variable Avoid usage of uninitialized variable. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index b40d50c643e1..f7ee1792ba6d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2027,8 +2027,8 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type) */ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num) { + int err = 0; int i; - int err; for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { struct ib_wq *ibwq = ind_tbl->ind_tbl[i]; -- cgit v1.2.3 From dcc9881e6767559c04faf15804ac145a2ea026cb Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:36 +0300 Subject: RDMA/(core, ulp): Convert register/unregister event handler to be void The functions ib_register_event_handler() and ib_unregister_event_handler() always returned success and they can't fail. Let's convert those functions to be void, remove redundant checks and cleanup tons of goto statements. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 23 ++++++++--------------- drivers/infiniband/core/device.c | 8 ++------ drivers/infiniband/core/sa_query.c | 3 +-- drivers/infiniband/core/uverbs_main.c | 13 +------------ drivers/infiniband/ulp/ipoib/ipoib_main.c | 10 +--------- drivers/infiniband/ulp/iser/iser_verbs.c | 6 ++---- drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 7 +------ drivers/infiniband/ulp/srpt/ib_srpt.c | 5 ++--- include/rdma/ib_verbs.h | 4 ++-- 9 files changed, 20 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index efc94304dee3..77515638c55c 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1199,30 +1199,23 @@ int ib_cache_setup_one(struct ib_device *device) device->cache.ports = kzalloc(sizeof(*device->cache.ports) * (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); - if (!device->cache.ports) { - err = -ENOMEM; - goto out; - } + if (!device->cache.ports) + return -ENOMEM; err = gid_table_setup_one(device); - if (err) - goto out; + if (err) { + kfree(device->cache.ports); + device->cache.ports = NULL; + return err; + } for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device), true); INIT_IB_EVENT_HANDLER(&device->cache.event_handler, device, ib_cache_event); - err = ib_register_event_handler(&device->cache.event_handler); - if (err) - goto err; - + ib_register_event_handler(&device->cache.event_handler); return 0; - -err: - gid_table_cleanup_one(device); -out: - return err; } void ib_cache_release_one(struct ib_device *device) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index fc6be1175183..ec4786777447 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -747,7 +747,7 @@ EXPORT_SYMBOL(ib_set_client_data); * chapter 11 of the InfiniBand Architecture Specification). This * callback may occur in interrupt context. */ -int ib_register_event_handler (struct ib_event_handler *event_handler) +void ib_register_event_handler(struct ib_event_handler *event_handler) { unsigned long flags; @@ -755,8 +755,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler) list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); - - return 0; } EXPORT_SYMBOL(ib_register_event_handler); @@ -767,15 +765,13 @@ EXPORT_SYMBOL(ib_register_event_handler); * Unregister an event handler registered with * ib_register_event_handler(). */ -int ib_unregister_event_handler(struct ib_event_handler *event_handler) +void ib_unregister_event_handler(struct ib_event_handler *event_handler) { unsigned long flags; spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); list_del(&event_handler->list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); - - return 0; } EXPORT_SYMBOL(ib_unregister_event_handler); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 0179b21bad34..ab5e1024fea9 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2417,8 +2417,7 @@ static void ib_sa_add_one(struct ib_device *device) */ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); - if (ib_register_event_handler(&sa_dev->event_handler)) - goto err; + ib_register_event_handler(&sa_dev->event_handler); for (i = 0; i <= e - s; ++i) { if (rdma_cap_ib_sa(device, i + 1)) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5e530d2bee44..defeda33e27f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -595,7 +595,6 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file { struct ib_uverbs_async_event_file *ev_file; struct file *filp; - int ret; ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); if (!ev_file) @@ -621,21 +620,11 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, ib_dev, ib_uverbs_event_handler); - ret = ib_register_event_handler(&uverbs_file->event_handler); - if (ret) - goto err_put_file; - + ib_register_event_handler(&uverbs_file->event_handler); /* At that point async file stuff was fully set */ return filp; -err_put_file: - fput(filp); - kref_put(&uverbs_file->async_file->ref, - ib_uverbs_release_async_event_file); - uverbs_file->async_file = NULL; - return ERR_PTR(ret); - err_put_refs: kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&ev_file->ref, ib_uverbs_release_async_event_file); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ee9f5d281b37..344e8d3d47bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2227,13 +2227,7 @@ static struct net_device *ipoib_add_port(const char *format, INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); - result = ib_register_event_handler(&priv->event_handler); - if (result < 0) { - printk(KERN_WARNING "%s: ib_register_event_handler failed for " - "port %d (ret = %d)\n", - hca->name, port, result); - goto event_failed; - } + ib_register_event_handler(&priv->event_handler); result = register_netdev(priv->dev); if (result) { @@ -2266,8 +2260,6 @@ register_failed: set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); cancel_delayed_work(&priv->neigh_reap_task); flush_workqueue(priv->wq); - -event_failed: ipoib_dev_cleanup(priv->dev); device_init_failed: diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 26a004e97ae0..55a73b0ed4c6 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -106,9 +106,7 @@ static int iser_create_device_ib_res(struct iser_device *device) INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, iser_event_handler); - if (ib_register_event_handler(&device->event_handler)) - goto cq_err; - + ib_register_event_handler(&device->event_handler); return 0; cq_err: @@ -141,7 +139,7 @@ static void iser_free_device_ib_res(struct iser_device *device) comp->cq = NULL; } - (void)ib_unregister_event_handler(&device->event_handler); + ib_unregister_event_handler(&device->event_handler); ib_dealloc_pd(device->pd); kfree(device->comps); diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c index 57b862b94dca..21f0b481edcc 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c @@ -954,12 +954,7 @@ static int vema_register(struct opa_vnic_ctrl_port *cport) INIT_IB_EVENT_HANDLER(&port->event_handler, cport->ibdev, opa_vnic_event); - ret = ib_register_event_handler(&port->event_handler); - if (ret) { - c_err("port %d: event handler register failed\n", i); - vema_unregister(cport); - return ret; - } + ib_register_event_handler(&port->event_handler); idr_init(&port->vport_idr); mutex_init(&port->lock); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 402275be0931..9e8e9220f816 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2238,7 +2238,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) cqe, first_wr); cqe = NULL; } - + ret = ib_post_send(ch->qp, first_wr, &bad_wr); if (ret) { pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", @@ -2530,8 +2530,7 @@ static void srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - if (ib_register_event_handler(&sdev->event_handler)) - goto err_cm; + ib_register_event_handler(&sdev->event_handler); sdev->ioctx_ring = (struct srpt_recv_ioctx **) srpt_alloc_ioctx_ring(sdev, sdev->srq_size, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c155c105589d..e536a052e5dd 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2413,8 +2413,8 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask, enum rdma_link_layer ll); -int ib_register_event_handler (struct ib_event_handler *event_handler); -int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_register_event_handler(struct ib_event_handler *event_handler); +void ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); int ib_query_port(struct ib_device *device, -- cgit v1.2.3 From cdc596d89e39521cf412209a19b8baeb8d788cdc Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:38 +0300 Subject: RDMA/core: Delete BUG() from unreachable flow Remove call to BUG() in case wrong node_type was provided. This flow is unreachable, because node_types are supplied from specific enum. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 6a7cbc1540aa..4c1485e01c83 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -180,21 +180,15 @@ EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { - switch (node_type) { - case RDMA_NODE_IB_CA: - case RDMA_NODE_IB_SWITCH: - case RDMA_NODE_IB_ROUTER: - return RDMA_TRANSPORT_IB; - case RDMA_NODE_RNIC: - return RDMA_TRANSPORT_IWARP; - case RDMA_NODE_USNIC: + + if (node_type == RDMA_NODE_USNIC) return RDMA_TRANSPORT_USNIC; - case RDMA_NODE_USNIC_UDP: + if (node_type == RDMA_NODE_USNIC_UDP) return RDMA_TRANSPORT_USNIC_UDP; - default: - BUG(); - return 0; - } + if (node_type == RDMA_NODE_RNIC) + return RDMA_TRANSPORT_IWARP; + + return RDMA_TRANSPORT_IB; } EXPORT_SYMBOL(rdma_node_get_transport); -- cgit v1.2.3 From 82901e3eb8f72326b06ac8c7285462d90cd3a56c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:39 +0300 Subject: RDMA/core: Refactor get link layer wrapper The return values from rdma_node_get_transport() are strict and IB_LINK_LAYER_UNSPECIFIED is unreachable in this flow. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4c1485e01c83..b29d0ff94463 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -194,19 +194,15 @@ EXPORT_SYMBOL(rdma_node_get_transport); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) { + enum rdma_transport_type lt; if (device->get_link_layer) return device->get_link_layer(device, port_num); - switch (rdma_node_get_transport(device->node_type)) { - case RDMA_TRANSPORT_IB: + lt = rdma_node_get_transport(device->node_type); + if (lt == RDMA_TRANSPORT_IB) return IB_LINK_LAYER_INFINIBAND; - case RDMA_TRANSPORT_IWARP: - case RDMA_TRANSPORT_USNIC: - case RDMA_TRANSPORT_USNIC_UDP: - return IB_LINK_LAYER_ETHERNET; - default: - return IB_LINK_LAYER_UNSPECIFIED; - } + + return IB_LINK_LAYER_ETHERNET; } EXPORT_SYMBOL(rdma_port_get_link_layer); -- cgit v1.2.3 From 31f97bad06446ca2d929a363a4b5cd494f276ba5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:40 +0300 Subject: RDMA/mlx4: Remove gfp_mask argument from acquire_group call All callers of acquire_group() passed the same gfp_mask to it and it is safe to remove it. Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mcg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index b73f89700ef9..70eb9f917303 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -808,8 +808,7 @@ static ssize_t sysfs_show_group(struct device *dev, struct device_attribute *attr, char *buf); static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, - union ib_gid *mgid, int create, - gfp_t gfp_mask) + union ib_gid *mgid, int create) { struct mcast_group *group, *cur_group; int is_mgid0; @@ -825,7 +824,7 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, if (!create) return ERR_PTR(-ENOENT); - group = kzalloc(sizeof *group, gfp_mask); + group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -892,7 +891,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, case IB_MGMT_METHOD_GET_RESP: case IB_SA_METHOD_DELETE_RESP: mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL); + group = acquire_group(ctx, &rec->mgid, 0); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) { @@ -954,7 +953,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, req->sa_mad = *sa_mad; mutex_lock(&ctx->mcg_table_lock); - group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL); + group = acquire_group(ctx, &rec->mgid, may_create); mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { kfree(req); -- cgit v1.2.3 From 5d50f400e56fbc7a14ef3f8d42ba47710e455881 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:41 +0300 Subject: RDMA/usnic: Fix remove address space warning Sparse tool complains with the following error: drivers/infiniband/hw/usnic/usnic_ib_main.c:445:16: warning: cast removes address space of expression Fix it by doing casting on correct field and convert function helper which sets ifaddr to be void, because there are no users who are interested in returned value. Fixes: c7845bcafe4d ("IB/usnic: Add UDP support in u*verbs.c, u*main.c and u*util.h") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_fwd.c | 12 ++---------- drivers/infiniband/hw/usnic/usnic_fwd.h | 2 +- drivers/infiniband/hw/usnic/usnic_ib_main.c | 10 ++++++---- 3 files changed, 9 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c index 3c37dd59c04e..995a26b65156 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.c +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -110,20 +110,12 @@ void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) spin_unlock(&ufdev->lock); } -int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) +void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) { - int status; - spin_lock(&ufdev->lock); - if (ufdev->inaddr == 0) { + if (!ufdev->inaddr) ufdev->inaddr = inaddr; - status = 0; - } else { - status = -EFAULT; - } spin_unlock(&ufdev->lock); - - return status; } void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev) diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h index b2ac22be0731..0b2cc4e79707 100644 --- a/drivers/infiniband/hw/usnic/usnic_fwd.h +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -75,7 +75,7 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); -int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); +void usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index e86700f994cb..f45e99a938e0 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -351,7 +351,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) { struct usnic_ib_dev *us_ibdev; union ib_gid gid; - struct in_ifaddr *in; + struct in_device *ind; struct net_device *netdev; usnic_dbg("\n"); @@ -441,9 +441,11 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (netif_carrier_ok(us_ibdev->netdev)) usnic_fwd_carrier_up(us_ibdev->ufdev); - in = ((struct in_device *)(netdev->ip_ptr))->ifa_list; - if (in != NULL) - usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address); + ind = in_dev_get(netdev); + if (ind->ifa_list) + usnic_fwd_add_ipaddr(us_ibdev->ufdev, + ind->ifa_list->ifa_address); + in_dev_put(ind); usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, us_ibdev->ufdev->inaddr, &gid.raw[0]); -- cgit v1.2.3 From 623248955d985b86cd778c63efe6a7ad03a018c4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:42 +0300 Subject: RDMA/mthca: Make explicit conversion to 64bit value The "lg" variable is declared as int so in all places where this variable is used as a shift operand, the output will be int too. This produces the following smatch warning: drivers/infiniband/hw/mthca/mthca_cmd.c:701 mthca_map_cmd() warn: should '1 << lg' be a 64 bit type? Simple declaration of "1" to be "1ULL" will fix the issue. Signed-off-by: Leon Romanovsky Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index a1900d2a371b..419a2a20c047 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -698,7 +698,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); - virt += 1 << lg; + virt += 1ULL << lg; } pages[nent * 2 + 1] = -- cgit v1.2.3 From faa9141c2268ccbd469bd876ba97e98b38f50fae Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Thu, 17 Aug 2017 15:50:43 +0300 Subject: IB/mlx4: Fix some spelling mistakes Fix spelling mistakes in remarks "retrun"->"return" "cancell"->"cancel" Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/cq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0e4f60cfc59d..155b4dfc0ae8 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -781,7 +781,7 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) spin_lock_irqsave(&dev->sriov.going_down_lock, flags); spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) { - /* If there is pending one should cancell then run, otherwise + /* If there is pending one should cancel then run, otherwise * won't run till previous one is ended as same work * struct is used. */ diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 95382faa7ad1..cab796341697 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -637,7 +637,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, struct mlx4_ib_qp *qp; *npolled = 0; - /* Find uncompleted WQEs belonging to that cq and retrun + /* Find uncompleted WQEs belonging to that cq and return * simulated FLUSH_ERR completions */ list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { -- cgit v1.2.3 From 4edf8d5caf52968f3ebb58140835cfa1cfd72a2d Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Thu, 17 Aug 2017 15:50:44 +0300 Subject: IB/mlx5: Fix some spelling mistakes Fix spelling mistakes in remarks "retrun"->"return" "Decalring"->"Declaring" Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/mad.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index c155df465c44..2aa53f427685 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -499,7 +499,7 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, struct mlx5_ib_qp *qp; *npolled = 0; - /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */ + /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { sw_send_comp(qp, num_entries, wc + *npolled, npolled); if (*npolled >= num_entries) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 18cfe5bf0fa3..1003b0133a49 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -204,7 +204,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, int err; void *out_cnt; - /* Decalring support of extended counters */ + /* Declaring support of extended counters */ if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { struct ib_class_port_info cpi = {}; -- cgit v1.2.3 From 4a5fd5d2965cb889f30ad94a3bfd83da70aa2c9c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 17 Aug 2017 15:50:45 +0300 Subject: IB/mlx5: Add necessary delay drop assignment Assign the statistics and configuration structure pointer on success. Fixes: fe248c3a5837 ('IB/mlx5: Add delay drop configuration and statistics') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 55045e204518..1a67668f092f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3793,6 +3793,8 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) if (!dbg->timeout_debugfs) goto out_debugfs; + dev->delay_drop.dbg = dbg; + return 0; out_debugfs: -- cgit v1.2.3 From c3f1ee292f23cf49b43c0e9cf8c52bec821b6649 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:46 +0300 Subject: IB/mlx4: Fix RSS QP type in creation verb The mlx4 was designed to support QP type of MLX4_IB_QPT_RAW_PACKET. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f7ee1792ba6d..0d2923c2225f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -748,7 +748,7 @@ static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); - qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE; + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; qp->state = IB_QPS_RESET; /* Set dummy send resources to be compatible with HV and PRM */ -- cgit v1.2.3 From 078b3573030346df0cdc46d798c0f434dc53c2cc Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:47 +0300 Subject: IB/mlx4: Fix struct mlx4_ib_create_wq alignment The mlx4 ABI defines to have structures with alignment of 64B. Fixes: 400b1ebcfe31 ("IB/mlx4: Add support for WQ related verbs") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 9 ++++----- include/uapi/rdma/mlx4-abi.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0d2923c2225f..c3958fcfed75 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1046,9 +1046,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (src == MLX4_IB_RWQ_SRC) { - if (ucmd.wq.comp_mask || ucmd.wq.reserved1 || - ucmd.wq.reserved[0] || ucmd.wq.reserved[1] || - ucmd.wq.reserved[2]) { + if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] || + ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) { pr_debug("user command isn't supported\n"); err = -EOPNOTSUPP; goto err; @@ -4146,8 +4145,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!(udata && pd->uobject)) return ERR_PTR(-EINVAL); - required_cmd_sz = offsetof(typeof(ucmd), reserved) + - sizeof(ucmd.reserved); + required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + + sizeof(ucmd.comp_mask); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); return ERR_PTR(-EINVAL); diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h index 21cce1a4c4dd..0e10102861b5 100644 --- a/include/uapi/rdma/mlx4-abi.h +++ b/include/uapi/rdma/mlx4-abi.h @@ -121,7 +121,6 @@ struct mlx4_ib_create_wq { __u8 log_range_size; __u8 reserved[3]; __u32 comp_mask; - __u32 reserved1; }; struct mlx4_ib_modify_wq { -- cgit v1.2.3 From f9bfea992e7d7557c40a58e8536ad8c6f177de25 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 17 Aug 2017 15:50:49 +0300 Subject: IB/mlx4: Check that reserved fields in mlx4_ib_create_qp_rss are zero According to mlx4 convention, need to fail the command due to a non-zero value in the user data which is expected to be zero. Fixes: 3078f5f1bd8b ("IB/mlx4: Add support for RSS QP") Signed-off-by: Guy Levi Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c3958fcfed75..1ac148ddb250 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -812,6 +812,9 @@ static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, return ERR_PTR(-EFAULT); } + if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved))) + return ERR_PTR(-EOPNOTSUPP); + if (ucmd.comp_mask || ucmd.reserved1) return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3 From 69956d83267e29e38cda7d506c4085932789fef2 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 17 Aug 2017 15:50:50 +0300 Subject: IB/ipoib: Sync between remove_one to sysfs calls that use rtnl_lock In order to avoid deadlock between sysfs functions (like create/delete child) and remove_one (both of them are using the sysfs lock and rtnl_lock) the driver will use a state mutex for sync. That will fix traces as the following: schedule+0x3e/0x90 kernfs_drain+0x75/0xf0 ? wait_woken+0x90/0x90 __kernfs_remove+0x12e/0x1c0 kernfs_remove+0x25/0x40 sysfs_remove_dir+0x57/0x90 kobject_del+0x22/0x60 device_del+0x195/0x230 pm_runtime_set_memalloc_noio+0xac/0xf0 netdev_unregister_kobject+0x71/0x80 rollback_registered_many+0x205/0x2f0 rollback_registered+0x31/0x40 unregister_netdevice_queue+0x58/0xb0 unregister_netdev+0x20/0x30 ipoib_remove_one+0xb7/0x240 [ib_ipoib] ib_unregister_device+0xbc/0x1b0 [ib_core] ib_unregister_mad_agent+0x29/0x30 [ib_core] mlx4_ib_remove+0x67/0x280 [mlx4_ib] INFO: task echo:24082 blocked for more than 120 seconds. Tainted: G OE 4.1.12-37.5.1.el6uek.x86_64 #2 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. Call Trace: schedule+0x3e/0x90 schedule_preempt_disabled+0xe/0x10 __mutex_lock_slowpath+0x95/0x110 ? _rcu_barrier+0x177/0x220 mutex_lock+0x23/0x40 rtnl_lock+0x15/0x20 netdev_run_todo+0x81/0x1f0 rtnl_unlock+0xe/0x10 ipoib_vlan_delete+0x12f/0x1c0 [ib_ipoib] delete_child+0x69/0x80 [ib_ipoib] dev_attr_store+0x20/0x30 sysfs_kf_write+0x41/0x50 Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 +++++++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 5 +++++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 22 ++++++++++++++++++---- 4 files changed, 31 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 16c1af1c706e..4a5c7a07a631 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -337,6 +337,7 @@ struct ipoib_dev_priv { struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; + struct mutex sysfs_mutex; struct rb_root path_tree; struct list_head path_list; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d69410c2ed97..14b62f7472b4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1506,9 +1506,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) return -EPERM; - if (!rtnl_trylock()) + if (!mutex_trylock(&priv->sysfs_mutex)) return restart_syscall(); + if (!rtnl_trylock()) { + mutex_unlock(&priv->sysfs_mutex); + return restart_syscall(); + } + ret = ipoib_set_mode(dev, buf); /* The assumption is that the function ipoib_set_mode returned @@ -1517,6 +1522,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, */ if (ret != -EBUSY) rtnl_unlock(); + mutex_unlock(&priv->sysfs_mutex); return (!ret || ret == -EBUSY) ? count : ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 344e8d3d47bd..5f143445daa9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1878,6 +1878,7 @@ static void ipoib_build_priv(struct net_device *dev) spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); + mutex_init(&priv->sysfs_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); @@ -2329,7 +2330,11 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) cancel_delayed_work(&priv->neigh_reap_task); flush_workqueue(priv->wq); + /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ + mutex_lock(&priv->sysfs_mutex); unregister_netdev(priv->dev); + mutex_unlock(&priv->sysfs_mutex); + rn->free_rdma_netdev(priv->dev); list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 081b33deff1b..9927cd6b7082 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -133,12 +133,20 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) snprintf(intf_name, sizeof intf_name, "%s.%04x", ppriv->dev->name, pkey); - if (!rtnl_trylock()) + if (!mutex_trylock(&ppriv->sysfs_mutex)) return restart_syscall(); + if (!rtnl_trylock()) { + mutex_unlock(&ppriv->sysfs_mutex); + return restart_syscall(); + } + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); - if (!priv) + if (!priv) { + rtnl_unlock(); + mutex_unlock(&ppriv->sysfs_mutex); return -ENOMEM; + } down_write(&ppriv->vlan_rwsem); @@ -164,8 +172,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) out: up_write(&ppriv->vlan_rwsem); - rtnl_unlock(); + mutex_unlock(&ppriv->sysfs_mutex); if (result) { free_netdev(priv->dev); @@ -188,8 +196,13 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) return -EPERM; - if (!rtnl_trylock()) + if (!mutex_trylock(&ppriv->sysfs_mutex)) + return restart_syscall(); + + if (!rtnl_trylock()) { + mutex_unlock(&ppriv->sysfs_mutex); return restart_syscall(); + } down_write(&ppriv->vlan_rwsem); list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { @@ -208,6 +221,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) } rtnl_unlock(); + mutex_unlock(&ppriv->sysfs_mutex); if (dev) { free_netdev(dev); -- cgit v1.2.3 From fab773cb51150ea29ec041d5d001845fac026a3b Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 17 Aug 2017 15:50:52 +0300 Subject: IB/rxe: Make rxe_counter_name static rxe_counter_name is used in rxe_hw_counters.c only. Make it static. Fixes: 0b1e5b99a48b ('IB/rxe: Add port protocol stats') Signed-off-by: Kamal Heib Reviewed-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 7ef90aad7dfd..6aeb7a165e46 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -33,7 +33,7 @@ #include "rxe.h" #include "rxe_hw_counters.h" -const char * const rxe_counter_name[] = { +static const char * const rxe_counter_name[] = { [RXE_CNT_SENT_PKTS] = "sent_pkts", [RXE_CNT_RCVD_PKTS] = "rcvd_pkts", [RXE_CNT_DUP_REQ] = "duplicate_request", -- cgit v1.2.3 From 84305d71d6ca9a2e6a33d9b403639f1cdeeee846 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:53 +0300 Subject: RDMA/mlx5: Limit scope of get vector affinity local function The mlx5_ib_get_vector_affinity() call is local to main.c file and there is no need to be declared globally visible. Fixes: 40b24403f33e ("mlx5: support ->get_vector_affinity") Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1a67668f092f..762ef6bf219e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3819,8 +3819,8 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -const struct cpumask *mlx5_ib_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) +static const struct cpumask * +mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); -- cgit v1.2.3 From 17bf1ad2e80db7c9bdafec9c2da72389247194e9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:54 +0300 Subject: RDMA/mlx4: Properly annotate link layer variable The rdma_port_get_link_layer() returns enum rdma_link_layer as a return value, hence it is better to store the return value in specially annotated variable and not in int. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 1ac148ddb250..2747abde2ea8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2725,19 +2725,17 @@ enum { static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED; struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (cur_state == new_state && cur_state == IB_QPS_RESET) { - ll = IB_LINK_LAYER_UNSPECIFIED; - } else { + if (cur_state != new_state || cur_state != IB_QPS_RESET) { int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; ll = rdma_port_get_link_layer(&dev->ib_dev, port); } -- cgit v1.2.3 From 4b7ee6781f6aab3d236021af36aa4c95a089f402 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 17 Aug 2017 15:50:55 +0300 Subject: RDMA/nes: Remove zeroed parameter from port query callback There is no need to explicitly zero parameters, because the structure requested to be filled already initialized to zeros. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_verbs.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index c2943e39d2f9..f0dc5f4aa177 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -481,21 +481,16 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); props->lid = 1; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; if (netif_queue_stopped(netdev)) props->state = IB_PORT_DOWN; else if (nesvnic->linkup) props->state = IB_PORT_ACTIVE; else props->state = IB_PORT_DOWN; - props->phys_state = 0; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = IB_WIDTH_4X; props->active_speed = IB_SPEED_SDR; props->max_msg_sz = 0x80000000; -- cgit v1.2.3 From 31a8236276c1328d6770c30cf0b3dc5c07ede65b Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Wed, 23 Aug 2017 08:37:21 +0300 Subject: IB/ipoib: Enable ioctl for to IPoIB rdma netdevs Adds support for ioctl callback in the RDMA netdevs to allow supporting functions not handled by the generic interface code. Signed-off-by: Feras Daoud Signed-off-by: Eitan Rabin Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ee9f5d281b37..4c7a4887077f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -99,6 +99,8 @@ static struct net_device *ipoib_get_net_dev_by_params( const union ib_gid *gid, const struct sockaddr *addr, void *client_data); static int ipoib_set_mac(struct net_device *dev, void *addr); +static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd); static struct ib_client ipoib_client = { .name = "ipoib", @@ -1680,6 +1682,17 @@ out: return -ENOMEM; } +static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->rn_ops->ndo_do_ioctl) + return -EOPNOTSUPP; + + return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); +} + int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1834,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { .ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_mac_address = ipoib_set_mac, .ndo_get_stats64 = ipoib_get_stats, + .ndo_do_ioctl = ipoib_ioctl, }; static const struct net_device_ops ipoib_netdev_ops_vf = { @@ -1847,6 +1861,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, .ndo_get_stats64 = ipoib_get_stats, + .ndo_do_ioctl = ipoib_ioctl, }; void ipoib_setup_common(struct net_device *dev) -- cgit v1.2.3 From 05297b66ad874f6b650498a39af5a4e353e5ba19 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Tue, 22 Aug 2017 23:19:00 -0700 Subject: RDMA/vmw_pvrdma: Add RoCEv2 support The driver version is bumped for compatibility purposes. Also, send correct GID type during register to device. Added compatibility check macros for the device. Reviewed-by: Jorgen Hansen Reviewed-by: Aditya Sarwade Signed-off-by: Bryan Tan Signed-off-by: Adit Ranadive Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 28 +++++++++++++++++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 36 ++++++++++------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c | 7 +++++ 4 files changed, 51 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 8e2f0a11690f..663a0c301c43 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -194,6 +194,7 @@ struct pvrdma_dev { void *resp_slot; unsigned long flags; struct list_head device_link; + unsigned int dsr_version; /* Locking and interrupt information. */ spinlock_t cmd_lock; /* Command lock. */ @@ -444,6 +445,7 @@ void pvrdma_ah_attr_to_rdma(struct rdma_ah_attr *dst, const struct pvrdma_ah_attr *src); void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, const struct rdma_ah_attr *src); +u8 ib_gid_type_to_pvrdma(enum ib_gid_type gid_type); int pvrdma_uar_table_init(struct pvrdma_dev *dev); void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 09078ccfaec7..3a308ffe00bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -50,7 +50,15 @@ #include "pvrdma_verbs.h" -#define PVRDMA_VERSION 17 +/* + * PVRDMA version macros. Some new features require updates to PVRDMA_VERSION. + * These macros allow us to check for different features if necessary. + */ + +#define PVRDMA_ROCEV1_VERSION 17 +#define PVRDMA_ROCEV2_VERSION 18 +#define PVRDMA_VERSION PVRDMA_ROCEV2_VERSION + #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -123,6 +131,24 @@ #define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0) #define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1) +/* + * Version checks. This checks whether each version supports specific + * capabilities from the device. + */ + +#define PVRDMA_IS_VERSION17(_dev) \ + (_dev->dsr_version == PVRDMA_ROCEV1_VERSION && \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) + +#define PVRDMA_IS_VERSION18(_dev) \ + (_dev->dsr_version >= PVRDMA_ROCEV2_VERSION && \ + (_dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1 || \ + _dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)) \ + +#define PVRDMA_SUPPORTED(_dev) \ + ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) && \ + (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev))) + enum pvrdma_pci_resource { PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 5b00156dff30..6ce709a67959 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -128,10 +128,14 @@ static int pvrdma_init_device(struct pvrdma_dev *dev) static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { + struct pvrdma_dev *dev = to_vdev(ibdev); struct ib_port_attr attr; int err; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE; + else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2) + immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; err = ib_query_port(ibdev, port_num, &attr); if (err) @@ -569,6 +573,7 @@ static void pvrdma_free_slots(struct pvrdma_dev *dev) static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, const union ib_gid *gid, + u8 gid_type, int index) { int ret; @@ -586,7 +591,7 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev, cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024); cmd_bind->vlan = 0xfff; cmd_bind->index = index; - cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1; + cmd_bind->gid_type = gid_type; ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) { @@ -607,7 +612,9 @@ static int pvrdma_add_gid(struct ib_device *ibdev, { struct pvrdma_dev *dev = to_vdev(ibdev); - return pvrdma_add_gid_at_index(dev, gid, index); + return pvrdma_add_gid_at_index(dev, gid, + ib_gid_type_to_pvrdma(attr->gid_type), + index); } static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index) @@ -722,7 +729,6 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, int ret; unsigned long start; unsigned long len; - unsigned int version; dma_addr_t slot_dma = 0; dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev)); @@ -819,13 +825,9 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, goto err_unmap_regs; } - version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); + dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION); dev_info(&pdev->dev, "device version %d, driver version %d\n", - version, PVRDMA_VERSION); - if (version < PVRDMA_VERSION) { - dev_err(&pdev->dev, "incompatible device version\n"); - goto err_uar_unmap; - } + dev->dsr_version, PVRDMA_VERSION); dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), &dev->dsrbase, GFP_KERNEL); @@ -896,17 +898,9 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, /* Make sure the write is complete before reading status. */ mb(); - /* Currently, the driver only supports RoCE mode. */ - if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) { - dev_err(&pdev->dev, "unsupported transport %d\n", - dev->dsr->caps.mode); - ret = -EFAULT; - goto err_free_cq_ring; - } - - /* Currently, the driver only supports RoCE V1. */ - if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) { - dev_err(&pdev->dev, "driver needs RoCE v1 support\n"); + /* The driver supports RoCE V1 and V2. */ + if (!PVRDMA_SUPPORTED(dev)) { + dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n"); ret = -EFAULT; goto err_free_cq_ring; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index ec6a4ca1eeb7..fb0c5c0976b3 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -303,3 +303,10 @@ void rdma_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst, dst->port_num = rdma_ah_get_port_num(src); memcpy(&dst->dmac, src->roce.dmac, sizeof(dst->dmac)); } + +u8 ib_gid_type_to_pvrdma(enum ib_gid_type gid_type) +{ + return (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? + PVRDMA_GID_TYPE_FLAG_ROCE_V2 : + PVRDMA_GID_TYPE_FLAG_ROCE_V1; +} -- cgit v1.2.3 From a31a2a3b27f18bce0830581057a3fff75b830924 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Tue, 22 Aug 2017 23:19:01 -0700 Subject: RDMA/vmw_pvrdma: Update device query parameters and port caps Added support for two device caps - max_sge_rd, max_fast_reg_page_list_len and the IP_BASED_GIDS port cap flag. Reviewed-by: Jorgen Hansen Reviewed-by: Bryan Tan Reviewed-by: Aditya Sarwade Signed-off-by: Adit Ranadive Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 9 ++++++++- drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 3a308ffe00bd..df0a6b525021 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -149,6 +149,13 @@ ((_dev->dsr->caps.mode == PVRDMA_DEVICE_MODE_ROCE) && \ (PVRDMA_IS_VERSION17(_dev) || PVRDMA_IS_VERSION18(_dev))) +/* + * Get capability values based on device version. + */ + +#define PVRDMA_GET_CAP(_dev, _old_val, _val) \ + ((PVRDMA_IS_VERSION18(_dev)) ? _val : _old_val) + enum pvrdma_pci_resource { PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */ @@ -251,7 +258,7 @@ struct pvrdma_device_caps { u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */ u8 bmme_flags; /* FRWR Mem Mgmt Extensions */ u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */ - u8 reserved[4]; + u32 max_fast_reg_page_list_len; }; struct pvrdma_ring_page_info { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index 28517042011d..48776f5ffb0e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -83,6 +83,8 @@ int pvrdma_query_device(struct ib_device *ibdev, props->max_qp_wr = dev->dsr->caps.max_qp_wr; props->device_cap_flags = dev->dsr->caps.device_cap_flags; props->max_sge = dev->dsr->caps.max_sge; + props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, + dev->dsr->caps.max_sge_rd); props->max_cq = dev->dsr->caps.max_cq; props->max_cqe = dev->dsr->caps.max_cqe; props->max_mr = dev->dsr->caps.max_mr; @@ -101,8 +103,14 @@ int pvrdma_query_device(struct ib_device *ibdev, (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) && (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev, + PVRDMA_MAX_FAST_REG_PAGES, + dev->dsr->caps.max_fast_reg_page_list_len); } + props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_RC_RNR_NAK_GEN; + return 0; } @@ -143,6 +151,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = resp->attrs.gid_tbl_len; props->port_cap_flags = pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); + props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->max_msg_sz = resp->attrs.max_msg_sz; props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; -- cgit v1.2.3 From 61e0962d52216f2e5bab59bb055f1210e41f484f Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 23 Aug 2017 01:08:07 -0700 Subject: IB: Avoid ib_modify_port() failure for RoCE devices IB CM calls ib_modify_port() irrespective of link layer. If the failure is returned, the mad agent gets unregistered for those devices. Recently, modify_port() hook was removed from some of the low level drivers as it was always returning success. This breaks rdma connection establishment over those devices. For ethernet devices, Qkey violation and port capabilities are not applicable. So returning success for RoCE when modify_port hook is is not implemented. Cc: Leon Romanovsky Signed-off-by: Selvin Xavier Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index fc6be1175183..2466ffc6362d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1005,14 +1005,17 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { - if (!device->modify_port) - return -ENOSYS; + int rc; if (!rdma_is_port_valid(device, port_num)) return -EINVAL; - return device->modify_port(device, port_num, port_modify_mask, - port_modify); + if (device->modify_port) + rc = device->modify_port(device, port_num, port_modify_mask, + port_modify); + else + rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS; + return rc; } EXPORT_SYMBOL(ib_modify_port); -- cgit v1.2.3 From 847cb1a3566ca5b47b94a8e90f881b533f35cf07 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Aug 2017 09:25:53 +0100 Subject: RDMA/qedr: fix spelling mistake: "invlaid" -> "invalid" Trivial fix to spelling mistake in DP_ERR error message Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Reviewed-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9ee2dce3e5bb..769ac07c3c8e 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -491,7 +491,7 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, (udata && context) ? "User Lib" : "Kernel"); if (!dev->rdma_ctx) { - DP_ERR(dev, "invlaid RDMA context\n"); + DP_ERR(dev, "invalid RDMA context\n"); return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From accbef5cc624be745c1de903dd3a05681aaa0ac1 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Thu, 24 Aug 2017 20:11:42 +0300 Subject: RDMA/i40iw: Remove unused argument None of the calls to i40iw_netdev_vlan_ipv6 are using mac so let's remove it from func's args-list. Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index a2b135039e5a..14f36ba4e5be 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1582,15 +1582,14 @@ static enum i40iw_status_code i40iw_del_multiple_qhash( } /** - * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac + * i40iw_netdev_vlan_ipv6 - Gets the netdev and vlan * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address - * @mac: mac address for the given IPv6 address * * Returns the net_device of the IPv6 address and also sets the - * vlan id and mac for that address. + * vlan id for that address. */ -static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) +static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id) { struct net_device *ip_dev = NULL; struct in6_addr laddr6; @@ -1600,15 +1599,11 @@ static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *ma i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) *vlan_id = I40IW_NO_VLAN; - if (mac) - eth_zero_addr(mac); rcu_read_lock(); for_each_netdev_rcu(&init_net, ip_dev) { if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) { if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(ip_dev); - if (ip_dev->dev_addr && mac) - ether_addr_copy(mac, ip_dev->dev_addr); break; } } @@ -3588,7 +3583,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; - i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL); + i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id); } i40iw_debug(cm_node->dev, I40IW_DEBUG_CM, @@ -3787,7 +3782,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) raddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); - i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL); + i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id); } cm_info.cm_id = cm_id; cm_info.tos = cm_id->tos; @@ -3929,8 +3924,7 @@ int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_info.loc_port = ntohs(laddr6->sin6_port); if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) i40iw_netdev_vlan_ipv6(cm_info.loc_addr, - &cm_info.vlan_id, - NULL); + &cm_info.vlan_id); else wildcard = true; } -- cgit v1.2.3 From 96dc3fc5f1d66b20cdf839d571c7b907e08d5d00 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Thu, 17 Aug 2017 15:52:28 +0300 Subject: IB/mlx5: Expose software parsing for Raw Ethernet QP Software parsing (SWP) is a feature that can be used to instruct the device to stop using its internal parser and to parse packets on the transmit path according to offsets set for each packets. Through this feature, the device allows the handling of checksum and LSO by the hardware according to the location of IP and TCP/UDP headers. Enable SW parsing on Raw Ethernet send queue by default if firmware supports it and report these capabilities to user space. Signed-off-by: Noa Osherovich Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ include/uapi/rdma/mlx5-abi.h | 17 +++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 762ef6bf219e..07789450ec42 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -811,6 +811,27 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), reserved, uhw->outlen)) resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), sw_parsing_caps, + uhw->outlen)) { + resp.response_length += sizeof(resp.sw_parsing_caps); + if (MLX5_CAP_ETH(mdev, swp)) { + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING; + + if (MLX5_CAP_ETH(mdev, swp_csum)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_CSUM; + + if (MLX5_CAP_ETH(mdev, swp_lso)) + resp.sw_parsing_caps.sw_parsing_offloads |= + MLX5_IB_SW_PARSING_LSO; + + if (resp.sw_parsing_caps.sw_parsing_offloads) + resp.sw_parsing_caps.supported_qpts = + BIT(IB_QPT_RAW_PACKET); + } + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bc49d14e0a00..656773196f27 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1088,6 +1088,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, tis_num_0, sq->tisn); + if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && + MLX5_CAP_ETH(dev->mdev, swp)) + MLX5_SET(sqc, sqc, allow_swp, 1); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 0b3d30837a9f..64d398e662cd 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,22 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_sw_parsing_offloads { + MLX5_IB_SW_PARSING = 1 << 0, + MLX5_IB_SW_PARSING_CSUM = 1 << 1, + MLX5_IB_SW_PARSING_LSO = 1 << 2, +}; + +struct mlx5_ib_sw_parsing_caps { + __u32 sw_parsing_offloads; /* enum mlx5_ib_sw_parsing_offloads */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; @@ -177,6 +193,7 @@ struct mlx5_ib_query_device_resp { struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; + struct mlx5_ib_sw_parsing_caps sw_parsing_caps; }; struct mlx5_ib_create_cq { -- cgit v1.2.3 From 8b7ff7f3b301de52924cb2cf3fed47b181893116 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:29 +0300 Subject: IB/mlx5: Enable UMR for MRs created with reg_create This patch is the first step in decoupling UMR usage and allocation from the MR cache. The only functional change in this patch is to enables UMR for MRs created with reg_create. This change fixes a bug where ODP memory regions that were not allocated from the MR cache did not have UMR enabled. Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/infiniband/hw/mlx5/mr.c | 33 ++++++++++++++------------------- include/linux/mlx5/driver.h | 2 +- 3 files changed, 16 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 7ac991070020..b3380e8beacf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -503,7 +503,7 @@ struct mlx5_ib_mr { struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; - int umred; + bool allocated_from_cache; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index a0eb2f96179a..bc87016021e3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -48,8 +48,7 @@ enum { #define MLX5_UMR_ALIGN 2048 static int clean_mr(struct mlx5_ib_mr *mr); -static int max_umr_order(struct mlx5_ib_dev *dev); -static int use_umr(struct mlx5_ib_dev *dev, int order); +static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) @@ -184,7 +183,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr->order = ent->order; - mr->umred = 1; + mr->allocated_from_cache = 1; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); @@ -497,7 +496,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) int i; c = order2idx(dev, order); - last_umr_cache_entry = order2idx(dev, max_umr_order(dev)); + last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); if (c < 0 || c > last_umr_cache_entry) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; @@ -677,12 +676,12 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); queue_work(cache->wq, &ent->work); - if (i > MAX_UMR_CACHE_ENTRY) { + if (i > MR_CACHE_LAST_STD_ENTRY) { mlx5_odp_init_mr_cache_entry(ent); continue; } - if (!use_umr(dev, ent->order)) + if (ent->order > mr_cache_max_order(dev)) continue; ent->page = PAGE_SHIFT; @@ -819,18 +818,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size) return (npages + 1) / 2; } -static int max_umr_order(struct mlx5_ib_dev *dev) +static int mr_cache_max_order(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) - return MAX_UMR_CACHE_ENTRY + 2; + return MR_CACHE_LAST_STD_ENTRY + 2; return MLX5_MAX_UMR_SHIFT; } -static int use_umr(struct mlx5_ib_dev *dev, int order) -{ - return order <= max_umr_order(dev); -} - static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int access_flags, struct ib_umem **umem, int *npages, int *page_shift, int *ncont, @@ -1149,6 +1143,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); @@ -1231,7 +1226,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - if (use_umr(dev, order)) { + if (order <= mr_cache_max_order(dev)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { @@ -1355,7 +1350,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, /* * UMR can't be used - MKey needs to be replaced. */ - if (mr->umred) { + if (mr->allocated_from_cache) { err = unreg_umr(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to unregister MR\n"); @@ -1373,7 +1368,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (IS_ERR(mr)) return PTR_ERR(mr); - mr->umred = 0; + mr->allocated_from_cache = 0; } else { /* * Send a UMR WQE @@ -1461,7 +1456,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr) static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int umred = mr->umred; + int allocated_from_cache = mr->allocated_from_cache; int err; if (mr->sig) { @@ -1479,7 +1474,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); - if (!umred) { + if (!allocated_from_cache) { err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", @@ -1490,7 +1485,7 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_mr_cache_free(dev, mr); } - if (!umred) + if (!allocated_from_cache) kfree(mr); return 0; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index db40bc4055c7..99d88624ad07 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,7 +1093,7 @@ enum { }; enum { - MAX_UMR_CACHE_ENTRY = 20, + MR_CACHE_LAST_STD_ENTRY = 20, MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MR_CACHE_ENTRIES -- cgit v1.2.3 From ff740aefecb98da4605df1cada7904c44eaee161 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:30 +0300 Subject: IB/mlx5: Decouple MR allocation and population flows mlx5 compatible devices have two ways of populating the MTT table of an MKEY: using a FW command and using a UMR WQE. A UMR is much faster, so it should be used whenever possible. Unfortunately the code today uses UMR only if the MKEY was allocated from the MR cache. Fix the code to use UMR even for MKEYs that were allocated using a FW command. Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 75 ++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bc87016021e3..aa6f71570b77 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -898,7 +898,8 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, return err; } -static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, +static struct mlx5_ib_mr *alloc_mr_from_cache( + struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, u64 len, int npages, int page_shift, int order, int access_flags) { @@ -930,16 +931,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, mr->mmkey.size = len; mr->mmkey.pd = to_mpd(pd)->pdn; - err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, - MLX5_IB_UPD_XLT_ENABLE); - - if (err) { - mlx5_mr_cache_free(dev, mr); - return ERR_PTR(err); - } - - mr->live = 1; - return mr; } @@ -1105,7 +1096,8 @@ free_xlt: static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, u64 virt_addr, u64 length, struct ib_umem *umem, int npages, - int page_shift, int access_flags) + int page_shift, int access_flags, + bool populate) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; @@ -1120,15 +1112,19 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + - sizeof(*pas) * ((npages + 1) / 2) * 2; + mr->ibmr.pd = pd; + mr->access_flags = access_flags; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + if (populate) + inlen += sizeof(*pas) * roundup(npages, 2); in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (!(access_flags & IB_ACCESS_ON_DEMAND)) + if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); @@ -1137,6 +1133,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); @@ -1153,8 +1150,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, 1 << page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + if (populate) { + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + } err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { @@ -1163,9 +1162,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, } mr->mmkey.type = MLX5_MKEY_MR; mr->desc_size = sizeof(struct mlx5_mtt); - mr->umem = umem; mr->dev = dev; - mr->live = 1; kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); @@ -1205,6 +1202,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, int ncont; int order; int err; + bool use_umr = true; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); @@ -1223,27 +1221,29 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (err < 0) + if (err < 0) return ERR_PTR(err); if (order <= mr_cache_max_order(dev)) { - mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, - order, access_flags); + mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, + page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } - } else if (access_flags & IB_ACCESS_ON_DEMAND && - !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); - goto error; + } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { + if (access_flags & IB_ACCESS_ON_DEMAND) { + err = -EINVAL; + pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); + goto error; + } + use_umr = false; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags); + page_shift, access_flags, !use_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1261,8 +1261,22 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, update_odp_mr(mr); #endif - return &mr->ibmr; + if (use_umr) { + int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; + + if (access_flags & IB_ACCESS_ON_DEMAND) + update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; + err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, + update_xlt_flags); + if (err) { + mlx5_ib_dereg_mr(&mr->ibmr); + return ERR_PTR(err); + } + } + + mr->live = 1; + return &mr->ibmr; error: ib_umem_release(umem); return ERR_PTR(err); @@ -1363,12 +1377,13 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, - page_shift, access_flags); + page_shift, access_flags, true); if (IS_ERR(mr)) return PTR_ERR(mr); mr->allocated_from_cache = 0; + mr->live = 1; } else { /* * Send a UMR WQE -- cgit v1.2.3 From 5942d8ae411775b76e5e1ab0cce57b0666516f2d Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 17 Aug 2017 15:52:31 +0300 Subject: IB/mlx5: Fix memory leak in clean_mr error path In clean_mr error path the 'mr' should be freed. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Kamal Heib Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index aa6f71570b77..8cf53eb13148 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1490,19 +1490,19 @@ static int clean_mr(struct mlx5_ib_mr *mr) mlx5_free_priv_descs(mr); if (!allocated_from_cache) { + u32 key = mr->mmkey.key; + err = destroy_mkey(dev, mr); + kfree(mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", - mr->mmkey.key, err); + key, err); return err; } } else { mlx5_mr_cache_free(dev, mr); } - if (!allocated_from_cache) - kfree(mr); - return 0; } -- cgit v1.2.3 From 7b4cdaae73ee833975a767cf54a3354d355b3f8d Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Thu, 17 Aug 2017 15:52:32 +0300 Subject: IB/mlx5: Fix integer overflow when page_shift == 31 Fix a bug where MR registration fails when mlx5_ib_cont_pages indicates that the MR can be mapped using 2GB pages (page_shift == 31). Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Ilya Lesokhin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 8cf53eb13148..0e2789d9bb4d 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -808,13 +808,14 @@ err_free: return ERR_PTR(err); } -static int get_octo_len(u64 addr, u64 len, int page_size) +static int get_octo_len(u64 addr, u64 len, int page_shift) { + u64 page_size = 1ULL << page_shift; u64 offset; int npages; offset = addr & (page_size - 1); - npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + npages = ALIGN(len + offset, page_size) >> page_shift; return (npages + 1) / 2; } @@ -1147,12 +1148,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + get_octo_len(virt_addr, length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, - get_octo_len(virt_addr, length, 1 << page_shift)); + get_octo_len(virt_addr, length, page_shift)); } err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); -- cgit v1.2.3 From a550ddfc543e250798048cf4eabe721cd85ac724 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Thu, 17 Aug 2017 15:52:33 +0300 Subject: IB/mlx5: Add support for multi underlay QP Set underlay QPN as part of flow rule when it's applicable. There is one root flow table in the NIC RX namespace and all the underlay QPs steer the traffic to this flow table. In order to prevent QP to get traffic which is not target to its underlay QP, we need to set the underlay QP number as part of the steering matching. Note: When multicast traffic is sent the QPN filtering is done by the firmware as some early step. Adding the QPN match on the flow table entry is wrong as by that time the target QPN holds the multicast address (e.g. FF(s)) and it won't match. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 49 +++++++++++++++++++++++++++++++++------ include/linux/mlx5/mlx5_ifc.h | 8 +++++-- 2 files changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 07789450ec42..8f7e46090f9a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2125,7 +2125,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, * it won't fall into the multicast flow steering table and this rule * could steal other multicast packets. */ -static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) +static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr) { union ib_flow_spec *flow_spec; @@ -2337,10 +2337,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, return err ? ERR_PTR(err) : prio; } -static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - const struct ib_flow_attr *flow_attr, - struct mlx5_flow_destination *dst) +static void set_underlay_qp(struct mlx5_ib_dev *dev, + struct mlx5_flow_spec *spec, + u32 underlay_qpn) +{ + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (underlay_qpn && + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, + ft_field_support.bth_dst_qp)) { + MLX5_SET(fte_match_set_misc, + misc_params_v, bth_dst_qp, underlay_qpn); + MLX5_SET(fte_match_set_misc, + misc_params_c, bth_dst_qp, 0xffffff); + } +} + +static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst, + u32 underlay_qpn) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; @@ -2376,6 +2397,9 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } + if (!flow_is_multicast_only(flow_attr)) + set_underlay_qp(dev, spec, underlay_qpn); + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); if (is_drop) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; @@ -2415,6 +2439,14 @@ free: return err ? ERR_PTR(err) : handler; } +static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + const struct ib_flow_attr *flow_attr, + struct mlx5_flow_destination *dst) +{ + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); +} + static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, struct ib_flow_attr *flow_attr, @@ -2551,6 +2583,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; + int underlay_qpn; if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -2591,8 +2624,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, handler = create_dont_trap_rule(dev, ft_prio, flow_attr, dst); } else { - handler = create_flow_rule(dev, ft_prio, flow_attr, - dst); + underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? + mqp->underlay_qpn : 0; + handler = _create_flow_rule(dev, ft_prio, flow_attr, + dst, underlay_qpn); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5bae70eb25af..6563500c85de 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -295,8 +295,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_tcp_dport[0x1]; u8 inner_tcp_flags[0x1]; u8 reserved_at_37[0x9]; + u8 reserved_at_40[0x1a]; + u8 bth_dst_qp[0x1]; - u8 reserved_at_40[0x40]; + u8 reserved_at_5b[0x25]; }; struct mlx5_ifc_flow_table_prop_layout_bits { @@ -432,7 +434,9 @@ struct mlx5_ifc_fte_match_set_misc_bits { u8 reserved_at_100[0xc]; u8 inner_ipv6_flow_label[0x14]; - u8 reserved_at_120[0xe0]; + u8 reserved_at_120[0x28]; + u8 bth_dst_qp[0x18]; + u8 reserved_at_160[0xa0]; }; struct mlx5_ifc_cmd_pas_bits { -- cgit v1.2.3 From 795b609c8b59f8f20fa9d72bf8b4ae3b8aa5582c Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:34 +0300 Subject: IB/mlx5: Allow posting multi packet send WQEs if hardware supports Set the field to allow posting multi packet send WQEs if hardware supports this feature. This doesn't mean the send WQEs will be for multi packet unless the send WQE was prepared according to multi packet send WQE format. User space shall use flag MLX5_IB_ALLOW_MPW to check if hardware supports MPW and allows MPW in SQ context. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++-- drivers/infiniband/hw/mlx5/qp.c | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 5 +++++ 4 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8f7e46090f9a..ba0a97d6f677 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -802,8 +802,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, uhw->outlen)) { - resp.mlx5_ib_support_multi_pkt_send_wqes = - MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_IB_ALLOW_MPW; resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 656773196f27..d6df88a78d5e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1083,6 +1083,8 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) + MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6563500c85de..6865e60ba473 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2445,7 +2445,7 @@ struct mlx5_ifc_sqc_bits { u8 cd_master[0x1]; u8 fre[0x1]; u8 flush_in_error_en[0x1]; - u8 reserved_at_4[0x1]; + u8 allow_multi_pkt_send_wqe[0x1]; u8 min_wqe_inline_mode[0x3]; u8 state[0x4]; u8 reg_umr[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 64d398e662cd..a61a512e5747 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -168,6 +168,11 @@ struct mlx5_packet_pacing_caps { __u32 reserved; }; +enum mlx5_ib_mpw_caps { + MPW_RESERVED = 1 << 0, + MLX5_IB_ALLOW_MPW = 1 << 1, +}; + enum mlx5_ib_sw_parsing_offloads { MLX5_IB_SW_PARSING = 1 << 0, MLX5_IB_SW_PARSING_CSUM = 1 << 1, -- cgit v1.2.3 From 050da902adde8faf6b1bef15ac4876ae145358f4 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 17 Aug 2017 15:52:35 +0300 Subject: IB/mlx5: Report mlx5 enhanced multi packet WQE capability Expose enhanced multi packet WQE capability to user space through query_device by uhw. Signed-off-by: Bodong Wang Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +++++ include/linux/mlx5/mlx5_ifc.h | 2 +- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ba0a97d6f677..62e6298810e7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -805,6 +805,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; + + if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + resp.mlx5_ib_support_multi_pkt_send_wqes |= + MLX5_IB_SUPPORT_EMPW; + resp.response_length += sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6865e60ba473..4eff0b8a1482 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -604,7 +604,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 rss_ind_tbl_cap[0x4]; u8 reg_umr_sq[0x1]; u8 scatter_fcs[0x1]; - u8 reserved_at_1a[0x1]; + u8 enhanced_multi_pkt_send_wqe[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; u8 tunnel_statless_gre[0x1]; diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index a61a512e5747..1791bf123ba9 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -171,6 +171,7 @@ struct mlx5_packet_pacing_caps { enum mlx5_ib_mpw_caps { MPW_RESERVED = 1 << 0, MLX5_IB_ALLOW_MPW = 1 << 1, + MLX5_IB_SUPPORT_EMPW = 1 << 2, }; enum mlx5_ib_sw_parsing_offloads { -- cgit v1.2.3 From 3aaee8ab47a2485ad53c0a2d94945f0105d9feab Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Aug 2017 18:26:14 -0700 Subject: IB/rdmavt: Use rvt_put_swqe() in rvt_clear_mr_ref() hfi1 and qib were converted in previous patches, do the same for rdmavt. Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6f6525d24a2f..3a238b00885e 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -440,13 +440,9 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) if (clr_sends) { while (qp->s_last != qp->s_head) { struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; + rvt_put_swqe(wqe); - rvt_put_mr(sge->mr); - } if (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) -- cgit v1.2.3 From 5b0ef650bd0f820e922fcc42f1985d4621ae19cf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Aug 2017 18:26:20 -0700 Subject: IB/{qib, hfi1}: Avoid flow control testing for RDMA write operation Section 9.7.7.2.5 of the 1.3 IBTA spec clearly says that receive credits should never apply to RDMA write. qib and hfi1 were doing that. The following situation will result in a QP hang: - A prior SEND or RDMA_WRITE with immmediate consumed the last credit for a QP using RC receive buffer credits - The prior op is acked so there are no more acks - The peer ULP fails to post receive for some reason - An RDMA write sees that the credits are exhausted and waits - The peer ULP posts receive buffers - The ULP posts a send or RDMA write that will be hung The fix is to avoid the credit test for the RDMA write operation. Cc: Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 3 ++- drivers/infiniband/hw/qib/qib_rc.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 99defcc0ce45..e1cf0c08ca6f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -442,7 +442,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -450,6 +450,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } +no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, &ohdr->u.rc.reth); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 4ddbcac5eabe..e9a91736b12d 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -348,7 +348,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) case IB_WR_RDMA_WRITE: if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* FALLTHROUGH */ + goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && @@ -356,7 +356,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - +no_flow_control: ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); ohdr->u.rc.reth.rkey = -- cgit v1.2.3 From 3b7169338c1caad4d17ed5d9ed412f813c919cd2 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:26:26 -0700 Subject: IB/qib: Remove unnecessary memory allocation for boardname Remove all the memory allocation implemented for boardname and directly assign the defined string literal. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba6120.c | 20 ++++++----------- drivers/infiniband/hw/qib/qib_iba7220.c | 22 +++++++----------- drivers/infiniband/hw/qib/qib_iba7322.c | 40 ++++++++++++++------------------- drivers/infiniband/hw/qib/qib_init.c | 1 - 4 files changed, 32 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 46045fc28fa0..3259a60e4f4f 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1742,38 +1742,32 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd) */ static void pe_boardname(struct qib_devdata *dd) { - char *n; - u32 boardid, namelen; + u32 boardid; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 2: - n = "InfiniPath_QLE7140"; + dd->boardname = "InfiniPath_QLE7140"; break; default: qib_dev_err(dd, "Unknown 6120 board with ID %u\n", boardid); - n = "Unknown_InfiniPath_6120"; + dd->boardname = "Unknown_InfiniPath_6120"; break; } - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) qib_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->majrev, dd->minrev); + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); - + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); } /* diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 49cd6e3beb72..04bdd3d487b1 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2050,41 +2050,35 @@ static void qib_setup_7220_interrupt(struct qib_devdata *dd) */ static void qib_7220_boardname(struct qib_devdata *dd) { - char *n; - u32 boardid, namelen; + u32 boardid; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 1: - n = "InfiniPath_QLE7240"; + dd->boardname = "InfiniPath_QLE7240"; break; case 2: - n = "InfiniPath_QLE7280"; + dd->boardname = "InfiniPath_QLE7280"; break; default: qib_dev_err(dd, "Unknown 7220 board with ID %u\n", boardid); - n = "Unknown_InfiniPath_7220"; + dd->boardname = "Unknown_InfiniPath_7220"; break; } - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); - if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) qib_dev_err(dd, - "Unsupported InfiniPath hardware revision %u.%u!\n", - dd->majrev, dd->minrev); + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); } /* diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 2653064ce9e9..92ae68c8e76f 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3582,75 +3582,69 @@ bail:; static unsigned qib_7322_boardname(struct qib_devdata *dd) { /* Will need enumeration of board-types here */ - char *n; - u32 boardid, namelen; - unsigned features = DUAL_PORT_CAP; + u32 boardid; + unsigned int features = DUAL_PORT_CAP; boardid = SYM_FIELD(dd->revision, Revision, BoardID); switch (boardid) { case 0: - n = "InfiniPath_QLE7342_Emulation"; + dd->boardname = "InfiniPath_QLE7342_Emulation"; break; case 1: - n = "InfiniPath_QLE7340"; + dd->boardname = "InfiniPath_QLE7340"; dd->flags |= QIB_HAS_QSFP; features = PORT_SPD_CAP; break; case 2: - n = "InfiniPath_QLE7342"; + dd->boardname = "InfiniPath_QLE7342"; dd->flags |= QIB_HAS_QSFP; break; case 3: - n = "InfiniPath_QMI7342"; + dd->boardname = "InfiniPath_QMI7342"; break; case 4: - n = "InfiniPath_Unsupported7342"; + dd->boardname = "InfiniPath_Unsupported7342"; qib_dev_err(dd, "Unsupported version of QMH7342\n"); features = 0; break; case BOARD_QMH7342: - n = "InfiniPath_QMH7342"; + dd->boardname = "InfiniPath_QMH7342"; features = 0x24; break; case BOARD_QME7342: - n = "InfiniPath_QME7342"; + dd->boardname = "InfiniPath_QME7342"; break; case 8: - n = "InfiniPath_QME7362"; + dd->boardname = "InfiniPath_QME7362"; dd->flags |= QIB_HAS_QSFP; break; case BOARD_QMH7360: - n = "Intel IB QDR 1P FLR-QSFP Adptr"; + dd->boardname = "Intel IB QDR 1P FLR-QSFP Adptr"; dd->flags |= QIB_HAS_QSFP; break; case 15: - n = "InfiniPath_QLE7342_TEST"; + dd->boardname = "InfiniPath_QLE7342_TEST"; dd->flags |= QIB_HAS_QSFP; break; default: - n = "InfiniPath_QLE73xy_UNKNOWN"; + dd->boardname = "InfiniPath_QLE73xy_UNKNOWN"; qib_dev_err(dd, "Unknown 7322 board type %u\n", boardid); break; } dd->board_atten = 1; /* index into txdds_Xdr */ - namelen = strlen(n) + 1; - dd->boardname = kmalloc(namelen, GFP_KERNEL); - if (dd->boardname) - snprintf(dd->boardname, namelen, "%s", n); - snprintf(dd->boardversion, sizeof(dd->boardversion), "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, - (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + (unsigned int)SYM_FIELD(dd->revision, Revision_R, Arch), dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + (unsigned int)SYM_FIELD(dd->revision, Revision_R, SW)); if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { qib_devinfo(dd->pcidev, - "IB%u: Forced to single port mode by module parameter\n", - dd->unit); + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); features &= PORT_SPD_CAP; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index cce4ab468b29..c5a4c65636d6 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1398,7 +1398,6 @@ static void cleanup_device_data(struct qib_devdata *dd) qib_free_ctxtdata(dd, rcd); } kfree(tmp); - kfree(dd->boardname); } /* -- cgit v1.2.3 From 27147273a68ecf5738ef87ccc6cd4bb8883fca84 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:26:32 -0700 Subject: IB/qib: Stricter bounds checking for copy and array access Added checking on index value of array 'guids' in qib_ruc.c. Pass in corrrect size of array for memset operation in qib_mad.c. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_mad.c | 4 ++-- drivers/infiniband/hw/qib/qib_ruc.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 549c71971a1f..82d9da9b6997 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1573,8 +1573,8 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; - memset(pmp->reserved, 0, sizeof(pmp->reserved) + - sizeof(pmp->data)); + memset(pmp->reserved, 0, sizeof(pmp->reserved)); + memset(pmp->data, 0, sizeof(pmp->data)); /* * Set top 3 bits to indicate interval in picoseconds in diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index e6a42a8972f5..53efbb0b40c4 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -645,8 +645,10 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, hdr->hop_limit = grh->hop_limit; /* The SGID is 32-bit aligned. */ hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; - hdr->sgid.global.interface_id = grh->sgid_index ? - ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid; + if (!grh->sgid_index) + hdr->sgid.global.interface_id = ppd_from_ibp(ibp)->guid; + else if (grh->sgid_index < QIB_GUIDS_PER_PORT) + hdr->sgid.global.interface_id = ibp->guids[grh->sgid_index - 1]; hdr->dgid = grh->dgid; /* GRH header size in 32-bit words. */ -- cgit v1.2.3 From de42de80d7bb4f82e30d16f5c64da90e64b6c644 Mon Sep 17 00:00:00 2001 From: Grzegorz Morys Date: Mon, 21 Aug 2017 18:26:38 -0700 Subject: IB/hfi1: Ratelimit prints from sdma_interrupt Ratelimit error prints from sdma_interrupt function that could swarm dmesg otherwise. Reviewed-by: Dennis Dalessandro Signed-off-by: Grzegorz Morys Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 4 ++-- drivers/infiniband/hw/hfi1/hfi.h | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index eb27c7fbfebd..b2ed4b9cda6e 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8290,8 +8290,8 @@ static irqreturn_t sdma_interrupt(int irq, void *data) /* handle the interrupt(s) */ sdma_engine_interrupt(sde, status); } else { - dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n", - sde->this_idx); + dd_dev_err_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n", + sde->this_idx); } return IRQ_HANDLED; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index eadb735d958c..3ac9c307a285 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2119,9 +2119,15 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) #define dd_dev_emerg(dd, fmt, ...) \ dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) + #define dd_dev_err(dd, fmt, ...) \ dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define dd_dev_err_ratelimited(dd, fmt, ...) \ + dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ + get_unit_name((dd)->unit), ##__VA_ARGS__) + #define dd_dev_warn(dd, fmt, ...) \ dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ get_unit_name((dd)->unit), ##__VA_ARGS__) -- cgit v1.2.3 From 7956371ea48bd00956219a82fd3af655dd216073 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 21 Aug 2017 18:26:45 -0700 Subject: IB/hfi1: Improve local kmem_cache_alloc performance Performance analysis shows that the cache callback function sdma_kmem_cache_ctor contributes to 1/2 of the kmem_cache_allocs time. Since all of the fields in the allocated data structure are initialized in the code path, remove the _ctor function. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 9 +-------- drivers/infiniband/hw/hfi1/verbs_txreq.c | 11 ++--------- drivers/infiniband/hw/hfi1/vnic_sdma.c | 13 +++---------- 3 files changed, 6 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index aae1f40016e4..9b89df584943 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -331,13 +331,6 @@ static void activate_packet_queue(struct iowait *wait, int reason) wake_up(&wait->wait_dma); }; -static void sdma_kmem_cache_ctor(void *obj) -{ - struct user_sdma_txreq *tx = obj; - - memset(tx, 0, sizeof(*tx)); -} - int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd) { @@ -391,7 +384,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, sizeof(struct user_sdma_txreq), L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, - sdma_kmem_cache_ctor); + NULL); if (!pq->txreq_cache) { dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n", uctxt->ctxt); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 5d23172c470f..873e48ea923f 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -119,13 +119,6 @@ out: return tx; } -static void verbs_txreq_kmem_cache_ctor(void *obj) -{ - struct verbs_txreq *tx = (struct verbs_txreq *)obj; - - memset(tx, 0, sizeof(*tx)); -} - int verbs_txreq_init(struct hfi1_ibdev *dev) { char buf[TXREQ_LEN]; @@ -135,7 +128,7 @@ int verbs_txreq_init(struct hfi1_ibdev *dev) dev->verbs_txreq_cache = kmem_cache_create(buf, sizeof(struct verbs_txreq), 0, SLAB_HWCACHE_ALIGN, - verbs_txreq_kmem_cache_ctor); + NULL); if (!dev->verbs_txreq_cache) return -ENOMEM; return 0; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 7815d7405462..c3c96c5869ed 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -303,22 +303,15 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo) } } -static void hfi1_vnic_txreq_kmem_cache_ctor(void *obj) -{ - struct vnic_txreq *tx = (struct vnic_txreq *)obj; - - memset(tx, 0, sizeof(*tx)); -} - int hfi1_vnic_txreq_init(struct hfi1_devdata *dd) { char buf[HFI1_VNIC_TXREQ_NAME_LEN]; snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit); dd->vnic.txreq_cache = kmem_cache_create(buf, - sizeof(struct vnic_txreq), - 0, SLAB_HWCACHE_ALIGN, - hfi1_vnic_txreq_kmem_cache_ctor); + sizeof(struct vnic_txreq), + 0, SLAB_HWCACHE_ALIGN, + NULL); if (!dd->vnic.txreq_cache) return -ENOMEM; return 0; -- cgit v1.2.3 From 9dc117095570e7a230f8f9229a70713c335d9e2a Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:26:51 -0700 Subject: IB/hfi1: Clean up hfi1_user_exp_rcv_setup function Clean up hfi1_user_exp_rcv_setup function by moving page pinning and unpinning related code to separate functions. In order to reduce the number of parameters passed between functions, a new data structure struct tid_user_buf is defined and used. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 232 ++++++++++++++++++------------ drivers/infiniband/hw/hfi1/user_exp_rcv.h | 9 ++ 2 files changed, 153 insertions(+), 88 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index d9036ba3f4eb..04be178b72f8 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -75,20 +75,21 @@ struct tid_pageset { static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, struct hfi1_filedata *fd); -static u32 find_phys_blocks(struct page **pages, unsigned npages, - struct tid_pageset *list); -static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, +static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages); +static int set_rcvarray_entry(struct hfi1_filedata *fd, + struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, - struct page **pages, unsigned npages); + u16 pageidx, unsigned int npages); static int tid_rb_insert(void *arg, struct mmu_rb_node *node); static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode); static void tid_rb_remove(void *arg, struct mmu_rb_node *node); static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode); -static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, - struct tid_group *grp, struct tid_pageset *sets, - unsigned start, u16 count, struct page **pages, - u32 *tidlist, unsigned *tididx, unsigned *pmapped); +static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, + struct tid_group *grp, + unsigned int start, u16 count, + u32 *tidlist, unsigned int *tididx, + unsigned int *pmapped); static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, struct tid_group **grp); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); @@ -198,6 +199,92 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } +/** + * Release pinned receive buffer pages. + * + * @mapped - true if the pages have been DMA mapped. false otherwise. + * @idx - Index of the first page to unpin. + * @npages - No of pages to unpin. + * + * If the pages have been DMA mapped (indicated by mapped parameter), their + * info will be passed via a struct tid_rb_node. If they haven't been mapped, + * their info will be passed via a struct tid_user_buf. + */ +static void unpin_rcv_pages(struct hfi1_filedata *fd, + struct tid_user_buf *tidbuf, + struct tid_rb_node *node, + unsigned int idx, + unsigned int npages, + bool mapped) +{ + struct page **pages; + struct hfi1_devdata *dd = fd->uctxt->dd; + + if (mapped) { + pci_unmap_single(dd->pcidev, node->dma_addr, + node->mmu.len, PCI_DMA_FROMDEVICE); + pages = &node->pages[idx]; + } else { + pages = &tidbuf->pages[idx]; + } + hfi1_release_user_pages(fd->mm, pages, npages, mapped); + fd->tid_n_pinned -= npages; +} + +/** + * Pin receive buffer pages. + */ +static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) +{ + int pinned; + unsigned int npages; + unsigned long vaddr = tidbuf->vaddr; + struct page **pages = NULL; + struct hfi1_devdata *dd = fd->uctxt->dd; + + /* Get the number of pages the user buffer spans */ + npages = num_user_pages(vaddr, tidbuf->length); + if (!npages) + return -EINVAL; + + if (npages > fd->uctxt->expected_count) { + dd_dev_err(dd, "Expected buffer too big\n"); + return -EINVAL; + } + + /* Verify that access is OK for the user buffer */ + if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, + npages * PAGE_SIZE)) { + dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", + (void *)vaddr, npages); + return -EFAULT; + } + /* Allocate the array of struct page pointers needed for pinning */ + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + /* + * Pin all the pages of the user buffer. If we can't pin all the + * pages, accept the amount pinned so far and program only that. + * User space knows how to deal with partially programmed buffers. + */ + if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { + kfree(pages); + return -ENOMEM; + } + + pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + if (pinned <= 0) { + kfree(pages); + return pinned; + } + tidbuf->pages = pages; + tidbuf->npages = npages; + fd->tid_n_pinned += pinned; + return pinned; +} + /* * RcvArray entry allocation for Expected Receives is done by the * following algorithm: @@ -253,62 +340,33 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, int ret = 0, need_group = 0, pinned; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets, + unsigned int ngroups, pageidx = 0, pageset_count, tididx = 0, mapped, mapped_pages = 0; - unsigned long vaddr = tinfo->vaddr; - struct page **pages = NULL; u32 *tidlist = NULL; - struct tid_pageset *pagesets = NULL; - - /* Get the number of pages the user buffer spans */ - npages = num_user_pages(vaddr, tinfo->length); - if (!npages) - return -EINVAL; - - if (npages > uctxt->expected_count) { - dd_dev_err(dd, "Expected buffer too big\n"); - return -EINVAL; - } - - /* Verify that access is OK for the user buffer */ - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, - npages * PAGE_SIZE)) { - dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", - (void *)vaddr, npages); - return -EFAULT; - } + struct tid_user_buf *tidbuf; - pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets), - GFP_KERNEL); - if (!pagesets) + tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); + if (!tidbuf) return -ENOMEM; - /* Allocate the array of struct page pointers needed for pinning */ - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - goto bail; - } - - /* - * Pin all the pages of the user buffer. If we can't pin all the - * pages, accept the amount pinned so far and program only that. - * User space knows how to deal with partially programmed buffers. - */ - if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) { - ret = -ENOMEM; - goto bail; + tidbuf->vaddr = tinfo->vaddr; + tidbuf->length = tinfo->length; + tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), + GFP_KERNEL); + if (!tidbuf->psets) { + kfree(tidbuf); + return -ENOMEM; } - pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages); + pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - ret = pinned; - goto bail; + kfree(tidbuf->psets); + kfree(tidbuf); + return pinned; } - fd->tid_n_pinned += npages; /* Find sets of physically contiguous pages */ - npagesets = find_phys_blocks(pages, pinned, pagesets); + tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); /* * We don't need to access this under a lock since tid_used is per @@ -316,10 +374,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, * and hfi1_user_exp_rcv_setup() at the same time. */ spin_lock(&fd->tid_lock); - if (fd->tid_used + npagesets > fd->tid_limit) + if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else - pageset_count = npagesets; + pageset_count = tidbuf->n_psets; spin_unlock(&fd->tid_lock); if (!pageset_count) @@ -347,9 +405,9 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct tid_group *grp = tid_group_pop(&uctxt->tid_group_list); - ret = program_rcvarray(fd, vaddr, grp, pagesets, + ret = program_rcvarray(fd, tidbuf, grp, pageidx, dd->rcv_entries.group_size, - pages, tidlist, &tididx, &mapped); + tidlist, &tididx, &mapped); /* * If there was a failure to program the RcvArray * entries for the entire group, reset the grp fields @@ -393,8 +451,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned use = min_t(unsigned, pageset_count - pageidx, grp->size - grp->used); - ret = program_rcvarray(fd, vaddr, grp, pagesets, - pageidx, use, pages, tidlist, + ret = program_rcvarray(fd, tidbuf, grp, + pageidx, use, tidlist, &tididx, &mapped); if (ret < 0) { hfi1_cdbg(TID, @@ -454,16 +512,14 @@ nomem: * If not everything was mapped (due to insufficient RcvArray entries, * for example), unpin all unmapped pages so we can pin them nex time. */ - if (mapped_pages != pinned) { - hfi1_release_user_pages(fd->mm, &pages[mapped_pages], - pinned - mapped_pages, - false); - fd->tid_n_pinned -= pinned - mapped_pages; - } + if (mapped_pages != pinned) + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, + (pinned - mapped_pages), false); bail: - kfree(pagesets); - kfree(pages); + kfree(tidbuf->psets); kfree(tidlist); + kfree(tidbuf->pages); + kfree(tidbuf); return ret > 0 ? 0 : ret; } @@ -553,11 +609,12 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, return ret; } -static u32 find_phys_blocks(struct page **pages, unsigned npages, - struct tid_pageset *list) +static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages) { unsigned pagecount, pageidx, setcount = 0, i; unsigned long pfn, this_pfn; + struct page **pages = tidbuf->pages; + struct tid_pageset *list = tidbuf->psets; if (!npages) return 0; @@ -620,13 +677,13 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, /** * program_rcvarray() - program an RcvArray group with receive buffers * @fd: filedata pointer - * @vaddr: starting user virtual address + * @tbuf: pointer to struct tid_user_buf that has the user buffer starting + * virtual address, buffer length, page pointers, pagesets (array of + * struct tid_pageset holding information on physically contiguous + * chunks from the user buffer), and other fields. * @grp: RcvArray group - * @sets: array of struct tid_pageset holding information on physically - * contiguous chunks from the user buffer * @start: starting index into sets array * @count: number of struct tid_pageset's to program - * @pages: an array of struct page * for the user buffer * @tidlist: the array of u32 elements when the information about the * programmed RcvArray entries is to be encoded. * @tididx: starting offset into tidlist @@ -644,11 +701,11 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages, * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or * number of RcvArray entries programmed. */ -static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, +static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, struct tid_group *grp, - struct tid_pageset *sets, - unsigned start, u16 count, struct page **pages, - u32 *tidlist, unsigned *tididx, unsigned *pmapped) + unsigned int start, u16 count, + u32 *tidlist, unsigned int *tididx, + unsigned int *pmapped) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -687,11 +744,11 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, } rcventry = grp->base + useidx; - npages = sets[setidx].count; - pageidx = sets[setidx].idx; + npages = tbuf->psets[setidx].count; + pageidx = tbuf->psets[setidx].idx; - ret = set_rcvarray_entry(fd, vaddr + (pageidx * PAGE_SIZE), - rcventry, grp, pages + pageidx, + ret = set_rcvarray_entry(fd, tbuf, + rcventry, grp, pageidx, npages); if (ret) return ret; @@ -712,15 +769,17 @@ static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, return idx; } -static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, +static int set_rcvarray_entry(struct hfi1_filedata *fd, + struct tid_user_buf *tbuf, u32 rcventry, struct tid_group *grp, - struct page **pages, unsigned npages) + u16 pageidx, unsigned int npages) { int ret; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct tid_rb_node *node; struct hfi1_devdata *dd = uctxt->dd; dma_addr_t phys; + struct page **pages = tbuf->pages + pageidx; /* * Allocate the node first so we can handle a potential @@ -741,7 +800,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, return -EFAULT; } - node->mmu.addr = vaddr; + node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE); node->mmu.len = npages * PAGE_SIZE; node->phys = page_to_phys(pages[0]); node->npages = npages; @@ -820,10 +879,7 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) */ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); - pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len, - PCI_DMA_FROMDEVICE); - hfi1_release_user_pages(fd->mm, node->pages, node->npages, true); - fd->tid_n_pinned -= node->npages; + unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 6cbaa4cf4970..8c4eb5d8547e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,6 +51,15 @@ #include "exp_rcv.h" +struct tid_user_buf { + unsigned long vaddr; + unsigned long length; + unsigned int npages; + struct page **pages; + struct tid_pageset *psets; + unsigned int n_psets; +}; + int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); -- cgit v1.2.3 From 624b9ac15c3cf0896721e76f4e23ab0d25148119 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:26:57 -0700 Subject: IB/hfi1: Clean up user_sdma_send_pkts() function user_sdma_send_pkts() function is unnecessarily long. Clean it up by moving some of its code into separate functions. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 141 +++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 59 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 9b89df584943..d5a2572cc5a1 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -847,6 +847,84 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len); } +static int user_sdma_txadd_ahg(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + u32 datalen) +{ + int ret; + u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); + u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen)); + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + /* + * Copy the request header into the tx header + * because the HW needs a cacheline-aligned + * address. + * This copy can be optimized out if the hdr + * member of user_sdma_request were also + * cacheline aligned. + */ + memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); + if (PBC2LRH(pbclen) != lrhlen) { + pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen); + tx->hdr.pbc[0] = cpu_to_le16(pbclen); + } + ret = check_header_template(req, &tx->hdr, lrhlen, datalen); + if (ret) + return ret; + ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY, + sizeof(tx->hdr) + datalen, req->ahg_idx, + 0, NULL, 0, user_sdma_txreq_cb); + if (ret) + return ret; + ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr)); + if (ret) + sdma_txclean(pq->dd, &tx->txreq); + return ret; +} + +static int user_sdma_txadd(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, u32 datalen, + u32 *queued_ptr, u32 *data_sent_ptr, + u64 *iov_offset_ptr) +{ + int ret; + unsigned int pageidx, len; + unsigned long base, offset; + u64 iov_offset = *iov_offset_ptr; + u32 queued = *queued_ptr, data_sent = *data_sent_ptr; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + base = (unsigned long)iovec->iov.iov_base; + offset = offset_in_page(base + iovec->offset + iov_offset); + pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >> + PAGE_SHIFT); + len = offset + req->info.fragsize > PAGE_SIZE ? + PAGE_SIZE - offset : req->info.fragsize; + len = min((datalen - queued), len); + ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx], + offset, len); + if (ret) { + SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret); + return ret; + } + iov_offset += len; + queued += len; + data_sent += len; + if (unlikely(queued < datalen && pageidx == iovec->npages && + req->iov_idx < req->data_iovs - 1)) { + iovec->offset += iov_offset; + iovec = &req->iovs[++req->iov_idx]; + iov_offset = 0; + } + + *queued_ptr = queued; + *data_sent_ptr = data_sent; + *iov_offset_ptr = iov_offset; + return ret; +} + static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { int ret = 0, count; @@ -943,39 +1021,9 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) if (req->ahg_idx >= 0) { if (!req->seqnum) { - u16 pbclen = le16_to_cpu(req->hdr.pbc[0]); - u32 lrhlen = get_lrh_len(req->hdr, - pad_len(datalen)); - /* - * Copy the request header into the tx header - * because the HW needs a cacheline-aligned - * address. - * This copy can be optimized out if the hdr - * member of user_sdma_request were also - * cacheline aligned. - */ - memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr)); - if (PBC2LRH(pbclen) != lrhlen) { - pbclen = (pbclen & 0xf000) | - LRH2PBC(lrhlen); - tx->hdr.pbc[0] = cpu_to_le16(pbclen); - } - ret = check_header_template(req, &tx->hdr, - lrhlen, datalen); - if (ret) - goto free_tx; - ret = sdma_txinit_ahg(&tx->txreq, - SDMA_TXREQ_F_AHG_COPY, - sizeof(tx->hdr) + datalen, - req->ahg_idx, 0, NULL, 0, - user_sdma_txreq_cb); + ret = user_sdma_txadd_ahg(req, tx, datalen); if (ret) goto free_tx; - ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, - &tx->hdr, - sizeof(tx->hdr)); - if (ret) - goto free_txreq; } else { int changes; @@ -1006,35 +1054,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) */ while (queued < datalen && (req->sent + data_sent) < req->data_len) { - unsigned long base, offset; - unsigned pageidx, len; - - base = (unsigned long)iovec->iov.iov_base; - offset = offset_in_page(base + iovec->offset + - iov_offset); - pageidx = (((iovec->offset + iov_offset + - base) - (base & PAGE_MASK)) >> PAGE_SHIFT); - len = offset + req->info.fragsize > PAGE_SIZE ? - PAGE_SIZE - offset : req->info.fragsize; - len = min((datalen - queued), len); - ret = sdma_txadd_page(pq->dd, &tx->txreq, - iovec->pages[pageidx], - offset, len); - if (ret) { - SDMA_DBG(req, "SDMA txreq add page failed %d\n", - ret); + ret = user_sdma_txadd(req, tx, iovec, datalen, + &queued, &data_sent, &iov_offset); + if (ret) goto free_txreq; - } - iov_offset += len; - queued += len; - data_sent += len; - if (unlikely(queued < datalen && - pageidx == iovec->npages && - req->iov_idx < req->data_iovs - 1)) { - iovec->offset += iov_offset; - iovec = &req->iovs[++req->iov_idx]; - iov_offset = 0; - } } /* * The txreq was submitted successfully so we can update -- cgit v1.2.3 From 4c6c9aa6cbb0574c96635f3e4e1c5791e8fafca7 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:03 -0700 Subject: IB/hfi1: Clean up pin_vector_pages() function Clean up pin_vector_pages() function by moving page pinning related code to a separate function since it really stands on its own. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 79 +++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 34 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index d5a2572cc5a1..6f26253d2b7a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1124,11 +1124,53 @@ static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) return evict_data.cleared; } +static int pin_sdma_pages(struct user_sdma_request *req, + struct user_sdma_iovec *iovec, + struct sdma_mmu_node *node, + int npages) +{ + int pinned, cleared; + struct page **pages; + struct hfi1_user_sdma_pkt_q *pq = req->pq; + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + SDMA_DBG(req, "Failed page array alloc"); + return -ENOMEM; + } + memcpy(pages, node->pages, node->npages * sizeof(*pages)); + + npages -= node->npages; +retry: + if (!hfi1_can_pin_pages(pq->dd, pq->mm, + atomic_read(&pq->n_locked), npages)) { + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } + pinned = hfi1_acquire_user_pages(pq->mm, + ((unsigned long)iovec->iov.iov_base + + (node->npages * PAGE_SIZE)), npages, 0, + pages + node->npages); + if (pinned < 0) { + kfree(pages); + return pinned; + } + if (pinned != npages) { + unpin_vector_pages(pq->mm, pages, node->npages, pinned); + return -EFAULT; + } + kfree(node->pages); + node->rb.len = iovec->iov.iov_len; + node->pages = pages; + atomic_add(pinned, &pq->n_locked); + return pinned; +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { - int ret = 0, pinned, npages, cleared; - struct page **pages; + int ret = 0, pinned, npages; struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; @@ -1162,44 +1204,13 @@ static int pin_vector_pages(struct user_sdma_request *req, npages = num_user_pages(&iovec->iov); if (node->npages < npages) { - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - SDMA_DBG(req, "Failed page array alloc"); - ret = -ENOMEM; - goto bail; - } - memcpy(pages, node->pages, node->npages * sizeof(*pages)); - - npages -= node->npages; - -retry: - if (!hfi1_can_pin_pages(pq->dd, pq->mm, - atomic_read(&pq->n_locked), npages)) { - cleared = sdma_cache_evict(pq, npages); - if (cleared >= npages) - goto retry; - } - pinned = hfi1_acquire_user_pages(pq->mm, - ((unsigned long)iovec->iov.iov_base + - (node->npages * PAGE_SIZE)), npages, 0, - pages + node->npages); + pinned = pin_sdma_pages(req, iovec, node, npages); if (pinned < 0) { - kfree(pages); ret = pinned; goto bail; } - if (pinned != npages) { - unpin_vector_pages(pq->mm, pages, node->npages, - pinned); - ret = -EFAULT; - goto bail; - } - kfree(node->pages); - node->rb.len = iovec->iov.iov_len; - node->pages = pages; node->npages += pinned; npages = node->npages; - atomic_add(pinned, &pq->n_locked); } iovec->pages = node->pages; iovec->npages = npages; -- cgit v1.2.3 From 04a646df12766e4e11ecf09cd0336dab69d75034 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:09 -0700 Subject: IB/hfi1: Fix the bail out code in pin_vector_pages() function In pin_vector_pages() function, if there is any error while pinning the pages or while adding a pinned buffer to the cache, the bail out code needs to unpin any pinned pages that are not in the cache and adjust the n_locked counter that counts the total pages pinned. The current bail out code doesn't seem to be doing it right in two cases: 1. Before pinning required pages for a buffer, the SDMA pinned buffer cache is searched to see if the virtual address range that needs to be pinned is already pinned. If there isn't a hit in the cache, a new node is created for the buffer and is added to the cache after the buffer is pinned. If adding the new node to the cache fails, the n_locked count is decremented properly but the pinned pages are not freed. This commit fixes this issue. 2. If there is a hit in the SDMA cache, but the cached buffer doesn't have enough pages to cover the entire address range that needs to be pinned, the node for the cached buffer is extracted from the cache, remaining pages needed are pinned and added to the node. The node is finally added back into the cache. If there is an error pinning the extra pages, the bail out code frees all the pages in the node but the n_locked count is not being decremented by the no of pages in the node that are freed. This commit fixes this issue. This commit fixes the above two issues by creating a new function that frees the pages in a node and decrements the n_locked count by the number of pages freed. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 6f26253d2b7a..a3a9925f408a 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1167,6 +1167,14 @@ retry: return pinned; } +static void unpin_sdma_pages(struct sdma_mmu_node *node) +{ + if (node->npages) { + unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } +} + static int pin_vector_pages(struct user_sdma_request *req, struct user_sdma_iovec *iovec) { @@ -1218,14 +1226,12 @@ static int pin_vector_pages(struct user_sdma_request *req, ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb); if (ret) { - atomic_sub(node->npages, &pq->n_locked); iovec->node = NULL; goto bail; } return 0; bail: - if (rb_node) - unpin_vector_pages(pq->mm, node->pages, 0, node->npages); + unpin_sdma_pages(node); kfree(node); return ret; } @@ -1671,10 +1677,7 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) struct sdma_mmu_node *node = container_of(mnode, struct sdma_mmu_node, rb); - atomic_sub(node->npages, &node->pq->n_locked); - - unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages); - + unpin_sdma_pages(node); kfree(node); } -- cgit v1.2.3 From ddd3affb50015c3c7ee218b83e73e646aa48e212 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:16 -0700 Subject: IB/hfi1: Remove duplicate definitions of num_user_pages() function num_user_pages() function has been defined in both user_exp_rcv.c file and user_sdma.c file. Move the function definition to a header file so there is only one definition in the source repo. Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 5 ----- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 9 +++++++++ drivers/infiniband/hw/hfi1/user_sdma.c | 18 +++--------------- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 04be178b72f8..026790ee0e84 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -67,11 +67,6 @@ struct tid_pageset { u16 count; }; -#define num_user_pages(vaddr, len) \ - (1 + (((((unsigned long)(vaddr) + \ - (unsigned long)(len) - 1) & PAGE_MASK) - \ - ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT)) - static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, struct hfi1_filedata *fd); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 8c4eb5d8547e..7461d11b01c4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -60,6 +60,15 @@ struct tid_user_buf { unsigned int n_psets; }; +static inline int num_user_pages(unsigned long addr, + unsigned long len) +{ + const unsigned long spage = addr & PAGE_MASK; + const unsigned long epage = (addr + len - 1) & PAGE_MASK; + + return 1 + ((epage - spage) >> PAGE_SHIFT); +} + int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a3a9925f408a..2837407d4daa 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -246,7 +246,6 @@ struct user_sdma_txreq { static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts); -static int num_user_pages(const struct iovec *iov); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); @@ -1101,19 +1100,6 @@ free_tx: return ret; } -/* - * How many pages in this iovec element? - */ -static inline int num_user_pages(const struct iovec *iov) -{ - const unsigned long addr = (unsigned long)iov->iov_base; - const unsigned long len = iov->iov_len; - const unsigned long spage = addr & PAGE_MASK; - const unsigned long epage = (addr + len - 1) & PAGE_MASK; - - return 1 + ((epage - spage) >> PAGE_SHIFT); -} - static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) { struct evict_data evict_data; @@ -1182,6 +1168,7 @@ static int pin_vector_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; struct sdma_mmu_node *node = NULL; struct mmu_rb_node *rb_node; + struct iovec *iov; bool extracted; extracted = @@ -1210,7 +1197,8 @@ static int pin_vector_pages(struct user_sdma_request *req, atomic_set(&node->refcount, 0); } - npages = num_user_pages(&iovec->iov); + iov = &iovec->iov; + npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len); if (node->npages < npages) { pinned = pin_sdma_pages(req, iovec, node, npages); if (pinned < 0) { -- cgit v1.2.3 From 637f4600a8d3be44146ad7fbb5188484c3b0a1d4 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:23 -0700 Subject: IB/hfi1: Move structure definitions from user_exp_rcv.c to user_exp_rcv.h Clean up user_exp_rcv.c file by moving structure definitions into header file user_exp_rcv.h. Since these structure definitions depend on the structure definitions in mmu_rb.h, move #include "mmu_rb.h" above the include "user_exp_rcv.h" or include of header files that include user_exp_rcv.h Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 18 +----------------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 16 ++++++++++++++++ drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index ab8eb2bf48d8..e0fd8fc0a7ab 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,10 +58,10 @@ #include "device.h" #include "common.h" #include "trace.h" +#include "mmu_rb.h" #include "user_sdma.h" #include "user_exp_rcv.h" #include "aspm.h" -#include "mmu_rb.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 026790ee0e84..6f6c14df383e 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -47,25 +47,9 @@ #include #include +#include "mmu_rb.h" #include "user_exp_rcv.h" #include "trace.h" -#include "mmu_rb.h" - -struct tid_rb_node { - struct mmu_rb_node mmu; - unsigned long phys; - struct tid_group *grp; - u32 rcventry; - dma_addr_t dma_addr; - bool freed; - unsigned npages; - struct page *pages[0]; -}; - -struct tid_pageset { - u16 idx; - u16 count; -}; static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, struct exp_tid_set *set, diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index 7461d11b01c4..e383cc01a2bf 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -51,6 +51,11 @@ #include "exp_rcv.h" +struct tid_pageset { + u16 idx; + u16 count; +}; + struct tid_user_buf { unsigned long vaddr; unsigned long length; @@ -60,6 +65,17 @@ struct tid_user_buf { unsigned int n_psets; }; +struct tid_rb_node { + struct mmu_rb_node mmu; + unsigned long phys; + struct tid_group *grp; + u32 rcventry; + dma_addr_t dma_addr; + bool freed; + unsigned int npages; + struct page *pages[0]; +}; + static inline int num_user_pages(unsigned long addr, unsigned long len) { diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 2837407d4daa..8a1653ab2f65 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -64,11 +64,11 @@ #include "hfi.h" #include "sdma.h" +#include "mmu_rb.h" #include "user_sdma.h" #include "verbs.h" /* for the headers */ #include "common.h" /* for struct hfi1_tid_info */ #include "trace.h" -#include "mmu_rb.h" static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); -- cgit v1.2.3 From 32500f2763906602725006f1dc833c8ea28dfd07 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Mon, 21 Aug 2017 18:27:29 -0700 Subject: IB/hfi1: Move structure and MACRO definitions in user_sdma.c to user_sdma.h Clean up user_sdma.c by moving the structure and MACRO definitions into the header file user_sdma.h Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 168 --------------------------------- drivers/infiniband/hw/hfi1/user_sdma.h | 166 ++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 168 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8a1653ab2f65..dacb0fce49c6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -74,176 +74,8 @@ static uint hfi1_sdma_comp_ring_size = 128; module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO); MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128"); -/* The maximum number of Data io vectors per message/request */ -#define MAX_VECTORS_PER_REQ 8 -/* - * Maximum number of packet to send from each message/request - * before moving to the next one. - */ -#define MAX_PKTS_PER_QUEUE 16 - -#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT)) - -#define req_opcode(x) \ - (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_version(x) \ - (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) -#define req_iovcnt(x) \ - (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK) - -/* Number of BTH.PSN bits used for sequence number in expected rcvs */ -#define BTH_SEQ_MASK 0x7ffull - -#define AHG_KDETH_INTR_SHIFT 12 -#define AHG_KDETH_SH_SHIFT 13 -#define AHG_KDETH_ARRAY_SIZE 9 - -#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) -#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) - -#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ - do { \ - if ((idx) < ARRAY_SIZE((arr))) \ - (arr)[(idx++)] = sdma_build_ahg_descriptor( \ - (__force u16)(value), (dw), (bit), \ - (width)); \ - else \ - return -ERANGE; \ - } while (0) - -/* Tx request flag bits */ -#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ -#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ - -#define SDMA_PKT_Q_INACTIVE BIT(0) -#define SDMA_PKT_Q_ACTIVE BIT(1) -#define SDMA_PKT_Q_DEFERRED BIT(2) - -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. - */ -#define MAX_DEFER_RETRY_COUNT 1 - static unsigned initial_pkt_count = 8; -#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ - -struct sdma_mmu_node; - -struct user_sdma_iovec { - struct list_head list; - struct iovec iov; - /* number of pages in this vector */ - unsigned npages; - /* array of pinned pages for this vector */ - struct page **pages; - /* - * offset into the virtual address space of the vector at - * which we last left off. - */ - u64 offset; - struct sdma_mmu_node *node; -}; - -struct sdma_mmu_node { - struct mmu_rb_node rb; - struct hfi1_user_sdma_pkt_q *pq; - atomic_t refcount; - struct page **pages; - unsigned npages; -}; - -/* evict operation argument */ -struct evict_data { - u32 cleared; /* count evicted so far */ - u32 target; /* target count to evict */ -}; - -struct user_sdma_request { - /* This is the original header from user space */ - struct hfi1_pkt_header hdr; - - /* Read mostly fields */ - struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; - struct hfi1_user_sdma_comp_q *cq; - /* - * Pointer to the SDMA engine for this request. - * Since different request could be on different VLs, - * each request will need it's own engine pointer. - */ - struct sdma_engine *sde; - struct sdma_req_info info; - /* TID array values copied from the tid_iov vector */ - u32 *tids; - /* total length of the data in the request */ - u32 data_len; - /* number of elements copied to the tids array */ - u16 n_tids; - /* - * We copy the iovs for this request (based on - * info.iovcnt). These are only the data vectors - */ - u8 data_iovs; - s8 ahg_idx; - - /* Writeable fields shared with interrupt */ - u64 seqcomp ____cacheline_aligned_in_smp; - u64 seqsubmitted; - /* status of the last txreq completed */ - int status; - - /* Send side fields */ - struct list_head txps ____cacheline_aligned_in_smp; - u64 seqnum; - /* - * KDETH.OFFSET (TID) field - * The offset can cover multiple packets, depending on the - * size of the TID entry. - */ - u32 tidoffset; - /* - * KDETH.Offset (Eager) field - * We need to remember the initial value so the headers - * can be updated properly. - */ - u32 koffset; - u32 sent; - /* TID index copied from the tid_iov vector */ - u16 tididx; - /* progress index moving along the iovs array */ - u8 iov_idx; - u8 done; - u8 has_error; - - struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; -} ____cacheline_aligned_in_smp; - -/* - * A single txreq could span up to 3 physical pages when the MTU - * is sufficiently large (> 4K). Each of the IOV pointers also - * needs it's own set of flags so the vector has been handled - * independently of each other. - */ -struct user_sdma_txreq { - /* Packet header for the txreq */ - struct hfi1_pkt_header hdr; - struct sdma_txreq txreq; - struct list_head list; - struct user_sdma_request *req; - u16 flags; - unsigned busycount; - u64 seqnum; -}; - -#define SDMA_DBG(req, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ - (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ - ##__VA_ARGS__) -#define SDMA_Q_DBG(pq, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ - (pq)->subctxt, ##__VA_ARGS__) - static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 84c199d3e003..6c10484e972f 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -53,6 +53,67 @@ #include "iowait.h" #include "user_exp_rcv.h" +/* The maximum number of Data io vectors per message/request */ +#define MAX_VECTORS_PER_REQ 8 +/* + * Maximum number of packet to send from each message/request + * before moving to the next one. + */ +#define MAX_PKTS_PER_QUEUE 16 + +#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT)) + +#define req_opcode(x) \ + (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) +#define req_version(x) \ + (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK) +#define req_iovcnt(x) \ + (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK) + +/* Number of BTH.PSN bits used for sequence number in expected rcvs */ +#define BTH_SEQ_MASK 0x7ffull + +#define AHG_KDETH_INTR_SHIFT 12 +#define AHG_KDETH_SH_SHIFT 13 +#define AHG_KDETH_ARRAY_SIZE 9 + +#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) +#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) + +#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ + do { \ + if ((idx) < ARRAY_SIZE((arr))) \ + (arr)[(idx++)] = sdma_build_ahg_descriptor( \ + (__force u16)(value), (dw), (bit), \ + (width)); \ + else \ + return -ERANGE; \ + } while (0) + +/* Tx request flag bits */ +#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ +#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ + +#define SDMA_PKT_Q_INACTIVE BIT(0) +#define SDMA_PKT_Q_ACTIVE BIT(1) +#define SDMA_PKT_Q_DEFERRED BIT(2) + +/* + * Maximum retry attempts to submit a TX request + * before putting the process to sleep. + */ +#define MAX_DEFER_RETRY_COUNT 1 + +#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ + +#define SDMA_DBG(req, fmt, ...) \ + hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ + (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ + ##__VA_ARGS__) +#define SDMA_Q_DBG(pq, fmt, ...) \ + hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ + (pq)->subctxt, ##__VA_ARGS__) + extern uint extended_psn; struct hfi1_user_sdma_pkt_q { @@ -79,6 +140,111 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; +struct sdma_mmu_node { + struct mmu_rb_node rb; + struct hfi1_user_sdma_pkt_q *pq; + atomic_t refcount; + struct page **pages; + unsigned int npages; +}; + +struct user_sdma_iovec { + struct list_head list; + struct iovec iov; + /* number of pages in this vector */ + unsigned int npages; + /* array of pinned pages for this vector */ + struct page **pages; + /* + * offset into the virtual address space of the vector at + * which we last left off. + */ + u64 offset; + struct sdma_mmu_node *node; +}; + +/* evict operation argument */ +struct evict_data { + u32 cleared; /* count evicted so far */ + u32 target; /* target count to evict */ +}; + +struct user_sdma_request { + /* This is the original header from user space */ + struct hfi1_pkt_header hdr; + + /* Read mostly fields */ + struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp; + struct hfi1_user_sdma_comp_q *cq; + /* + * Pointer to the SDMA engine for this request. + * Since different request could be on different VLs, + * each request will need it's own engine pointer. + */ + struct sdma_engine *sde; + struct sdma_req_info info; + /* TID array values copied from the tid_iov vector */ + u32 *tids; + /* total length of the data in the request */ + u32 data_len; + /* number of elements copied to the tids array */ + u16 n_tids; + /* + * We copy the iovs for this request (based on + * info.iovcnt). These are only the data vectors + */ + u8 data_iovs; + s8 ahg_idx; + + /* Writeable fields shared with interrupt */ + u64 seqcomp ____cacheline_aligned_in_smp; + u64 seqsubmitted; + /* status of the last txreq completed */ + int status; + + /* Send side fields */ + struct list_head txps ____cacheline_aligned_in_smp; + u64 seqnum; + /* + * KDETH.OFFSET (TID) field + * The offset can cover multiple packets, depending on the + * size of the TID entry. + */ + u32 tidoffset; + /* + * KDETH.Offset (Eager) field + * We need to remember the initial value so the headers + * can be updated properly. + */ + u32 koffset; + u32 sent; + /* TID index copied from the tid_iov vector */ + u16 tididx; + /* progress index moving along the iovs array */ + u8 iov_idx; + u8 done; + u8 has_error; + + struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; +} ____cacheline_aligned_in_smp; + +/* + * A single txreq could span up to 3 physical pages when the MTU + * is sufficiently large (> 4K). Each of the IOV pointers also + * needs it's own set of flags so the vector has been handled + * independently of each other. + */ +struct user_sdma_txreq { + /* Packet header for the txreq */ + struct hfi1_pkt_header hdr; + struct sdma_txreq txreq; + struct list_head list; + struct user_sdma_request *req; + u16 flags; + unsigned int busycount; + u64 seqnum; +}; + int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct hfi1_filedata *fd); int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, -- cgit v1.2.3 From d68e68e5fb7134427c6d79c58beaaf869543f3f7 Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 21 Aug 2017 18:27:35 -0700 Subject: IB/hfi1: Fix whitespace alignment issue for MAD Fix a tab alignment issue present in pr_err_ratelimited error message. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4849130d49bc..f4c0ffc040cc 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -158,7 +158,7 @@ static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp, if (queue_id >= RVT_MAX_TRAP_LISTS) { trap_count++; pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n", - trap->data.generic_type, trap_count); + trap->data.generic_type, trap_count); kfree(trap); return NULL; } -- cgit v1.2.3 From 642aaab5a63c043c593ba9e9a86aadd388313e56 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 21 Aug 2017 18:27:41 -0700 Subject: IB/hfi1: Add received request info to qp_stats The rvt_ack_entry pointed to by s_tail_ack_queue provides important info about the request that has just been processed or is being processed on the responder side of a RC connection. This patch adds this info to the qp_stats to assist debugging. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 0fca6dfe8d9f..ee836c0978cc 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -626,12 +626,15 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) struct hfi1_qp_priv *priv = qp->priv; struct sdma_engine *sde; struct send_context *send_context; + struct rvt_ack_entry *e = NULL; sde = qp_to_sdma_engine(qp, priv->s_sc); wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); + if (qp->s_ack_queue) + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d E %x %x %x\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -672,7 +675,11 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) send_context ? send_context->sw_index : 0, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, - qp->pid); + qp->pid, + /* ack queue information */ + e ? e->opcode : 0, + e ? e->psn : 0, + e ? e->lpsn : 0); } void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) -- cgit v1.2.3 From 280ad49a97ee7adf5c86f70f7ea1ff60a755b294 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Aug 2017 18:27:48 -0700 Subject: IB/hfi1: Add opcode states to qp_stats These fields allow for debugging send engine processing. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index ee836c0978cc..b87889d4e9e4 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -634,7 +634,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) if (qp->s_ack_queue) e = &qp->s_ack_queue[qp->s_tail_ack_queue]; seq_printf(s, - "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d E %x %x %x\n", + "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -676,6 +676,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, qp->pid, + qp->s_state, + qp->s_ack_state, /* ack queue information */ e ? e->opcode : 0, e ? e->psn : 0, -- cgit v1.2.3 From d518a44d317d92f4c297ea26a308b1ac1a980d33 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 25 Aug 2017 11:43:59 +0300 Subject: IB/usnic: check for allocation failure usnic_uiom_get_dev_list() can return ERR_PTR(-ENOMEM) so we should check for that. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Dan Carpenter Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 97dd79ebb590..e4113ef09315 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -164,6 +164,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, if (usnic_ib_share_vf) { /* Try to find resouces on a used vf which is in pd */ dev_list = usnic_uiom_get_dev_list(pd->umem_pd); + if (IS_ERR(dev_list)) + return ERR_CAST(dev_list); for (i = 0; dev_list[i]; i++) { dev = dev_list[i]; vf = pci_get_drvdata(to_pci_dev(dev)); -- cgit v1.2.3 From 2d72d6c4ec1e3761e8cfc69d58d498b2dbe46341 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Fri, 25 Aug 2017 21:41:02 +0530 Subject: RDMA/bnxt_re: remove unnecessary call to memset call to memset to assign 0 value immediately after allocating memory with kzalloc is unnecesaary as kzalloc allocates the memory filled with 0 value. Signed-off-by: Himanshu Jha Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 43b3dee4b6ba..01eee15bbd65 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -911,7 +911,6 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah if (!ah) return NULL; - memset(ah, 0, sizeof(*ah)); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; @@ -958,7 +957,6 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp if (!qp) return NULL; - memset(qp, 0, sizeof(*qp)); qp->rdev = rdev; /* Initialize the shadow QP structure from the QP1 values */ -- cgit v1.2.3 From 733da3bcb3a1293fab1e7703982540dde847d716 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 28 Aug 2017 09:59:28 +0530 Subject: IB/hfi1: constify vm_operations_struct vm_operations_struct are not supposed to change at runtime. vm_area_struct structure working with const vm_operations_struct. So mark the non-const vm_operations_struct structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index e0fd8fc0a7ab..d893582e4450 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -120,7 +120,7 @@ static const struct file_operations hfi1_file_ops = { .llseek = noop_llseek, }; -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .fault = vma_fault, }; -- cgit v1.2.3 From cfeca08faf452acaf807576859275968cdb7e7a2 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 28 Aug 2017 21:51:23 +0530 Subject: i40iw: make some structures const Make some structures const as they are only used during a copy operation. Signed-off-by: Bhumika Goyal Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 1060725d18bc..0aadb7a0d1aa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -912,7 +912,7 @@ enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data return 0; } -static struct i40iw_qp_uk_ops iw_qp_uk_ops = { +static const struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_qp_post_wr = i40iw_qp_post_wr, .iw_qp_ring_push_db = i40iw_qp_ring_push_db, .iw_rdma_write = i40iw_rdma_write, @@ -926,14 +926,14 @@ static struct i40iw_qp_uk_ops iw_qp_uk_ops = { .iw_post_nop = i40iw_nop }; -static struct i40iw_cq_ops iw_cq_ops = { +static const struct i40iw_cq_ops iw_cq_ops = { .iw_cq_request_notification = i40iw_cq_request_notification, .iw_cq_poll_completion = i40iw_cq_poll_completion, .iw_cq_post_entries = i40iw_cq_post_entries, .iw_cq_clean = i40iw_clean_cq }; -static struct i40iw_device_uk_ops iw_device_uk_ops = { +static const struct i40iw_device_uk_ops iw_device_uk_ops = { .iwarp_cq_uk_init = i40iw_cq_uk_init, .iwarp_qp_uk_init = i40iw_qp_uk_init, }; -- cgit v1.2.3 From ba81a427c35c827bb220f1a87231b3460e0bd879 Mon Sep 17 00:00:00 2001 From: Jan Sokolowski Date: Mon, 28 Aug 2017 11:23:21 -0700 Subject: IB/hfi1: Acquire QSFP cable information on loopback Currently, QSFP information is not queried in cases where loopback was set up and QSFP module is present. Acquire QSFP information in case of loopback. Reviewed-by: Dennis Dalessandro Signed-off-by: Jan Sokolowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/platform.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 8004f1da7548..a8af96d2b1b0 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -944,6 +944,21 @@ void tune_serdes(struct hfi1_pportdata *ppd) if (loopback != LOOPBACK_NONE || ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { ppd->driver_link_ready = 1; + + if (qsfp_mod_present(ppd)) { + ret = acquire_chip_resource(ppd->dd, + qsfp_resource(ppd->dd), + QSFP_WAIT); + if (ret) { + dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n", + __func__, (int)ppd->dd->hfi1_id); + goto bail; + } + + refresh_qsfp_cache(ppd, &ppd->qsfp_info); + release_chip_resource(ppd->dd, qsfp_resource(ppd->dd)); + } + return; } -- cgit v1.2.3 From 34ab4de77fe512ba3aeabdcdc97504bd19791511 Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Mon, 28 Aug 2017 11:23:27 -0700 Subject: IB/hif1: Remove static tracing from SDMA hot path The hfi1_cdbg() macro can be instantiated in the hot path even when it is not in use. This shows up on perf profiles. Rework the macros (for SDMA and MMU), to use the trace interface directly to eliminate this performance hit. Reviewed-by: Mike Marciniszyn Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 3 +- drivers/infiniband/hw/hfi1/mmu_rb.c | 13 ++-- drivers/infiniband/hw/hfi1/trace.h | 3 +- drivers/infiniband/hw/hfi1/trace_mmu.h | 95 +++++++++++++++++++++++ drivers/infiniband/hw/hfi1/trace_tx.h | 136 ++++++++++++++++++++++++++++++++- drivers/infiniband/hw/hfi1/user_sdma.c | 31 ++++---- drivers/infiniband/hw/hfi1/user_sdma.h | 3 - 7 files changed, 255 insertions(+), 29 deletions(-) create mode 100644 drivers/infiniband/hw/hfi1/trace_mmu.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index d893582e4450..2bc89260235a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -430,8 +430,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) if (!iter_is_iovec(from) || !dim) return -EINVAL; - hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)", - fd->uctxt->ctxt, fd->subctxt, dim); + trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) return -ENOSPC; diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index d41fd87a39f2..13dcef08cac1 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -172,9 +172,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, unsigned long flags; int ret = 0; + trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len); spin_lock_irqsave(&handler->lock, flags); - hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr, - mnode->len); node = __mmu_rb_search(handler, mnode->addr, mnode->len); if (node) { ret = -EINVAL; @@ -200,7 +199,7 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, { struct mmu_rb_node *node = NULL; - hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len); + trace_hfi1_mmu_rb_search(addr, len); if (!handler->ops->filter) { node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); @@ -281,8 +280,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, unsigned long flags; /* Validity of handler and node pointers has been checked by caller. */ - hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr, - node->len); + trace_hfi1_mmu_rb_remove(node->addr, node->len); spin_lock_irqsave(&handler->lock, flags); __mmu_int_rb_remove(node, &handler->root); list_del(&node->list); /* remove from LRU list */ @@ -321,8 +319,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, node; node = ptr) { /* Guard against node removal. */ ptr = __mmu_int_rb_iter_next(node, start, end - 1); - hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u", - node->addr, node->len); + trace_hfi1_mmu_mem_invalidate(node->addr, node->len); if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); /* move from LRU list to delete list */ diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index 92dc88f013c9..af50c0793450 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -51,3 +51,4 @@ #include "trace_rc.h" #include "trace_rx.h" #include "trace_tx.h" +#include "trace_mmu.h" diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h new file mode 100644 index 000000000000..3b7abbc382c2 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/trace_mmu.h @@ -0,0 +1,95 @@ +/* + * Copyright(c) 2017 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HFI1_TRACE_MMU_H + +#include +#include + +#include "hfi.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM hfi1_mmu + +DECLARE_EVENT_CLASS(hfi1_mmu_rb_template, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len), + TP_STRUCT__entry(__field(unsigned long, addr) + __field(unsigned long, len) + ), + TP_fast_assign(__entry->addr = addr; + __entry->len = len; + ), + TP_printk("MMU node addr 0x%lx, len %lu", + __entry->addr, + __entry->len + ) +); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate, + TP_PROTO(unsigned long addr, unsigned long len), + TP_ARGS(addr, len)); + +#endif /* __HFI1_TRACE_RC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mmu +#include diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h index c59809a7f121..c57af3b31fe1 100644 --- a/drivers/infiniband/hw/hfi1/trace_tx.h +++ b/drivers/infiniband/hw/hfi1/trace_tx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -198,6 +198,140 @@ TRACE_EVENT(hfi1_sdma_engine_select, ) ); +TRACE_EVENT(hfi1_sdma_user_free_queues, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt), + TP_ARGS(dd, ctxt, subctxt), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + ), + TP_printk("[%s] SDMA [%u:%u] Freeing user SDMA queues", + __get_str(dev), + __entry->ctxt, + __entry->subctxt + ) +); + +TRACE_EVENT(hfi1_sdma_user_process_request, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx), + TP_ARGS(dd, ctxt, subctxt, comp_idx), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + ), + TP_printk("[%s] SDMA [%u:%u] Using req/comp entry: %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx + ) +); + +DECLARE_EVENT_CLASS( + hfi1_sdma_value_template, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, u16 comp_idx, + u32 value), + TP_ARGS(dd, ctxt, subctxt, comp_idx, value), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + __field(u32, value) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + __entry->value = value; + ), + TP_printk("[%s] SDMA [%u:%u:%u] value: %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx, + __entry->value + ) +); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_initial_tidoffset, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 tidoffset), + TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset)); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_data_length, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 data_len), + TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len)); + +DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_compute_length, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 data_len), + TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len)); + +TRACE_EVENT(hfi1_sdma_user_tid_info, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + u16 comp_idx, u32 tidoffset, u32 units, u8 shift), + TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset, units, shift), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(u16, comp_idx) + __field(u32, tidoffset) + __field(u32, units) + __field(u8, shift) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->comp_idx = comp_idx; + __entry->tidoffset = tidoffset; + __entry->units = units; + __entry->shift = shift; + ), + TP_printk("[%s] SDMA [%u:%u:%u] TID offset %ubytes %uunits om %u", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->comp_idx, + __entry->tidoffset, + __entry->units, + __entry->shift + ) +); + +TRACE_EVENT(hfi1_sdma_request, + TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, + unsigned long dim), + TP_ARGS(dd, ctxt, subctxt, dim), + TP_STRUCT__entry(DD_DEV_ENTRY(dd) + __field(u16, ctxt) + __field(u16, subctxt) + __field(unsigned long, dim) + ), + TP_fast_assign(DD_DEV_ASSIGN(dd); + __entry->ctxt = ctxt; + __entry->subctxt = subctxt; + __entry->dim = dim; + ), + TP_printk("[%s] SDMA from %u:%u (%lu)", + __get_str(dev), + __entry->ctxt, + __entry->subctxt, + __entry->dim + ) +); + DECLARE_EVENT_CLASS(hfi1_sdma_engine_class, TP_PROTO(struct sdma_engine *sde, u64 status), TP_ARGS(sde, status), diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index dacb0fce49c6..c0c0e0445cbf 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -266,8 +266,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, { struct hfi1_user_sdma_pkt_q *pq; - hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit, - uctxt->ctxt, fd->subctxt); + trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); + pq = fd->pq; if (pq) { if (pq->handler) @@ -349,7 +349,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt, (u16 *)&info); - if (info.comp_idx >= hfi1_sdma_comp_ring_size) { hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Invalid comp index", @@ -386,8 +385,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, /* * All safety checks have been done and this request has been claimed. */ - hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit, - uctxt->ctxt, fd->subctxt, info.comp_idx); + trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx); req = pq->reqs + info.comp_idx; req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */ req->data_len = 0; @@ -487,7 +486,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) * (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ? KDETH_OM_LARGE : KDETH_OM_SMALL); - SDMA_DBG(req, "Initial TID offset %u", req->tidoffset); + trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->tidoffset); idx++; /* Save all the IO vector structures */ @@ -505,8 +505,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, } req->data_len += req->iovs[i].iov.iov_len; } - SDMA_DBG(req, "total data length %u", req->data_len); - + trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt, + info.comp_idx, req->data_len); if (pcount > req->info.npkts) pcount = req->info.npkts; /* @@ -661,7 +661,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req, } else { len = min(req->data_len - req->sent, (u32)req->info.fragsize); } - SDMA_DBG(req, "Data Length = %u", len); + trace_hfi1_sdma_user_compute_length(req->pq->dd, + req->pq->ctxt, + req->pq->subctxt, + req->info.comp_idx, + len); return len; } @@ -1231,9 +1235,10 @@ static int set_txreq_header(struct user_sdma_request *req, * Set the KDETH.OFFSET and KDETH.OM based on size of * transfer. */ - SDMA_DBG(req, "TID offset %ubytes %uunits om%u", - req->tidoffset, req->tidoffset >> omfactor, - omfactor != KDETH_OM_SMALL_SHIFT); + trace_hfi1_sdma_user_tid_info( + pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, + req->tidoffset, req->tidoffset >> omfactor, + omfactor != KDETH_OM_SMALL_SHIFT); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset >> omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, @@ -1441,8 +1446,6 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, u16 idx, enum hfi1_sdma_comp_state state, int ret) { - hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d", - pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret); if (state == ERROR) cq->comps[idx].errcode = -ret; smp_wmb(); /* make sure errcode is visible first */ diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 6c10484e972f..9b8bb5634c0d 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -110,9 +110,6 @@ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \ (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ ##__VA_ARGS__) -#define SDMA_Q_DBG(pq, fmt, ...) \ - hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \ - (pq)->subctxt, ##__VA_ARGS__) extern uint extended_psn; -- cgit v1.2.3 From 6167a5b4b5f97d9fc09a99842bd3aa149654ffcf Mon Sep 17 00:00:00 2001 From: Kamenee Arumugam Date: Mon, 28 Aug 2017 11:23:33 -0700 Subject: IB/qib: Stricter bounds checking for copy to buffer Replace 'strcpy' with 'strncpy' to restrict the number of bytes copied to the buffer. Reviewed-by: Michael J. Ruhl Signed-off-by: Kamenee Arumugam Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 92ae68c8e76f..14cadf6d6214 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6175,7 +6175,7 @@ static int setup_txselect(const char *str, struct kernel_param *kp) unsigned long val; char *n; - if (strlen(str) >= MAX_ATTEN_LEN) { + if (strlen(str) >= ARRAY_SIZE(txselect_list)) { pr_info("txselect_values string too long\n"); return -ENOSPC; } @@ -6186,7 +6186,7 @@ static int setup_txselect(const char *str, struct kernel_param *kp) TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); return -EINVAL; } - strcpy(txselect_list, str); + strncpy(txselect_list, str, ARRAY_SIZE(txselect_list) - 1); list_for_each_entry(dd, &qib_dev_list, list) if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) -- cgit v1.2.3 From 4b9796b0a6fbb6e4678002b3f09392192502fd14 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Mon, 28 Aug 2017 11:23:39 -0700 Subject: IB/hfi1: Use accessor to determine ring size The qp_stats print will soon be moving to rdmavt, so use the proper accessor to get the ring size rather than a driver supplied constant. Fixes: Commit ff8d836efe06 ("IB/hfi1: Add receiving queue info to qp_stats") Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index b87889d4e9e4..6daa39630dd5 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -659,7 +659,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->s_avail, /* ack_queue ring pointers, size */ qp->s_tail_ack_queue, qp->r_head_ack_queue, - HFI1_MAX_RDMA_ATOMIC, + rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi), /* remote QP info */ qp->remote_qpn, rdma_ah_get_dlid(&qp->remote_ah_attr), -- cgit v1.2.3 From 4734b4f417126e8773b3983122ca935d02af80de Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:45 -0700 Subject: IB/rdmavt: Add QP iterator API for QPs There are currently 3 spots in the qib and hfi1 driver that have knowledge of the internal QP hash list that should only be in scope to rdmavt QP code. Add an iterator API for processing all QPs to hide the nature of the RCU hashlist. The API consists of: - rvt_qp_iter_init() * For iterating QPs one at a time for seq_file semantics - rvt_qp_iter_next() * For iterating QPs one at a time for seq_file semantics - rvt_qp_iter() * For iterating all QPs The first two are used for things like seq_file prints. The last is for code that just needs to iterate all QPs in the system. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 144 ++++++++++++++++++++++++++++++++++++++ include/rdma/rdmavt_qp.h | 29 ++++++++ 2 files changed, 173 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3a238b00885e..9f70fd8665ab 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -2069,3 +2069,147 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t) return HRTIMER_NORESTART; } EXPORT_SYMBOL(rvt_rc_rnr_retry); + +/** + * rvt_qp_iter_init - initial for QP iteration + * @rdi - rvt devinfo + * @v - u64 value + * + * This returns an iterator suitable for iterating QPs + * in the system. + * + * The @cb is a user defined callback and @v is a 64 + * bit value passed to and relevant for processing in the + * @cb. An example use case would be to alter QP processing + * based on criteria not part of the rvt_qp. + * + * Use cases that require memory allocation to succeed + * must preallocate appropriately. + * + * Return: a pointer to an rvt_qp_iter or NULL + */ +struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)) +{ + struct rvt_qp_iter *i; + + i = kzalloc(sizeof(*i), GFP_KERNEL); + if (!i) + return NULL; + + i->rdi = rdi; + /* number of special QPs (SMI/GSI) for device */ + i->specials = rdi->ibdev.phys_port_cnt * 2; + i->v = v; + i->cb = cb; + + return i; +} +EXPORT_SYMBOL(rvt_qp_iter_init); + +/** + * rvt_qp_iter_next - return the next QP in iter + * @iter - the iterator + * + * Fine grained QP iterator suitable for use + * with debugfs seq_file mechanisms. + * + * Updates iter->qp with the current QP when the return + * value is 0. + * + * Return: 0 - iter->qp is valid 1 - no more QPs + */ +int rvt_qp_iter_next(struct rvt_qp_iter *iter) + __must_hold(RCU) +{ + int n = iter->n; + int ret = 1; + struct rvt_qp *pqp = iter->qp; + struct rvt_qp *qp; + struct rvt_dev_info *rdi = iter->rdi; + + /* + * The approach is to consider the special qps + * as additional table entries before the + * real hash table. Since the qp code sets + * the qp->next hash link to NULL, this works just fine. + * + * iter->specials is 2 * # ports + * + * n = 0..iter->specials is the special qp indices + * + * n = iter->specials..rdi->qp_dev->qp_table_size+iter->specials are + * the potential hash bucket entries + * + */ + for (; n < rdi->qp_dev->qp_table_size + iter->specials; n++) { + if (pqp) { + qp = rcu_dereference(pqp->next); + } else { + if (n < iter->specials) { + struct rvt_ibport *rvp; + int pidx; + + pidx = n % rdi->ibdev.phys_port_cnt; + rvp = rdi->ports[pidx]; + qp = rcu_dereference(rvp->qp[n & 1]); + } else { + qp = rcu_dereference( + rdi->qp_dev->qp_table[ + (n - iter->specials)]); + } + } + pqp = qp; + if (qp) { + iter->qp = qp; + iter->n = n; + return 0; + } + } + return ret; +} +EXPORT_SYMBOL(rvt_qp_iter_next); + +/** + * rvt_qp_iter - iterate all QPs + * @rdi - rvt devinfo + * @v - a 64 bit value + * @cb - a callback + * + * This provides a way for iterating all QPs. + * + * The @cb is a user defined callback and @v is a 64 + * bit value passed to and relevant for processing in the + * cb. An example use case would be to alter QP processing + * based on criteria not part of the rvt_qp. + * + * The code has an internal iterator to simplify + * non seq_file use cases. + */ +void rvt_qp_iter(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)) +{ + int ret; + struct rvt_qp_iter i = { + .rdi = rdi, + .specials = rdi->ibdev.phys_port_cnt * 2, + .v = v, + .cb = cb + }; + + rcu_read_lock(); + do { + ret = rvt_qp_iter_next(&i); + if (!ret) { + rvt_get_qp(i.qp); + rcu_read_unlock(); + i.cb(i.qp, i.v); + rcu_read_lock(); + rvt_put_qp(i.qp); + } + } while (!ret); + rcu_read_unlock(); +} +EXPORT_SYMBOL(rvt_qp_iter); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 8fbafb0ce674..dfeb311c30a1 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -673,4 +673,33 @@ void rvt_del_timers_sync(struct rvt_qp *qp); void rvt_stop_rc_timers(struct rvt_qp *qp); void rvt_add_retry_timer(struct rvt_qp *qp); +/** + * struct rvt_qp_iter - the iterator for QPs + * @qp - the current QP + * + * This structure defines the current iterator + * state for sequenced access to all QPs relative + * to an rvt_dev_info. + */ +struct rvt_qp_iter { + struct rvt_qp *qp; + /* private: backpointer */ + struct rvt_dev_info *rdi; + /* private: callback routine */ + void (*cb)(struct rvt_qp *qp, u64 v); + /* private: for arg to callback routine */ + u64 v; + /* private: number of SMI,GSI QPs for device */ + int specials; + /* private: current iterator index */ + int n; +}; + +struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)); +int rvt_qp_iter_next(struct rvt_qp_iter *iter); +void rvt_qp_iter(struct rvt_dev_info *rdi, + u64 v, + void (*cb)(struct rvt_qp *qp, u64 v)); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From dff2fe7e8cd92435a93555b29d9d95fed955c558 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:51 -0700 Subject: IB/hfi1: Convert hfi1_error_port_qps() to use new QP iterator Change hfi1_error_port_qps() to use the new rvt_qp_iter() in its QP scanning. Reviewed-by: Kaike Wan Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 79 +++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 6daa39630dd5..af711318f84b 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -869,6 +869,45 @@ void notify_error_qp(struct rvt_qp *qp) } } +/** + * hfi1_qp_iter_cb - callback for iterator + * @qp - the qp + * @v - the sl in low bits of v + * + * This is called from the iterator callback to work + * on an individual qp. + */ +static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v) +{ + int lastwqe; + struct ib_event ev; + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + u8 sl = (u8)v; + + if (qp->port_num != ppd->port || + (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) || + rdma_ah_get_sl(&qp->remote_ah_attr) != sl || + !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK)) + return; + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + /** * hfi1_error_port_qps - put a port's RC/UC qps into error state * @ibp: the ibport. @@ -880,44 +919,8 @@ void notify_error_qp(struct rvt_qp *qp) */ void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) { - struct rvt_qp *qp = NULL; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_ibdev *dev = &ppd->dd->verbs_dev; - int n; - int lastwqe; - struct ib_event ev; - - rcu_read_lock(); - - /* Deal only with RC/UC qps that use the given SL. */ - for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { - for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) { - if (qp->port_num == ppd->port && - (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) && - rdma_ah_get_sl(&qp->remote_ah_attr) == sl && - (ib_rvt_state_ops[qp->state] & - RVT_POST_SEND_OK)) { - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - lastwqe = rvt_error_qp(qp, - IB_WC_WR_FLUSH_ERR); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - if (lastwqe) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = - IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, - qp->ibqp.qp_context); - } - } - } - } - rcu_read_unlock(); + rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb); } -- cgit v1.2.3 From e5c197ac35950659a44b2032d6b31defbe631bf9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:23:58 -0700 Subject: IB/hfi1: Convert qp_stats debugfs interface to use new iterator API Continue moving copy/paste code into rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/debugfs.c | 15 +++---- drivers/infiniband/hw/hfi1/qp.c | 83 +++--------------------------------- drivers/infiniband/hw/hfi1/qp.h | 23 +--------- 3 files changed, 14 insertions(+), 107 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index c6a472f01025..36ae1fd86502 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -1,4 +1,3 @@ -#ifdef CONFIG_DEBUG_FS /* * Copyright(c) 2015-2017 Intel Corporation. * @@ -267,10 +266,10 @@ DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) __acquires(RCU) { - struct qp_iter *iter; + struct rvt_qp_iter *iter; loff_t n = *pos; - iter = qp_iter_init(s->private); + iter = rvt_qp_iter_init(s->private, 0, NULL); /* stop calls rcu_read_unlock */ rcu_read_lock(); @@ -279,7 +278,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) return NULL; do { - if (qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -292,11 +291,11 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) __must_hold(RCU) { - struct qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; (*pos)++; - if (qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -312,7 +311,7 @@ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) { - struct qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; if (!iter) return 0; @@ -1535,5 +1534,3 @@ void hfi1_dbg_exit(void) debugfs_remove_recursive(hfi1_dbg_root); hfi1_dbg_root = NULL; } - -#endif diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index af711318f84b..4b01ccd895b4 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -530,82 +530,6 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) sc5); } -struct qp_iter { - struct hfi1_ibdev *dev; - struct rvt_qp *qp; - int specials; - int n; -}; - -struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) -{ - struct qp_iter *iter; - - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return NULL; - - iter->dev = dev; - iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - - return iter; -} - -int qp_iter_next(struct qp_iter *iter) -{ - struct hfi1_ibdev *dev = iter->dev; - int n = iter->n; - int ret = 1; - struct rvt_qp *pqp = iter->qp; - struct rvt_qp *qp; - - /* - * The approach is to consider the special qps - * as an additional table entries before the - * real hash table. Since the qp code sets - * the qp->next hash link to NULL, this works just fine. - * - * iter->specials is 2 * # ports - * - * n = 0..iter->specials is the special qp indices - * - * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are - * the potential hash bucket entries - * - */ - for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) { - if (pqp) { - qp = rcu_dereference(pqp->next); - } else { - if (n < iter->specials) { - struct hfi1_pportdata *ppd; - struct hfi1_ibport *ibp; - int pidx; - - pidx = n % dev->rdi.ibdev.phys_port_cnt; - ppd = &dd_from_dev(dev)->pport[pidx]; - ibp = &ppd->ibport_data; - - if (!(n & 1)) - qp = rcu_dereference(ibp->rvp.qp[0]); - else - qp = rcu_dereference(ibp->rvp.qp[1]); - } else { - qp = rcu_dereference( - dev->rdi.qp_dev->qp_table[ - (n - iter->specials)]); - } - } - pqp = qp; - if (qp) { - iter->qp = qp; - iter->n = n; - return 0; - } - } - return ret; -} - static const char * const qp_type_str[] = { "SMI", "GSI", "RC", "UC", "UD", }; @@ -619,7 +543,12 @@ static int qp_idle(struct rvt_qp *qp) qp->s_tail == qp->s_head; } -void qp_iter_print(struct seq_file *s, struct qp_iter *iter) +/** + * qp_iter_print - print the qp information to seq_file + * @s: the seq_file to emit the qp information on + * @iter: the iterator for the qp hash list + */ +void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { struct rvt_swqe *wqe; struct rvt_qp *qp = iter->qp; diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index 6fe542b6a927..c06d2f8348e0 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -1,7 +1,7 @@ #ifndef _QP_H #define _QP_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -94,26 +94,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag); struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5); struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5); -struct qp_iter; - -/** - * qp_iter_init - initialize the iterator for the qp hash list - * @dev: the hfi1_ibdev - */ -struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev); - -/** - * qp_iter_next - Find the next qp in the hash list - * @iter: the iterator for the qp hash list - */ -int qp_iter_next(struct qp_iter *iter); - -/** - * qp_iter_print - print the qp information to seq_file - * @s: the seq_file to emit the qp information on - * @iter: the iterator for the qp hash list - */ -void qp_iter_print(struct seq_file *s, struct qp_iter *iter); +void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); void _hfi1_schedule_send(struct rvt_qp *qp); void hfi1_schedule_send(struct rvt_qp *qp); -- cgit v1.2.3 From 557fafe1bfecb50c3da0bc4948ebcbc4d19f1619 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:24:04 -0700 Subject: IB/qib: Convert qp_stats debugfs interface to use new iterator API Continue porting copy/paste code into rdmavt from qib. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_debugfs.c | 18 +++++------- drivers/infiniband/hw/qib/qib_qp.c | 51 +++++---------------------------- drivers/infiniband/hw/qib/qib_verbs.h | 10 ++----- 3 files changed, 16 insertions(+), 63 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5bad8e3b40bb..5ed1ed93380f 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -1,6 +1,5 @@ -#ifdef CONFIG_DEBUG_FS /* - * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -191,10 +190,10 @@ DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) __acquires(RCU) { - struct qib_qp_iter *iter; + struct rvt_qp_iter *iter; loff_t n = *pos; - iter = qib_qp_iter_init(s->private); + iter = rvt_qp_iter_init(s->private, 0, NULL); /* stop calls rcu_read_unlock */ rcu_read_lock(); @@ -203,7 +202,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) return NULL; do { - if (qib_qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -216,11 +215,11 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) __must_hold(RCU) { - struct qib_qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; (*pos)++; - if (qib_qp_iter_next(iter)) { + if (rvt_qp_iter_next(iter)) { kfree(iter); return NULL; } @@ -236,7 +235,7 @@ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) { - struct qib_qp_iter *iter = iter_ptr; + struct rvt_qp_iter *iter = iter_ptr; if (!iter) return 0; @@ -284,6 +283,3 @@ void qib_dbg_exit(void) debugfs_remove_recursive(qib_dbg_root); qib_dbg_root = NULL; } - -#endif - diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index a343e3b5d4cb..344e401915f7 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -415,53 +415,16 @@ int qib_check_send_wqe(struct rvt_qp *qp, #ifdef CONFIG_DEBUG_FS -struct qib_qp_iter { - struct qib_ibdev *dev; - struct rvt_qp *qp; - int n; -}; - -struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) -{ - struct qib_qp_iter *iter; - - iter = kzalloc(sizeof(*iter), GFP_KERNEL); - if (!iter) - return NULL; - - iter->dev = dev; - - return iter; -} - -int qib_qp_iter_next(struct qib_qp_iter *iter) -{ - struct qib_ibdev *dev = iter->dev; - int n = iter->n; - int ret = 1; - struct rvt_qp *pqp = iter->qp; - struct rvt_qp *qp; - - for (; n < dev->rdi.qp_dev->qp_table_size; n++) { - if (pqp) - qp = rcu_dereference(pqp->next); - else - qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); - pqp = qp; - if (qp) { - iter->qp = qp; - iter->n = n; - return 0; - } - } - return ret; -} - static const char * const qp_type_str[] = { "SMI", "GSI", "RC", "UC", "UD", }; -void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +/** + * qib_qp_iter_print - print information to seq_file + * @s - the seq_file + * @iter - the iterator + */ +void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { struct rvt_swqe *wqe; struct rvt_qp *qp = iter->qp; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 95e370192948..f887737ac142 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * @@ -282,13 +282,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait); #ifdef CONFIG_DEBUG_FS -struct qib_qp_iter; - -struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); - -int qib_qp_iter_next(struct qib_qp_iter *iter); - -void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); +void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); #endif -- cgit v1.2.3 From 0208da90def5776cef940f9de4ffe6ecef346207 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 28 Aug 2017 11:24:10 -0700 Subject: IB/rdmavt: Handle dereg of inuse MRs properly A destroy of an MR prior to destroying the QP can cause the following diagnostic if the QP is referencing the MR being de-registered: hfi1 0000:05:00.0: hfi1_0: rvt_dereg_mr timeout mr ffff8808562108 00 pd ffff880859b20b00 The solution is to when the a non-zero refcount is encountered when the MR is destroyed the QPs needs to be iterated looking for QPs in the same PD as the MR. If rvt_qp_mr_clean() detects any such QP references the rkey/lkey, the QP needs to be put into an error state via a call to rvt_qp_error() which will trigger the clean up of any stuck references. This solution is as specified in IBTA 1.3 Volume 1 11.2.10.5. [This is reproduced with the 0.4.9 version of qperf and the rc_bw test] Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 121 ++++++++++++++++++++++++++++++++------ drivers/infiniband/sw/rdmavt/qp.c | 112 +++++++++++++++++++++++++++++++++-- include/rdma/rdmavt_mr.h | 3 + include/rdma/rdmavt_qp.h | 1 + 4 files changed, 216 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 1b3801f78e78..42713511b53b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -440,6 +440,105 @@ bail_umem: return ret; } +/** + * rvt_dereg_clean_qp_cb - callback from iterator + * @qp - the qp + * @v - the mregion (as u64) + * + * This routine fields the callback for all QPs and + * for QPs in the same PD as the MR will call the + * rvt_qp_mr_clean() to potentially cleanup references. + */ +static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) +{ + struct rvt_mregion *mr = (struct rvt_mregion *)v; + + /* skip PDs that are not ours */ + if (mr->pd != qp->ibqp.pd) + return; + rvt_qp_mr_clean(qp, mr->lkey); +} + +/** + * rvt_dereg_clean_qps - find QPs for reference cleanup + * @mr - the MR that is being deregistered + * + * This routine iterates RC QPs looking for references + * to the lkey noted in mr. + */ +static void rvt_dereg_clean_qps(struct rvt_mregion *mr) +{ + struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); + + rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb); +} + +/** + * rvt_check_refs - check references + * @mr - the megion + * @t - the caller identification + * + * This routine checks MRs holding a reference during + * when being de-registered. + * + * If the count is non-zero, the code calls a clean routine then + * waits for the timeout for the count to zero. + */ +static int rvt_check_refs(struct rvt_mregion *mr, const char *t) +{ + unsigned long timeout; + struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); + + if (percpu_ref_is_zero(&mr->refcount)) + return 0; + /* avoid dma mr */ + if (mr->lkey) + rvt_dereg_clean_qps(mr); + timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); + if (!timeout) { + rvt_pr_err(rdi, + "%s timeout mr %p pd %p lkey %x refcount %ld\n", + t, mr, mr->pd, mr->lkey, + atomic_long_read(&mr->refcount.count)); + rvt_get_mr(mr); + return -EBUSY; + } + return 0; +} + +/** + * rvt_mr_has_lkey - is MR + * @mr - the mregion + * @lkey - the lkey + */ +bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) +{ + return mr && lkey == mr->lkey; +} + +/** + * rvt_ss_has_lkey - is mr in sge tests + * @ss - the sge state + * @lkey + * + * This code tests for an MR in the indicated + * sge state. + */ +bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) +{ + int i; + bool rval = false; + + if (!ss->num_sge) + return rval; + /* first one */ + rval = rvt_mr_has_lkey(ss->sge.mr, lkey); + /* any others */ + for (i = 0; !rval && i < ss->num_sge - 1; i++) + rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey); + return rval; +} + /** * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free @@ -453,22 +552,14 @@ bail_umem: int rvt_dereg_mr(struct ib_mr *ibmr) { struct rvt_mr *mr = to_imr(ibmr); - struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device); - int ret = 0; - unsigned long timeout; + int ret; rvt_free_lkey(&mr->mr); rvt_put_mr(&mr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); - if (!timeout) { - rvt_pr_err(rdi, - "rvt_dereg_mr timeout mr %p pd %p\n", - mr, mr->mr.pd); - rvt_get_mr(&mr->mr); - ret = -EBUSY; + ret = rvt_check_refs(&mr->mr, __func__); + if (ret) goto out; - } rvt_deinit_mregion(&mr->mr); if (mr->umem) ib_umem_release(mr->umem); @@ -761,16 +852,12 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { struct rvt_fmr *fmr = to_ifmr(ibfmr); int ret = 0; - unsigned long timeout; rvt_free_lkey(&fmr->mr); rvt_put_mr(&fmr->mr); /* will set completion if last */ - timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ); - if (!timeout) { - rvt_get_mr(&fmr->mr); - ret = -EBUSY; + ret = rvt_check_refs(&fmr->mr, __func__); + if (ret) goto out; - } rvt_deinit_mregion(&fmr->mr); kfree(fmr); out: diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9f70fd8665ab..22df09ae809e 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -458,10 +458,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) } } - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < rvt_max_atomic(rdi); n++) { + for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) { struct rvt_ack_entry *e = &qp->s_ack_queue[n]; if (e->rdma_sge.mr) { @@ -471,6 +468,113 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) } } +/** + * rvt_swqe_has_lkey - return true if lkey is used by swqe + * @wqe - the send wqe + * @lkey - the lkey + * + * Test the swqe for using lkey + */ +static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey) +{ + int i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + if (rvt_mr_has_lkey(sge->mr, lkey)) + return true; + } + return false; +} + +/** + * rvt_qp_sends_has_lkey - return true is qp sends use lkey + * @qp - the rvt_qp + * @lkey - the lkey + */ +static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey) +{ + u32 s_last = qp->s_last; + + while (s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last); + + if (rvt_swqe_has_lkey(wqe, lkey)) + return true; + + if (++s_last >= qp->s_size) + s_last = 0; + } + if (qp->s_rdma_mr) + if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey)) + return true; + return false; +} + +/** + * rvt_qp_acks_has_lkey - return true if acks have lkey + * @qp - the qp + * @lkey - the lkey + */ +static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey) +{ + int i; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[i]; + + if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey)) + return true; + } + return false; +} + +/* + * rvt_qp_mr_clean - clean up remote ops for lkey + * @qp - the qp + * @lkey - the lkey that is being de-registered + * + * This routine checks if the lkey is being used by + * the qp. + * + * If so, the qp is put into an error state to elminate + * any references from the qp. + */ +void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey) +{ + bool lastwqe = false; + + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + /* avoid special QPs */ + return; + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto check_lwqe; + + if (rvt_ss_has_lkey(&qp->r_sge, lkey) || + rvt_qp_sends_has_lkey(qp, lkey) || + rvt_qp_acks_has_lkey(qp, lkey)) + lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR); +check_lwqe: + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + /** * rvt_remove_qp - remove qp form table * @rdi: rvt dev struct diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index f418bd5571a5..72a3856d4057 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -191,4 +191,7 @@ static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length, } } +bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey); +bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey); + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index dfeb311c30a1..0eed3d8752fa 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -702,4 +702,5 @@ int rvt_qp_iter_next(struct rvt_qp_iter *iter); void rvt_qp_iter(struct rvt_dev_info *rdi, u64 v, void (*cb)(struct rvt_qp *qp, u64 v)); +void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 9eb7f8e44d13cd2a565a5f088c8a842810270757 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:49 -0400 Subject: IB/rxe: Move refcounting earlier in rxe_send() The network stack will call nskb's destructor, rxe_skb_tx_dtor(), if the packet gets dropped by ip_local_out()/ip6_local_out(). Thus we need to add the QP ref before output to avoid extra dereferences during network congestion. This could lead to unwanted destruction of the QP. Fix up the skb_out accounting, too. Fixes: fda85ce91240 ("IB/rxe: Fix kernel panic from skb destructor") Signed-off-by: Andrew Boyer Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 08f3f90d2912..0810f38bb1fa 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -460,12 +460,17 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) nskb->destructor = rxe_skb_tx_dtor; nskb->sk = pkt->qp->sk->sk; + rxe_add_ref(pkt->qp); + atomic_inc(&pkt->qp->skb_out); + if (av->network_type == RDMA_NETWORK_IPV4) { err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); } else if (av->network_type == RDMA_NETWORK_IPV6) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); } else { pr_err("Unknown layer 3 protocol: %d\n", av->network_type); + atomic_dec(&pkt->qp->skb_out); + rxe_drop_ref(pkt->qp); kfree_skb(nskb); return -EINVAL; } @@ -475,10 +480,7 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) return -EAGAIN; } - rxe_add_ref(pkt->qp); - atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); - return 0; } -- cgit v1.2.3 From bfc3ae05660e928b34034e668a82c334111c0c22 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:50 -0400 Subject: IB/rxe: Disable completion upcalls when a CQ is destroyed This prevents the stack from accessing userspace objects while they are being torn down. One possible sequence of events: - Userspace program exits - ib_uverbs_cleanup_ucontext() runs, calling ib_destroy_qp(), ib_destroy_cq(), etc. and releasing/freeing the UCQ - The QP still has tasklets running, so it isn't destroyed yet - The CQ is referenced by the QP, so the CQ isn't destroyed yet - The UCQ is kfree()'d anyway - A send work request completes - rxe_send_complete() calls cq->ibcq.comp_handler() - ib_uverbs_comp_handler() runs and crashes; the event queue is checked for is_closed, but it has no way to check the ib_ucq_object before accessing it The reference counting on the CQ doesn't protect against this since the CQ hasn't been destroyed yet. There's no available interface to deregister the UCQ from the CQ, and it didn't appear that attempting to add reference counting to the UCQ was going to be a good way to go since this solution is much simpler. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_cq.c | 19 +++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_loc.h | 2 ++ drivers/infiniband/sw/rxe/rxe_verbs.c | 2 ++ drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + 4 files changed, 24 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 49fe42c23f4d..c4aabf78dc90 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -69,6 +69,14 @@ err1: static void rxe_send_complete(unsigned long data) { struct rxe_cq *cq = (struct rxe_cq *)data; + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + if (cq->is_dying) { + spin_unlock_irqrestore(&cq->cq_lock, flags); + return; + } + spin_unlock_irqrestore(&cq->cq_lock, flags); cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } @@ -97,6 +105,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, if (udata) cq->is_user = 1; + cq->is_dying = false; + tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); spin_lock_init(&cq->cq_lock); @@ -156,6 +166,15 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) return 0; } +void rxe_cq_disable(struct rxe_cq *cq) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); + cq->is_dying = true; + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + void rxe_cq_cleanup(struct rxe_pool_entry *arg) { struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem); diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 6b65c0132289..8073f5858145 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -64,6 +64,8 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata); int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); +void rxe_cq_disable(struct rxe_cq *cq); + void rxe_cq_cleanup(struct rxe_pool_entry *arg); /* rxe_mcast.c */ diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e611d0c7314a..0b362f49a10a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -919,6 +919,8 @@ static int rxe_destroy_cq(struct ib_cq *ibcq) { struct rxe_cq *cq = to_rcq(ibcq); + rxe_cq_disable(cq); + rxe_drop_ref(cq); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5a180fbe40d9..b09a9e26ca73 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -89,6 +89,7 @@ struct rxe_cq { struct rxe_queue *queue; spinlock_t cq_lock; u8 notify; + bool is_dying; int is_user; struct tasklet_struct comp_task; }; -- cgit v1.2.3 From cffec53daf9f9a87e4e532da1dfaac4bbfcc2e29 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:51 -0400 Subject: IB/rxe: Remove dangling prototype Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Andrew Boyer Acked-by: Moni Shoua Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_loc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 8073f5858145..77b3ed0df936 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -221,8 +221,6 @@ static inline void rxe_advance_resp_resource(struct rxe_qp *qp) void retransmit_timer(unsigned long data); void rnr_nak_timer(unsigned long data); -void dump_qp(struct rxe_qp *qp); - /* rxe_srq.c */ #define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT) -- cgit v1.2.3 From d45d29567f87492e467ec854e0e81ad847c9b840 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:52 -0400 Subject: IB/rxe: Fix up the responder's find_resources() function The resource array is sized by max_dest_rd_atomic, not max_rd_atomic. Iterating over max_rd_atomic entries of qp->resp.resources[] will cause incorrect behavior when the two attributes are different (or even crash if max_rd_atomic is larger). Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Andrew Boyer Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index a958ee918a49..4240866a5331 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1055,7 +1055,7 @@ static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn) { int i; - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; if (res->type == 0) -- cgit v1.2.3 From b9109b7ddb13a52b379c3e57d9b1d74d7445e94d Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:53 -0400 Subject: IB/rxe: Fix destination cache for IPv6 To successfully match an IPv6 path, the path cookie must match. Store it in the QP so that the IPv6 path can be reused. Replace open-coded version of dst_check() with the actual call, fixing the logic. The open-coded version skips the check call if dst->obsolete is 0 (DST_OBSOLETE_NONE), proceeding to replace the route. DST_OBSOLETE_NONE means that the route may continue to be used, though. Fixes: 4ed6ad1eb30e ("IB/rxe: Cache dst in QP instead of getting it...") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 7 ++++++- drivers/infiniband/sw/rxe/rxe_verbs.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0810f38bb1fa..a93ae3a90ff0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -191,7 +191,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, if (qp_type(qp) == IB_QPT_RC) dst = sk_dst_get(qp->sk->sk); - if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) { + if (!dst || !dst_check(dst, qp->dst_cookie)) { if (dst) dst_release(dst); @@ -209,6 +209,11 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; dst = rxe_find_route6(rxe->ndev, saddr6, daddr6); +#if IS_ENABLED(CONFIG_IPV6) + if (dst) + qp->dst_cookie = + rt6_get_cookie((struct rt6_info *)dst); +#endif } } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index b09a9e26ca73..0c2dbe45c729 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -248,6 +248,7 @@ struct rxe_qp { struct rxe_rq rq; struct socket *sk; + u32 dst_cookie; struct rxe_av pri_av; struct rxe_av alt_av; -- cgit v1.2.3 From 48c22be4abaf3925e8ff74fb3ce5214b082ca989 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:54 -0400 Subject: IB/rxe: Add dst_clone() in prepare_ipv6_hdr() Otherwise the reference count goes negative as IPv6 packets complete. Fixes: 4ed6ad1eb30e ("IB/rxe: Cache dst in QP instead of getting it...") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index a93ae3a90ff0..3fbbadc5eb47 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -342,7 +342,7 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); - skb_dst_set(skb, dst); + skb_dst_set(skb, dst_clone(dst)); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); -- cgit v1.2.3 From 825a51a4af82fd90c1fd98b080439d8593972457 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:55 -0400 Subject: IB/rxe: Fix up rxe_qp_cleanup() Replace sk_dst_get()/dst_release() in rxe_qp_cleanup() with sk_dst_reset(). sk_dst_get() takes a new reference on dst, so the dst_release() doesn't actually release the original reference, which was the design intent. Fixes: 4ed6ad1eb30e ("IB/rxe: Cache dst in QP instead of getting it...") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 80ccc7c7c341..00bda9380a2e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -851,13 +851,8 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg) qp->resp.mr = NULL; } - if (qp_type(qp) == IB_QPT_RC) { - struct dst_entry *dst = NULL; - - dst = sk_dst_get(qp->sk->sk); - if (dst) - dst_release(dst); - } + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); free_rd_atomic_resources(qp); -- cgit v1.2.3 From 2418adaed1ab614438d7995c500b1d219b138add Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:56 -0400 Subject: IB/rxe: Remove unneeded initialization in prepare6() Fixes: 4ed6ad1eb30e ("IB/rxe: Cache dst in QP instead of getting it...") Signed-off-by: Andrew Boyer Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 3fbbadc5eb47..3ba76e7c7a38 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -393,7 +393,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb, struct rxe_av *av) { struct rxe_qp *qp = pkt->qp; - struct dst_entry *dst = NULL; + struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; -- cgit v1.2.3 From 1223a1af75a85356ec5f5e5a097b60d26f25ff15 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:57 -0400 Subject: IB/rxe: Another fix for broken receive queue draining This fixes another path in rxe_requester() that might overlook stale SKBs, preventing cleanup. Fixes: 1217197142d1 ("rxe: fix broken receive queue draining") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index db7161456f45..d84222f9d5d2 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -594,8 +594,10 @@ int rxe_requester(void *arg) rxe_add_ref(qp); next_wqe: - if (unlikely(!qp->valid)) + if (unlikely(!qp->valid)) { + rxe_drain_req_pkts(qp, true); goto exit; + } if (unlikely(qp->req.state == QP_STATE_ERROR)) { rxe_drain_req_pkts(qp, true); -- cgit v1.2.3 From 13eb1e21d6198c9068eab7e7cb68c6d5c6834d1b Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:58 -0400 Subject: IB/rxe: Avoid ICRC errors by copying into the skb first The current process is to first calculate the CRC and then copy the client data into the packet. This leaves a window in which the packet contents and CRC can get out of sync, if the client changes the data after the CRC is calculated but before the data is copied. By copying the data into the packet and then calculating the CRC directly from the packet contents we eliminate the window. This can be seen with qperf's ud_bi_bw test. This seems like very strange/reckless client behavior, but whether the client has mangled its data or not RXE should be able to transfer it reliably. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index e37cc89987e1..5c2684bf430f 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -367,11 +367,11 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, dest = (dir == to_mem_obj) ? ((void *)(uintptr_t)iova) : addr; + memcpy(dest, src, length); + if (crcp) *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), - *crcp, src, length); - - memcpy(dest, src, length); + *crcp, dest, length); return 0; } @@ -401,11 +401,11 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, if (bytes > length) bytes = length; + memcpy(dest, src, bytes); + if (crcp) crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), - crc, src, bytes); - - memcpy(dest, src, bytes); + crc, dest, bytes); length -= bytes; addr += bytes; -- cgit v1.2.3 From 5c50f1d18f6bdd8ff8d274479b8bf0d89ab5a47d Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 28 Aug 2017 16:11:59 -0400 Subject: IB/rxe: Handle NETDEV_CHANGE events Without this fix, ports configured on top of ixgbe miss link up notifications. ibv_query_port() will continue to return IBV_PORT_DOWN even though the port is up and working. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 3ba76e7c7a38..59dee10bebcb 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -651,8 +651,13 @@ static int rxe_notify(struct notifier_block *not_blk, pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; - case NETDEV_REBOOT: case NETDEV_CHANGE: + if (netif_running(ndev) && netif_carrier_ok(ndev)) + rxe_port_up(rxe); + else + rxe_port_down(rxe); + break; + case NETDEV_REBOOT: case NETDEV_GOING_DOWN: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: -- cgit v1.2.3 From 1a56ff6daab1e062aadec582eb10e7090f0b370a Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:04 +0300 Subject: IB/core: Separate CQ handle in SRQ context Before this change CQ attached to SRQ was part of XRC specific extension. Moving CQ handle out makes it available to other types extending SRQ functionality. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 27 +++++++++++++++++---------- drivers/infiniband/core/verbs.c | 16 +++++++++------- drivers/infiniband/hw/mlx4/srq.c | 4 ++-- drivers/infiniband/hw/mlx5/main.c | 10 +++++----- drivers/infiniband/hw/mlx5/srq.c | 11 +++++++---- include/rdma/ib_verbs.h | 31 ++++++++++++++++++++----------- 6 files changed, 60 insertions(+), 39 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 8e9fea03dec4..9f690af46a7e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3497,10 +3497,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + } - attr.ext.xrc.cq = uobj_get_obj_read(cq, cmd->cq_handle, - file->ucontext); - if (!attr.ext.xrc.cq) { + if (ib_srq_has_cq(cmd->srq_type)) { + attr.ext.cq = uobj_get_obj_read(cq, cmd->cq_handle, + file->ucontext); + if (!attr.ext.cq) { ret = -EINVAL; goto err_put_xrcd; } @@ -3535,10 +3537,13 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + if (ib_srq_has_cq(cmd->srq_type)) { + srq->ext.cq = attr.ext.cq; + atomic_inc(&attr.ext.cq->usecnt); + } + if (cmd->srq_type == IB_SRQT_XRC) { - srq->ext.xrc.cq = attr.ext.xrc.cq; srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; - atomic_inc(&attr.ext.xrc.cq->usecnt); atomic_inc(&attr.ext.xrc.xrcd->usecnt); } @@ -3561,10 +3566,12 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, goto err_copy; } - if (cmd->srq_type == IB_SRQT_XRC) { + if (cmd->srq_type == IB_SRQT_XRC) uobj_put_read(xrcd_uobj); - uobj_put_obj_read(attr.ext.xrc.cq); - } + + if (ib_srq_has_cq(cmd->srq_type)) + uobj_put_obj_read(attr.ext.cq); + uobj_put_obj_read(pd); uobj_alloc_commit(&obj->uevent.uobject); @@ -3577,8 +3584,8 @@ err_put: uobj_put_obj_read(pd); err_put_cq: - if (cmd->srq_type == IB_SRQT_XRC) - uobj_put_obj_read(attr.ext.xrc.cq); + if (ib_srq_has_cq(cmd->srq_type)) + uobj_put_obj_read(attr.ext.cq); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b29d0ff94463..ecb6c395f19b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -622,11 +622,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; + if (ib_srq_has_cq(srq->srq_type)) { + srq->ext.cq = srq_init_attr->ext.cq; + atomic_inc(&srq->ext.cq->usecnt); + } if (srq->srq_type == IB_SRQT_XRC) { srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; - srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; atomic_inc(&srq->ext.xrc.xrcd->usecnt); - atomic_inc(&srq->ext.xrc.cq->usecnt); } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); @@ -667,18 +669,18 @@ int ib_destroy_srq(struct ib_srq *srq) pd = srq->pd; srq_type = srq->srq_type; - if (srq_type == IB_SRQT_XRC) { + if (ib_srq_has_cq(srq_type)) + cq = srq->ext.cq; + if (srq_type == IB_SRQT_XRC) xrcd = srq->ext.xrc.xrcd; - cq = srq->ext.xrc.cq; - } ret = srq->device->destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); - if (srq_type == IB_SRQT_XRC) { + if (srq_type == IB_SRQT_XRC) atomic_dec(&xrcd->usecnt); + if (ib_srq_has_cq(srq_type)) atomic_dec(&cq->usecnt); - } } return ret; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index dd7a2fce9df4..ebee56cbc0e2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -178,8 +178,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - cqn = (init_attr->srq_type == IB_SRQT_XRC) ? - to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_mcq(init_attr->ext.cq)->mcq.cqn : 0; xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : (u16) dev->dev->caps.reserved_xrcds; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 62e6298810e7..7ad585257fd3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3217,7 +3217,7 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) attr.attr.max_sge = 1; attr.attr.max_wr = 1; attr.srq_type = IB_SRQT_XRC; - attr.ext.xrc.cq = devr->c0; + attr.ext.cq = devr->c0; attr.ext.xrc.xrcd = devr->x0; devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); @@ -3232,9 +3232,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s0->srq_context = NULL; devr->s0->srq_type = IB_SRQT_XRC; devr->s0->ext.xrc.xrcd = devr->x0; - devr->s0->ext.xrc.cq = devr->c0; + devr->s0->ext.cq = devr->c0; atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); - atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->s0->ext.cq->usecnt); atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s0->usecnt, 0); @@ -3253,9 +3253,9 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) devr->s1->event_handler = NULL; devr->s1->srq_context = NULL; devr->s1->srq_type = IB_SRQT_BASIC; - devr->s1->ext.xrc.cq = devr->c0; + devr->s1->ext.cq = devr->c0; atomic_inc(&devr->p0->usecnt); - atomic_set(&devr->s0->usecnt, 0); + atomic_set(&devr->s1->usecnt, 0); for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { INIT_WORK(&devr->ports[port].pkey_change_work, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 30b3ddd8e1ab..e6be4f2927a7 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -292,13 +292,16 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) in.flags |= MLX5_SRQ_FLAG_WQ_SIG; - if (init_attr->srq_type == IB_SRQT_XRC) { + + if (init_attr->srq_type == IB_SRQT_XRC) in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; - in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn; - } else if (init_attr->srq_type == IB_SRQT_BASIC) { + else in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + + if (ib_srq_has_cq(init_attr->srq_type)) + in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn; + else in.cqn = to_mcq(dev->devr.c0)->mcq.cqn; - } in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index cab0bdcfad51..f0e46757185b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -994,6 +994,11 @@ enum ib_srq_type { IB_SRQT_XRC }; +static inline bool ib_srq_has_cq(enum ib_srq_type srq_type) +{ + return srq_type == IB_SRQT_XRC; +} + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -1011,11 +1016,13 @@ struct ib_srq_init_attr { struct ib_srq_attr attr; enum ib_srq_type srq_type; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + } xrc; + }; } ext; }; @@ -1554,12 +1561,14 @@ struct ib_srq { enum ib_srq_type srq_type; atomic_t usecnt; - union { - struct { - struct ib_xrcd *xrcd; - struct ib_cq *cq; - u32 srq_num; - } xrc; + struct { + struct ib_cq *cq; + union { + struct { + struct ib_xrcd *xrcd; + u32 srq_num; + } xrc; + }; } ext; }; -- cgit v1.2.3 From 38eb44fac71729fabdef71166e72bee5964c10d6 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:07 +0300 Subject: IB/uverbs: Add new SRQ type IB_SRQT_TM Add new SRQ type capable of new tag matching feature. When SRQ receives a message it will search through the matching list for the corresponding posted receive buffer. The process of searching the matching list is called tag matching. In case the tag matching results in a match, the received message will be placed in the address specified by the receive buffer. In case no match was found the message will be placed in a generic buffer until the corresponding receive buffer will be posted. These messages are called unexpected and their set is called an unexpected list. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 9f690af46a7e..e69038a07fa0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1438,7 +1438,7 @@ static int create_qp(struct ib_uverbs_file *file, if (cmd->is_srq) { srq = uobj_get_obj_read(srq, cmd->srq_handle, file->ucontext); - if (!srq || srq->srq_type != IB_SRQT_BASIC) { + if (!srq || srq->srq_type == IB_SRQT_XRC) { ret = -EINVAL; goto err_put; } @@ -3481,6 +3481,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (IS_ERR(obj)) return PTR_ERR(obj); + if (cmd->srq_type == IB_SRQT_TM) + attr.ext.tag_matching.max_num_tags = cmd->max_num_tags; + if (cmd->srq_type == IB_SRQT_XRC) { xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle, file->ucontext); @@ -3615,6 +3618,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + memset(&xcmd, 0, sizeof(xcmd)); xcmd.response = cmd.response; xcmd.user_handle = cmd.user_handle; xcmd.srq_type = IB_SRQT_BASIC; -- cgit v1.2.3 From 8d50505ada728258fcdce99120b937ce68298c4e Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:08 +0300 Subject: IB/uverbs: Expose XRQ capabilities Make XRQ capabilities available via ibv_query_device() verb. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 10 ++++++++++ include/uapi/rdma/ib_user_verbs.h | 15 +++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e69038a07fa0..e0cb99860934 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3868,6 +3868,16 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.raw_packet_caps = attr.raw_packet_caps; resp.response_length += sizeof(resp.raw_packet_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps)) + goto end; + + resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size; + resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags; + resp.xrq_caps.max_ops = attr.xrq_caps.max_ops; + resp.xrq_caps.max_sge = attr.xrq_caps.max_sge; + resp.xrq_caps.flags = attr.xrq_caps.flags; + resp.response_length += sizeof(resp.xrq_caps); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index d5434bbf40c8..9a0b6479fe0c 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -236,6 +236,20 @@ struct ib_uverbs_rss_caps { __u32 reserved; }; +struct ib_uverbs_tm_caps { + /* Max size of rendezvous request message */ + __u32 max_rndv_hdr_size; + /* Max number of entries in tag matching list */ + __u32 max_num_tags; + /* TM flags */ + __u32 flags; + /* Max number of outstanding list operations */ + __u32 max_ops; + /* Max number of SGE in tag matching entry */ + __u32 max_sge; + __u32 reserved; +}; + struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; @@ -247,6 +261,7 @@ struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 raw_packet_caps; + struct ib_uverbs_tm_caps xrq_caps; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From eb761894351d0372248f2636c213d7b822e8775f Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:09 +0300 Subject: IB/mlx5: Fill XRQ capabilities Provide driver specific values for XRQ capabilities. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 10 ++++++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7ad585257fd3..ab3c562d5ba7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -777,6 +777,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } + if (MLX5_CAP_GEN(mdev, tag_matching)) { + props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; + props->xrq_caps.max_num_tags = + (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; + props->xrq_caps.flags = IB_TM_CAP_RC; + props->xrq_caps.max_ops = + 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); + props->xrq_caps.max_sge = MLX5_TM_MAX_SGE; + } + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { resp.cqe_comp_caps.max_num = MLX5_CAP_GEN(dev->mdev, cqe_compression) ? diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b3380e8beacf..189e80cd6b2f 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -107,6 +107,11 @@ enum { MLX5_CQE_VERSION_V1, }; +enum { + MLX5_TM_MAX_RNDV_MSG_SIZE = 64, + MLX5_TM_MAX_SGE = 1, +}; + struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; -- cgit v1.2.3 From 3fd3307ef34fc9f7198af9249c763cf7a4ac653f Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Thu, 17 Aug 2017 15:52:11 +0300 Subject: IB/mlx5: Support IB_SRQT_TM Pass to mlx5_core flag to enable rendezvous offload, list_size and CQ when SRQ created with IB_SRQT_TM. Signed-off-by: Artemy Kovalyov Reviewed-by: Yossi Itigin Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 9 +++++++-- drivers/infiniband/hw/mlx5/srq.c | 18 +++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d6df88a78d5e..acb79d3a4f1d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1729,10 +1729,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); - if (qp->sq.wqe_cnt) + if (qp->sq.wqe_cnt) { MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); - else + } else { MLX5_SET(qpc, qpc, no_sq, 1); + if (init_attr->srq && + init_attr->srq->srq_type == IB_SRQT_TM) + MLX5_SET(qpc, qpc, offload_type, + MLX5_QPC_OFFLOAD_TYPE_RNDV); + } /* Set default resources */ switch (init_attr->qp_type) { diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index e6be4f2927a7..6d5fadad9090 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -101,7 +101,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, udata->inlen - sizeof(ucmd))) return -EINVAL; - if (in->type == IB_SRQT_XRC) { + if (in->type != IB_SRQT_BASIC) { err = get_srq_user_index(to_mucontext(pd->uobject->context), &ucmd, udata->inlen, &uidx); if (err) @@ -145,7 +145,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = uidx; return 0; @@ -205,7 +205,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && - in->type == IB_SRQT_XRC) + in->type != IB_SRQT_BASIC) in->user_index = MLX5_IB_DEFAULT_UIDX; return 0; @@ -298,6 +298,18 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, else in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn; + if (init_attr->srq_type == IB_SRQT_TM) { + in.tm_log_list_size = + ilog2(init_attr->ext.tag_matching.max_num_tags) + 1; + if (in.tm_log_list_size > + MLX5_CAP_GEN(dev->mdev, log_tag_matching_list_sz)) { + mlx5_ib_dbg(dev, "TM SRQ max_num_tags exceeding limit\n"); + err = -EINVAL; + goto err_usr_kern_srq; + } + in.flags |= MLX5_SRQ_FLAG_RNDV; + } + if (ib_srq_has_cq(init_attr->srq_type)) in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn; else -- cgit v1.2.3 From a0aa309c39de58b86b704654434431aeb5a8bdf1 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:55 +0300 Subject: IB/core: Add a generic way to execute an operation on a uobject The ioctl infrastructure treats all user-objects in the same manner. It gets objects ids from the user-space and by using the object type and type attributes mentioned in the object specification, it executes this required method. Passing an object id from the user-space as an attribute is carried out in three stages. The first is carried out before the actual handler and the last is carried out afterwards. The different supported operations are read, write, destroy and create. In the first stage, the former three actions just fetches the object from the repository (by using its id) and locks it. The last action allocates a new uobject. Afterwards, the second stage is carried out when the handler itself carries out the required modification of the object. The last stage is carried out after the handler finishes and commits the result. The former two operations just unlock the object. Destroy calls the "free object" operation, taking into account the object's type and releases the uobject as well. Creation just adds the new uobject to the repository, making the object visible to the application. In order to abstract these details from the ioctl infrastructure layer, we add uverbs_get_uobject_from_context and uverbs_finalize_object functions which corresponds to the first and last stages respectively. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 58 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/core/rdma_core.h | 17 +++++++++++ include/rdma/uverbs_ioctl.h | 52 +++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 include/rdma/uverbs_ioctl.h (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 41c31a2bf093..2bd58ff17bb8 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "uverbs.h" #include "core_priv.h" #include "rdma_core.h" @@ -625,3 +626,60 @@ const struct uverbs_obj_type_class uverbs_fd_class = { .needs_kfree_rcu = false, }; +struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, + struct ib_ucontext *ucontext, + enum uverbs_obj_access access, + int id) +{ + switch (access) { + case UVERBS_ACCESS_READ: + return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); + case UVERBS_ACCESS_DESTROY: + case UVERBS_ACCESS_WRITE: + return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); + case UVERBS_ACCESS_NEW: + return rdma_alloc_begin_uobject(type_attrs, ucontext); + default: + WARN_ON(true); + return ERR_PTR(-EOPNOTSUPP); + } +} + +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit) +{ + int ret = 0; + + /* + * refcounts should be handled at the object level and not at the + * uobject level. Refcounts of the objects themselves are done in + * handlers. + */ + + switch (access) { + case UVERBS_ACCESS_READ: + rdma_lookup_put_uobject(uobj, false); + break; + case UVERBS_ACCESS_WRITE: + rdma_lookup_put_uobject(uobj, true); + break; + case UVERBS_ACCESS_DESTROY: + if (commit) + ret = rdma_remove_commit_uobject(uobj); + else + rdma_lookup_put_uobject(uobj, true); + break; + case UVERBS_ACCESS_NEW: + if (commit) + ret = rdma_alloc_commit_uobject(uobj); + else + rdma_alloc_abort_uobject(uobj); + break; + default: + WARN_ON(true); + ret = -EOPNOTSUPP; + } + + return ret; +} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 1b82e7ff7fe8..97483d1a7336 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -75,4 +76,20 @@ void uverbs_uobject_put(struct ib_uobject *uobject); */ void uverbs_close_fd(struct file *f); +/* + * Get an ib_uobject that corresponds to the given id from ucontext, assuming + * the object is from the given type. Lock it to the required access when + * applicable. + * This function could create (access == NEW), destroy (access == DESTROY) + * or unlock (access == READ || access == WRITE) objects if required. + * The action will be finalized only when uverbs_finalize_object is called. + */ +struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, + struct ib_ucontext *ucontext, + enum uverbs_obj_access access, + int id); +int uverbs_finalize_object(struct ib_uobject *uobj, + enum uverbs_obj_access access, + bool commit); + #endif /* RDMA_CORE_H */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h new file mode 100644 index 000000000000..6885b92db4a8 --- /dev/null +++ b/include/rdma/uverbs_ioctl.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UVERBS_IOCTL_ +#define _UVERBS_IOCTL_ + +#include + +/* + * ======================================= + * Verbs action specifications + * ======================================= + */ + +enum uverbs_obj_access { + UVERBS_ACCESS_READ, + UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_NEW, + UVERBS_ACCESS_DESTROY +}; + +#endif + -- cgit v1.2.3 From f43dbebfa32041826299bdccae0352887fa007ea Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:56 +0300 Subject: IB/core: Add support to finalize objects in one transaction The new ioctl based infrastructure either commits or rollbacks all objects of the method as one transaction. In order to do that, we introduce a notion of dealing with a collection of objects that are related to a specific method. This also requires adding a notion of a method and attribute. A method contains a hash of attributes, where each bucket contains several attributes. The attributes are hashed according to their namespace which resides in the four upper bits of the id. For example, an object could be a CQ, which has an action of CREATE_CQ. This action has multiple attributes. For example, the CQ's new handle and the comp_channel. Each layer in this hierarchy - objects, methods and attributes is split into namespaces. The basic example for that is one namespace representing the default entities and another one representing the driver specific entities. When declaring these methods and attributes, we actually declare their specifications. When a method is executed, we actually allocates some space to hold auxiliary information. This auxiliary information contains meta-data about the required objects, such as pointers to their type information, pointers to the uobjects themselves (if exist), etc. The specification, along with the auxiliary information we allocated and filled is given to the finalize_objects function. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 40 ++++++++++++++++++++++++++++ drivers/infiniband/core/rdma_core.h | 22 ++++++++++++++- include/rdma/uverbs_ioctl.h | 53 +++++++++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 2bd58ff17bb8..0fe8ef913387 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -683,3 +683,43 @@ int uverbs_finalize_object(struct ib_uobject *uobj, return ret; } + +int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, + struct uverbs_attr_spec_hash * const *spec_hash, + size_t num, + bool commit) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < num; i++) { + struct uverbs_attr_bundle_hash *curr_bundle = + &attrs_bundle->hash[i]; + const struct uverbs_attr_spec_hash *curr_spec_bucket = + spec_hash[i]; + unsigned int j; + + for (j = 0; j < curr_bundle->num_attrs; j++) { + struct uverbs_attr *attr; + const struct uverbs_attr_spec *spec; + + if (!uverbs_attr_is_valid_in_hash(curr_bundle, j)) + continue; + + attr = &curr_bundle->attrs[j]; + spec = &curr_spec_bucket->attrs[j]; + + if (spec->type == UVERBS_ATTR_TYPE_IDR || + spec->type == UVERBS_ATTR_TYPE_FD) { + int current_ret; + + current_ret = uverbs_finalize_object(attr->obj_attr.uobject, + spec->obj.access, + commit); + if (!ret) + ret = current_ret; + } + } + } + return ret; +} diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 97483d1a7336..9ed6ad0324c7 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -82,7 +82,8 @@ void uverbs_close_fd(struct file *f); * applicable. * This function could create (access == NEW), destroy (access == DESTROY) * or unlock (access == READ || access == WRITE) objects if required. - * The action will be finalized only when uverbs_finalize_object is called. + * The action will be finalized only when uverbs_finalize_object or + * uverbs_finalize_objects are called. */ struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, struct ib_ucontext *ucontext, @@ -91,5 +92,24 @@ struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type int uverbs_finalize_object(struct ib_uobject *uobj, enum uverbs_obj_access access, bool commit); +/* + * Note that certain finalize stages could return a status: + * (a) alloc_commit could return a failure if the object is committed at the + * same time when the context is destroyed. + * (b) remove_commit could fail if the object wasn't destroyed successfully. + * Since multiple objects could be finalized in one transaction, it is very NOT + * recommended to have several finalize actions which have side effects. + * For example, it's NOT recommended to have a certain action which has both + * a commit action and a destroy action or two destroy objects in the same + * action. The rule of thumb is to have one destroy or commit action with + * multiple lookups. + * The first non zero return value of finalize_object is returned from this + * function. For example, this could happen when we couldn't destroy an + * object. + */ +int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, + struct uverbs_attr_spec_hash * const *spec_hash, + size_t num, + bool commit); #endif /* RDMA_CORE_H */ diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 6885b92db4a8..d3ec02b7d937 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -41,6 +41,12 @@ * ======================================= */ +enum uverbs_attr_type { + UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_IDR, + UVERBS_ATTR_TYPE_FD, +}; + enum uverbs_obj_access { UVERBS_ACCESS_READ, UVERBS_ACCESS_WRITE, @@ -48,5 +54,52 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; +struct uverbs_attr_spec { + enum uverbs_attr_type type; + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; +}; + +struct uverbs_attr_spec_hash { + size_t num_attrs; + struct uverbs_attr_spec attrs[0]; +}; + +struct uverbs_obj_attr { + struct ib_uobject *uobject; +}; + +struct uverbs_attr { + struct uverbs_obj_attr obj_attr; +}; + +struct uverbs_attr_bundle_hash { + /* if bit i is set, it means attrs[i] contains valid information */ + unsigned long *valid_bitmap; + size_t num_attrs; + /* + * arrays of attributes, each element corresponds to the specification + * of the attribute in the same index. + */ + struct uverbs_attr *attrs; +}; + +struct uverbs_attr_bundle { + size_t num_buckets; + struct uverbs_attr_bundle_hash hash[]; +}; + +static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash, + unsigned int idx) +{ + return test_bit(idx, attrs_hash->valid_bitmap); +} + #endif -- cgit v1.2.3 From c76161181193985087cd716fdf69b5cb6cf9ee85 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 29 Aug 2017 10:34:43 -0700 Subject: IB/cm: Fix sleeping in atomic when RoCE is used A couple of places in the CM do spin_lock_irq(&cm_id_priv->lock); ... if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) However when the underlying transport is RoCE, this leads to a sleeping function being called with the lock held - the callchain is cm_alloc_response_msg() -> ib_create_ah_from_wc() -> ib_init_ah_from_wc() -> rdma_addr_find_l2_eth_by_grh() -> rdma_resolve_ip() and rdma_resolve_ip() starts out by doing req = kzalloc(sizeof *req, GFP_KERNEL); not to mention rdma_addr_find_l2_eth_by_grh() doing wait_for_completion(&ctx.comp); to wait for the task that rdma_resolve_ip() queues up. Fix this by moving the AH creation out of the lock. Signed-off-by: Roland Dreier Reviewed-by: Sean Hefty Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 63 +++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fa3b0a428195..4c4b46586af2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -373,11 +373,19 @@ out: return ret; } -static int cm_alloc_response_msg(struct cm_port *port, - struct ib_mad_recv_wc *mad_recv_wc, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc) +{ + return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, + 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, + GFP_ATOMIC, + IB_MGMT_BASE_VERSION); +} + +static int cm_create_response_msg_ah(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + struct ib_mad_send_buf *msg) { - struct ib_mad_send_buf *m; struct ib_ah *ah; ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, @@ -385,27 +393,40 @@ static int cm_alloc_response_msg(struct cm_port *port, if (IS_ERR(ah)) return PTR_ERR(ah); - m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, - 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, - GFP_ATOMIC, - IB_MGMT_BASE_VERSION); - if (IS_ERR(m)) { - rdma_destroy_ah(ah); - return PTR_ERR(m); - } - m->ah = ah; - *msg = m; + msg->ah = ah; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { - rdma_destroy_ah(msg->ah); + if (msg->ah) + rdma_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); } +static int cm_alloc_response_msg(struct cm_port *port, + struct ib_mad_recv_wc *mad_recv_wc, + struct ib_mad_send_buf **msg) +{ + struct ib_mad_send_buf *m; + int ret; + + m = cm_alloc_response_msg_no_ah(port, mad_recv_wc); + if (IS_ERR(m)) + return PTR_ERR(m); + + ret = cm_create_response_msg_ah(port, mad_recv_wc, m); + if (ret) { + cm_free_msg(m); + return ret; + } + + *msg = m; + return 0; +} + static void * cm_copy_private_data(const void *private_data, u8 private_data_len) { @@ -2497,7 +2518,8 @@ static int cm_dreq_handler(struct cm_work *work) case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); - if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); + if (IS_ERR(msg)) goto unlock; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, @@ -2505,7 +2527,8 @@ static int cm_dreq_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); - if (ib_post_send_mad(msg, NULL)) + if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || + ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: @@ -3083,7 +3106,8 @@ static int cm_lap_handler(struct cm_work *work) case IB_CM_MRA_LAP_SENT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); - if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) + msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc); + if (IS_ERR(msg)) goto unlock; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, @@ -3093,7 +3117,8 @@ static int cm_lap_handler(struct cm_work *work) cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); - if (ib_post_send_mad(msg, NULL)) + if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || + ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: -- cgit v1.2.3 From 79364227e6b4923478e99d8480d62482b588ef84 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 29 Aug 2017 10:34:44 -0700 Subject: IB/core: Add might_sleep() annotation to ib_init_ah_from_wc() For RoCE, ib_init_ah_from_wc() can follow the path ib_init_ah_from_wc() -> rdma_addr_find_l2_eth_by_grh() -> rdma_resolve_ip() and rdma_resolve_ip() will sleep in kzalloc() and wait_for_completion(). However, developers will not see any warnings if they use ib_init_ah_from_wc() in an atomic context and test only on IB, because the function doesn't sleep in that case. Add a might_sleep() so that lockdep will catch bugs no matter what hardware is used to test. Signed-off-by: Roland Dreier Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ecb6c395f19b..ee9e27dc799b 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -468,6 +468,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, union ib_gid dgid; union ib_gid sgid; + might_sleep(); + memset(ah_attr, 0, sizeof *ah_attr); ah_attr->type = rdma_ah_find_type(device, port_num); if (rdma_cap_eth_ah(device, port_num)) { -- cgit v1.2.3 From 72f9b089ecd2cc2194d27cbb14fd80a0b1472e89 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Tue, 29 Aug 2017 15:51:29 -0700 Subject: RDMA/vmw_pvrdma: Report network header type in WC We should report the network header type in the work completion so that the kernel can infer the right RoCE type headers. Reviewed-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 1 + include/uapi/rdma/vmw_pvrdma-abi.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 90aa326fd7c0..8a12dc73b68e 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -389,6 +389,7 @@ retry: wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; + wc->network_hdr_type = cqe->network_hdr_type; /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index c8c1d2d6df4d..c6569b0032ec 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -125,7 +125,8 @@ enum pvrdma_wc_flags { PVRDMA_WC_IP_CSUM_OK = 1 << 3, PVRDMA_WC_WITH_SMAC = 1 << 4, PVRDMA_WC_WITH_VLAN = 1 << 5, - PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN, + PVRDMA_WC_WITH_NETWORK_HDR_TYPE = 1 << 6, + PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; struct pvrdma_alloc_ucontext_resp { @@ -283,7 +284,8 @@ struct pvrdma_cqe { __u8 dlid_path_bits; __u8 port_num; __u8 smac[6]; - __u8 reserved2[7]; /* Pad to next power of 2 (64). */ + __u8 network_hdr_type; + __u8 reserved2[6]; /* Pad to next power of 2 (64). */ }; #endif /* __VMW_PVRDMA_ABI_H__ */ -- cgit v1.2.3 From 14d6c3a83fbcd9c3d19e24d8d5820a912f2615c9 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Tue, 29 Aug 2017 15:51:30 -0700 Subject: RDMA/vmw_pvrdma: Fix a signedness Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 8a12dc73b68e..3562c0c30492 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -299,7 +299,7 @@ static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq) { - int head; + unsigned int head; int has_data; if (!cq->is_kernel) -- cgit v1.2.3 From fac9658cabb98afb68ef1630c558864e6f559c07 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:57 +0300 Subject: IB/core: Add new ioctl interface In this ioctl interface, processing the command starts from properties of the command and fetching the appropriate user objects before calling the handler. Parsing and validation is done according to a specifier declared by the driver's code. In the driver, all supported objects are declared. These objects are separated to different object namepsaces. Dividing objects to namespaces is done at initialization by using the higher bits of the object ids. This initialization can mix objects declared in different places to one parsing tree using in this ioctl interface. For each object we list all supported methods. Similarly to objects, methods are separated to method namespaces too. Namespacing is done similarly to the objects case. This could be used in order to add methods to an existing object. Each method has a specific handler, which could be either a default handler or a driver specific handler. Along with the handler, a bunch of attributes are specified as well. Similarly to objects and method, attributes are namespaced and hashed by their ids at initialization too. All supported attributes are subject to automatic fetching and validation. These attributes include the command, response and the method's related objects' ids. When these entities (objects, methods and attributes) are used, the high bits of the entities ids are used in order to calculate the hash bucket index. Then, these high bits are masked out in order to have a zero based index. Since we use these high bits for both bucketing and namespacing, we get a compact representation and O(1) array access. This is mandatory for efficient dispatching. Each attribute has a type (PTR_IN, PTR_OUT, IDR and FD) and a length. Attributes could be validated through some attributes, like: (*) Minimum size / Exact size (*) Fops for FD (*) Object type for IDR If an IDR/fd attribute is specified, the kernel also states the object type and the required access (NEW, WRITE, READ or DESTROY). All uobject/fd management is done automatically by the infrastructure, meaning - the infrastructure will fail concurrent commands that at least one of them requires concurrent access (WRITE/DESTROY), synchronize actions with device removals (dissociate context events) and take care of reference counting (increase/decrease) for concurrent actions invocation. The reference counts on the actual kernel objects shall be handled by the handlers. objects +--------+ | | | | methods +--------+ | | ns method method_spec +-----+ |len | +--------+ +------+[d]+-------+ +----------------+[d]+------------+ |attr1+-> |type | | object +> |method+-> | spec +-> + attr_buckets +-> |default_chain+--> +-----+ |idr_type| +--------+ +------+ |handler| | | +------------+ |attr2| |access | | | | | +-------+ +----------------+ |driver chain| +-----+ +--------+ | | | | +------------+ | | +------+ | | | | | | | | | | | | | | | | | | | | +--------+ [d] = Hash ids to groups using the high order bits The right types table is also chosen by using the high bits from the ids. Currently we have either default or driver specific groups. Once validation and object fetching (or creation) completed, we call the handler: int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, struct uverbs_attr_bundle *ctx); ctx bundles attributes of different namespaces. Each element there is an array of attributes which corresponds to one namespaces of attributes. For example, in the usually used case: ctx core +----------------------------+ +------------+ | core: +---> | valid | +----------------------------+ | cmd_attr | | driver: | +------------+ |----------------------------+--+ | valid | | | cmd_attr | | +------------+ | | valid | | | obj_attr | | +------------+ | | drivers | +------------+ +> | valid | | cmd_attr | +------------+ | valid | | cmd_attr | +------------+ | valid | | obj_attr | +------------+ Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 2 +- drivers/infiniband/core/rdma_core.c | 46 +++++ drivers/infiniband/core/rdma_core.h | 5 + drivers/infiniband/core/uverbs_ioctl.c | 364 +++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 2 + include/rdma/uverbs_ioctl.h | 101 ++++++++- include/uapi/rdma/rdma_user_ioctl.h | 33 +++ 7 files changed, 543 insertions(+), 10 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_ioctl.c (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 920609a0872e..746756dc9877 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -32,4 +32,4 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ - rdma_core.o uverbs_std_types.o + rdma_core.o uverbs_std_types.o uverbs_ioctl.o diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 0fe8ef913387..2a2f002ac7cb 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -36,10 +36,56 @@ #include #include #include +#include #include "uverbs.h" #include "core_priv.h" #include "rdma_core.h" +int uverbs_ns_idx(u16 *id, unsigned int ns_count) +{ + int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT; + + if (ret >= ns_count) + return -EINVAL; + + *id &= ~UVERBS_ID_NS_MASK; + return ret; +} + +const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, + uint16_t object) +{ + const struct uverbs_root_spec *object_hash = ibdev->specs_root; + const struct uverbs_object_spec_hash *objects; + int ret = uverbs_ns_idx(&object, object_hash->num_buckets); + + if (ret < 0) + return NULL; + + objects = object_hash->object_buckets[ret]; + + if (object >= objects->num_objects) + return NULL; + + return objects->objects[object]; +} + +const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, + uint16_t method) +{ + const struct uverbs_method_spec_hash *methods; + int ret = uverbs_ns_idx(&method, object->num_buckets); + + if (ret < 0) + return NULL; + + methods = object->method_buckets[ret]; + if (method >= methods->num_methods) + return NULL; + + return methods->methods[method]; +} + void uverbs_uobject_get(struct ib_uobject *uobject) { kref_get(&uobject->ref); diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h index 9ed6ad0324c7..1efcf93238dd 100644 --- a/drivers/infiniband/core/rdma_core.h +++ b/drivers/infiniband/core/rdma_core.h @@ -43,6 +43,11 @@ #include #include +int uverbs_ns_idx(u16 *id, unsigned int ns_count); +const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, + uint16_t object); +const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, + uint16_t method); /* * These functions initialize the context and cleanups its uobjects. * The context has a list of objects which is protected by a mutex diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c new file mode 100644 index 000000000000..5286ad57d903 --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "rdma_core.h" +#include "uverbs.h" + +static int uverbs_process_attr(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattr, + u16 attr_id, + const struct uverbs_attr_spec_hash *attr_spec_bucket, + struct uverbs_attr_bundle_hash *attr_bundle_h, + struct ib_uverbs_attr __user *uattr_ptr) +{ + const struct uverbs_attr_spec *spec; + struct uverbs_attr *e; + const struct uverbs_object_spec *object; + struct uverbs_obj_attr *o_attr; + struct uverbs_attr *elements = attr_bundle_h->attrs; + + if (uattr->reserved) + return -EINVAL; + + if (attr_id >= attr_spec_bucket->num_attrs) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) + return -EINVAL; + else + return 0; + } + + spec = &attr_spec_bucket->attrs[attr_id]; + e = &elements[attr_id]; + e->uattr = uattr_ptr; + + switch (spec->type) { + case UVERBS_ATTR_TYPE_PTR_IN: + case UVERBS_ATTR_TYPE_PTR_OUT: + if (uattr->len < spec->len || + (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) && + uattr->len > spec->len)) + return -EINVAL; + + e->ptr_attr.data = uattr->data; + e->ptr_attr.len = uattr->len; + e->ptr_attr.flags = uattr->flags; + break; + + case UVERBS_ATTR_TYPE_IDR: + if (uattr->data >> 32) + return -EINVAL; + /* fall through */ + case UVERBS_ATTR_TYPE_FD: + if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) + return -EINVAL; + + o_attr = &e->obj_attr; + object = uverbs_get_object(ibdev, spec->obj.obj_type); + if (!object) + return -EINVAL; + o_attr->type = object->type_attrs; + + o_attr->id = (int)uattr->data; + o_attr->uobject = uverbs_get_uobject_from_context( + o_attr->type, + ucontext, + spec->obj.access, + o_attr->id); + + if (IS_ERR(o_attr->uobject)) + return PTR_ERR(o_attr->uobject); + + if (spec->obj.access == UVERBS_ACCESS_NEW) { + u64 id = o_attr->uobject->id; + + /* Copy the allocated id to the user-space */ + if (put_user(id, &e->uattr->data)) { + uverbs_finalize_object(o_attr->uobject, + UVERBS_ACCESS_NEW, + false); + return -EFAULT; + } + } + + break; + default: + return -EOPNOTSUPP; + } + + set_bit(attr_id, attr_bundle_h->valid_bitmap); + return 0; +} + +static int uverbs_uattrs_process(struct ib_device *ibdev, + struct ib_ucontext *ucontext, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + const struct uverbs_method_spec *method, + struct uverbs_attr_bundle *attr_bundle, + struct ib_uverbs_attr __user *uattr_ptr) +{ + size_t i; + int ret = 0; + int num_given_buckets = 0; + + for (i = 0; i < num_uattrs; i++) { + const struct ib_uverbs_attr *uattr = &uattrs[i]; + u16 attr_id = uattr->attr_id; + struct uverbs_attr_spec_hash *attr_spec_bucket; + + ret = uverbs_ns_idx(&attr_id, method->num_buckets); + if (ret < 0) { + if (uattr->flags & UVERBS_ATTR_F_MANDATORY) { + uverbs_finalize_objects(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); + return ret; + } + continue; + } + + /* + * ret is the found ns, so increase num_given_buckets if + * necessary. + */ + if (ret >= num_given_buckets) + num_given_buckets = ret + 1; + + attr_spec_bucket = method->attr_buckets[ret]; + ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id, + attr_spec_bucket, &attr_bundle->hash[ret], + uattr_ptr++); + if (ret) { + uverbs_finalize_objects(attr_bundle, + method->attr_buckets, + num_given_buckets, + false); + return ret; + } + } + + return num_given_buckets; +} + +static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, + struct uverbs_attr_bundle *attr_bundle) +{ + unsigned int i; + + for (i = 0; i < attr_bundle->num_buckets; i++) { + struct uverbs_attr_spec_hash *attr_spec_bucket = + method_spec->attr_buckets[i]; + + if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, + attr_bundle->hash[i].valid_bitmap, + attr_spec_bucket->num_attrs)) + return -EINVAL; + } + + return 0; +} + +static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, + const struct ib_uverbs_attr *uattrs, + size_t num_uattrs, + struct ib_device *ibdev, + struct ib_uverbs_file *ufile, + const struct uverbs_method_spec *method_spec, + struct uverbs_attr_bundle *attr_bundle) +{ + int ret; + int finalize_ret; + int num_given_buckets; + + num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, + num_uattrs, method_spec, + attr_bundle, uattr_ptr); + if (num_given_buckets <= 0) + return -EINVAL; + + attr_bundle->num_buckets = num_given_buckets; + ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); + if (ret) + goto cleanup; + + ret = method_spec->handler(ibdev, ufile, attr_bundle); +cleanup: + finalize_ret = uverbs_finalize_objects(attr_bundle, + method_spec->attr_buckets, + attr_bundle->num_buckets, + !ret); + + return ret ? ret : finalize_ret; +} + +#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 +static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct ib_uverbs_ioctl_hdr *hdr, + void __user *buf) +{ + const struct uverbs_object_spec *object_spec; + const struct uverbs_method_spec *method_spec; + long err = 0; + unsigned int i; + struct { + struct ib_uverbs_attr *uattrs; + struct uverbs_attr_bundle *uverbs_attr_bundle; + } *ctx = NULL; + struct uverbs_attr *curr_attr; + unsigned long *curr_bitmap; + size_t ctx_size; +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; +#endif + + if (hdr->reserved) + return -EINVAL; + + object_spec = uverbs_get_object(ib_dev, hdr->object_id); + if (!object_spec) + return -EOPNOTSUPP; + + method_spec = uverbs_get_method(object_spec, hdr->method_id); + if (!method_spec) + return -EOPNOTSUPP; + + if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) + return -EINVAL; + + ctx_size = sizeof(*ctx) + + sizeof(struct uverbs_attr_bundle) + + sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets + + sizeof(*ctx->uattrs) * hdr->num_attrs + + sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) * + method_spec->num_child_attrs + + sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) * + (method_spec->num_child_attrs / BITS_PER_LONG + + method_spec->num_buckets); + +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ) + ctx = (void *)data; + + if (!ctx) +#endif + ctx = kmalloc(ctx_size, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx); + ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) + + (sizeof(ctx->uverbs_attr_bundle->hash[0]) * + method_spec->num_buckets); + curr_attr = (void *)(ctx->uattrs + hdr->num_attrs); + curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs); + + /* + * We just fill the pointers and num_attrs here. The data itself will be + * filled at a later stage (uverbs_process_attr) + */ + for (i = 0; i < method_spec->num_buckets; i++) { + unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs; + + ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr; + curr_attr += curr_num_attrs; + ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs; + ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap; + bitmap_zero(curr_bitmap, curr_num_attrs); + curr_bitmap += BITS_TO_LONGS(curr_num_attrs); + } + + err = copy_from_user(ctx->uattrs, buf, + sizeof(*ctx->uattrs) * hdr->num_attrs); + if (err) { + err = -EFAULT; + goto out; + } + + err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, + file, method_spec, ctx->uverbs_attr_bundle); +out: +#ifdef UVERBS_OPTIMIZE_USING_STACK_SZ + if (ctx_size > UVERBS_OPTIMIZE_USING_STACK_SZ) +#endif + kfree(ctx); + return err; +} + +#define IB_UVERBS_MAX_CMD_SZ 4096 + +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_ioctl_hdr __user *user_hdr = + (struct ib_uverbs_ioctl_hdr __user *)arg; + struct ib_uverbs_ioctl_hdr hdr; + struct ib_device *ib_dev; + int srcu_key; + long err; + + srcu_key = srcu_read_lock(&file->device->disassociate_srcu); + ib_dev = srcu_dereference(file->device->ib_dev, + &file->device->disassociate_srcu); + if (!ib_dev) { + err = -EIO; + goto out; + } + + if (cmd == RDMA_VERBS_IOCTL) { + err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); + + if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || + hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { + err = -EINVAL; + goto out; + } + + if (hdr.reserved) { + err = -EOPNOTSUPP; + goto out; + } + + err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, + (__user void *)arg + sizeof(hdr)); + } else { + err = -ENOIOCTLCMD; + } +out: + srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); + + return err; +} diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1b4bb8743969..e6df68048517 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2348,6 +2348,8 @@ struct ib_device { void (*get_dev_fw_str)(struct ib_device *, char *str); const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, int comp_vector); + + struct uverbs_root_spec *specs_root; }; struct ib_client { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index d3ec02b7d937..f83f56329761 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -43,6 +43,8 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_NA, + UVERBS_ATTR_TYPE_PTR_IN, + UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, }; @@ -54,29 +56,110 @@ enum uverbs_obj_access { UVERBS_ACCESS_DESTROY }; +enum { + UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0, + /* Support extending attributes by length */ + UVERBS_ATTR_SPEC_F_MIN_SZ = 1U << 1, +}; + struct uverbs_attr_spec { enum uverbs_attr_type type; - struct { - /* - * higher bits mean the namespace and lower bits mean - * the type id within the namespace. - */ - u16 obj_type; - u8 access; - } obj; + union { + u16 len; + struct { + /* + * higher bits mean the namespace and lower bits mean + * the type id within the namespace. + */ + u16 obj_type; + u8 access; + } obj; + }; + /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */ + u8 flags; }; struct uverbs_attr_spec_hash { size_t num_attrs; + unsigned long *mandatory_attrs_bitmask; struct uverbs_attr_spec attrs[0]; }; +struct uverbs_attr_bundle; +struct ib_uverbs_file; + +enum { + /* + * Action marked with this flag creates a context (or root for all + * objects). + */ + UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0, +}; + +struct uverbs_method_spec { + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_buckets; + size_t num_child_attrs; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); + struct uverbs_attr_spec_hash *attr_buckets[0]; +}; + +struct uverbs_method_spec_hash { + size_t num_methods; + struct uverbs_method_spec *methods[0]; +}; + +struct uverbs_object_spec { + const struct uverbs_obj_type *type_attrs; + size_t num_buckets; + struct uverbs_method_spec_hash *method_buckets[0]; +}; + +struct uverbs_object_spec_hash { + size_t num_objects; + struct uverbs_object_spec *objects[0]; +}; + +struct uverbs_root_spec { + size_t num_buckets; + struct uverbs_object_spec_hash *object_buckets[0]; +}; + +/* ================================================= + * Parsing infrastructure + * ================================================= + */ + +struct uverbs_ptr_attr { + union { + u64 data; + void __user *ptr; + }; + u16 len; + /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ + u16 flags; +}; + struct uverbs_obj_attr { + /* pointer to the kernel descriptor -> type, access, etc */ + const struct uverbs_obj_type *type; struct ib_uobject *uobject; + /* fd or id in idr of this object */ + int id; }; struct uverbs_attr { - struct uverbs_obj_attr obj_attr; + /* + * pointer to the user-space given attribute, in order to write the + * new uobject's id or update flags. + */ + struct ib_uverbs_attr __user *uattr; + union { + struct uverbs_ptr_attr ptr_attr; + struct uverbs_obj_attr obj_attr; + }; }; struct uverbs_attr_bundle_hash { diff --git a/include/uapi/rdma/rdma_user_ioctl.h b/include/uapi/rdma/rdma_user_ioctl.h index 9388125ad51b..165a27e969d5 100644 --- a/include/uapi/rdma/rdma_user_ioctl.h +++ b/include/uapi/rdma/rdma_user_ioctl.h @@ -43,6 +43,39 @@ /* Legacy name, for user space application which already use it */ #define IB_IOCTL_MAGIC RDMA_IOCTL_MAGIC +#define RDMA_VERBS_IOCTL \ + _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) + +#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +enum { + /* User input */ + UVERBS_ATTR_F_MANDATORY = 1U << 0, + /* + * Valid output bit should be ignored and considered set in + * mandatory fields. This bit is kernel output. + */ + UVERBS_ATTR_F_VALID_OUTPUT = 1U << 1, +}; + +struct ib_uverbs_attr { + __u16 attr_id; /* command specific type attribute */ + __u16 len; /* only for pointers */ + __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ + __u16 reserved; + __u64 data; /* ptr to command, inline data or idr/fd */ +}; + +struct ib_uverbs_ioctl_hdr { + __u16 length; + __u16 object_id; + __u16 method_id; + __u16 num_attrs; + __u64 reserved; + struct ib_uverbs_attr attrs[0]; +}; + /* * General blocks assignments * It is closed on purpose do not expose it it user space -- cgit v1.2.3 From 5009010fbf54bdc27e57baca490e1f9d6a4609e0 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:58 +0300 Subject: IB/core: Declare an object instead of declaring only type attributes Switch all uverbs_type_attrs_xxxx with DECLARE_UVERBS_OBJECT macros. This will be later used in order to embed the object specific methods in the objects as well. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 112 +++++++++++++---------------- include/rdma/uverbs_ioctl.h | 16 +++++ include/rdma/uverbs_std_types.h | 40 +++++------ include/rdma/uverbs_types.h | 38 ++++++---- 4 files changed, 107 insertions(+), 99 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index ef293379f37a..b75c7da0d0a4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -209,67 +209,51 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; -const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel = { - .type = UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file), 0), - .context_closed = uverbs_hot_unplug_completion_event_file, - .fops = &uverbs_event_fops, - .name = "[infinibandevent]", - .flags = O_RDONLY, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_cq = { - .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0), - .destroy_object = uverbs_free_cq, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_qp = { - .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0), - .destroy_object = uverbs_free_qp, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_mw = { - .type = UVERBS_TYPE_ALLOC_IDR(0), - .destroy_object = uverbs_free_mw, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_mr = { - /* 1 is used in order to free the MR after all the MWs */ - .type = UVERBS_TYPE_ALLOC_IDR(1), - .destroy_object = uverbs_free_mr, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_srq = { - .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0), - .destroy_object = uverbs_free_srq, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_ah = { - .type = UVERBS_TYPE_ALLOC_IDR(0), - .destroy_object = uverbs_free_ah, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_flow = { - .type = UVERBS_TYPE_ALLOC_IDR(0), - .destroy_object = uverbs_free_flow, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_wq = { - .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0), - .destroy_object = uverbs_free_wq, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table = { - .type = UVERBS_TYPE_ALLOC_IDR(0), - .destroy_object = uverbs_free_rwq_ind_tbl, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd = { - .type = UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0), - .destroy_object = uverbs_free_xrcd, -}; - -const struct uverbs_obj_idr_type uverbs_type_attrs_pd = { - /* 2 is used in order to free the PD after MRs */ - .type = UVERBS_TYPE_ALLOC_IDR(2), - .destroy_object = uverbs_free_pd, -}; +DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, + UVERBS_OBJECT_COMP_CHANNEL, + &UVERBS_TYPE_ALLOC_FD(0, + sizeof(struct ib_uverbs_completion_event_file), + uverbs_hot_unplug_completion_event_file, + &uverbs_event_fops, + "[infinibandevent]", O_RDONLY)); + +DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, + uverbs_free_cq)); + +DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, + uverbs_free_qp)); + +DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); + +DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR, + /* 1 is used in order to free the MR after all the MWs */ + &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr)); + +DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, + uverbs_free_srq)); + +DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); + +DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow)); + +DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, + uverbs_free_wq)); + +DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table, + UVERBS_OBJECT_RWQ_IND_TBL, + &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); + +DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD, + &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, + uverbs_free_xrcd)); + +DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD, + /* 2 is used in order to free the PD after MRs */ + &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index f83f56329761..99130083615e 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -127,6 +127,22 @@ struct uverbs_root_spec { struct uverbs_object_spec_hash *object_buckets[0]; }; +/* + * ======================================= + * Verbs definitions + * ======================================= + */ + +struct uverbs_object_def { + const struct uverbs_obj_type *type_attrs; +}; + +#define _UVERBS_OBJECT(_id, _type_attrs, ...) \ + ((const struct uverbs_object_def) { \ + .type_attrs = _type_attrs}) +#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ + const struct uverbs_object_def _name = \ + _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) /* ================================================= * Parsing infrastructure * ================================================= diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 7771ce966952..eda271b4aa6c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -35,18 +35,18 @@ #include -extern const struct uverbs_obj_fd_type uverbs_type_attrs_comp_channel; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_cq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_qp; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_rwq_ind_table; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_wq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_srq; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_ah; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_flow; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_mr; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_mw; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_pd; -extern const struct uverbs_obj_idr_type uverbs_type_attrs_xrcd; +extern const struct uverbs_object_def uverbs_object_comp_channel; +extern const struct uverbs_object_def uverbs_object_cq; +extern const struct uverbs_object_def uverbs_object_qp; +extern const struct uverbs_object_def uverbs_object_rwq_ind_table; +extern const struct uverbs_object_def uverbs_object_wq; +extern const struct uverbs_object_def uverbs_object_srq; +extern const struct uverbs_object_def uverbs_object_ah; +extern const struct uverbs_object_def uverbs_object_flow; +extern const struct uverbs_object_def uverbs_object_mr; +extern const struct uverbs_object_def uverbs_object_mw; +extern const struct uverbs_object_def uverbs_object_pd; +extern const struct uverbs_object_def uverbs_object_xrcd; static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, @@ -56,22 +56,22 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, return rdma_lookup_get_uobject(type, ucontext, id, write); } -#define uobj_get_type(_type) uverbs_type_attrs_##_type.type +#define uobj_get_type(_object) uverbs_object_##_object.type_attrs #define uobj_get_read(_type, _id, _ucontext) \ - __uobj_get(&(_type), false, _ucontext, _id) + __uobj_get(_type, false, _ucontext, _id) -#define uobj_get_obj_read(_type, _id, _ucontext) \ +#define uobj_get_obj_read(_object, _id, _ucontext) \ ({ \ - struct ib_uobject *uobj = \ - __uobj_get(&uobj_get_type(_type), \ + struct ib_uobject *__uobj = \ + __uobj_get(uverbs_object_##_object.type_attrs, \ false, _ucontext, _id); \ \ - (struct ib_##_type *)(IS_ERR(uobj) ? NULL : uobj->object); \ + (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\ }) #define uobj_get_write(_type, _id, _ucontext) \ - __uobj_get(&(_type), true, _ucontext, _id) + __uobj_get(_type, true, _ucontext, _id) static inline void uobj_put_read(struct ib_uobject *uobj) { @@ -108,7 +108,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type } #define uobj_alloc(_type, ucontext) \ - __uobj_alloc(&(_type), ucontext) + __uobj_alloc(_type, ucontext) #endif diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 351ea185df44..9760b6d70744 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -151,22 +151,30 @@ extern const struct uverbs_obj_type_class uverbs_fd_class; #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ sizeof(char)) -#define UVERBS_TYPE_ALLOC_FD(_size, _order) \ - { \ - .destroy_order = _order, \ - .type_class = &uverbs_fd_class, \ - .obj_size = (_size) + \ - UVERBS_BUILD_BUG_ON((_size) < \ - sizeof(struct ib_uobject_file)),\ - } -#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order) \ - { \ +#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\ + ((&((const struct uverbs_obj_fd_type) \ + {.type = { \ + .destroy_order = _order, \ + .type_class = &uverbs_fd_class, \ + .obj_size = (_obj_size) + \ + UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ + }, \ + .context_closed = _context_closed, \ + .fops = _fops, \ + .name = _name, \ + .flags = _flags}))->type) +#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \ + ((&((const struct uverbs_obj_idr_type) \ + {.type = { \ .destroy_order = _order, \ .type_class = &uverbs_idr_class, \ .obj_size = (_size) + \ - UVERBS_BUILD_BUG_ON((_size) < \ - sizeof(struct ib_uobject)), \ - } -#define UVERBS_TYPE_ALLOC_IDR(_order) \ - UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order) + UVERBS_BUILD_BUG_ON((_size) < \ + sizeof(struct ib_uobject)) \ + }, \ + .destroy_object = _destroy_object,}))->type) +#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \ + UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \ + _destroy_object) + #endif -- cgit v1.2.3 From 09e3ebf8c193d3f154c4ffb7cb18995df0243bc6 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:06:59 +0300 Subject: IB/core: Add DEVICE object and root tree structure This adds the DEVICE object. This object supports creating the context that all objects are created from. Moreover, it supports executing methods which are related to the device itself, such as QUERY_DEVICE. This is a singleton object (per file instance). All standard objects are put in the root structure. This root will later on be used in drivers as the source for their whole parsing tree. Later on, when new features are added, these drivers could mix this root with other customized objects. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 17 +++++++++++++++ include/rdma/uverbs_ioctl.h | 35 ++++++++++++++++++++++++++++++ include/rdma/uverbs_std_types.h | 18 +++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index b75c7da0d0a4..5f90978bda8d 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -257,3 +257,20 @@ DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD, DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD, /* 2 is used in order to free the PD after MRs */ &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); + +DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL); + +DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, + &uverbs_object_device, + &uverbs_object_pd, + &uverbs_object_mr, + &uverbs_object_comp_channel, + &uverbs_object_cq, + &uverbs_object_qp, + &uverbs_object_ah, + &uverbs_object_mw, + &uverbs_object_srq, + &uverbs_object_flow, + &uverbs_object_wq, + &uverbs_object_rwq_ind_table, + &uverbs_object_xrcd); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 99130083615e..2e8925434d74 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -133,16 +133,51 @@ struct uverbs_root_spec { * ======================================= */ +struct uverbs_attr_def { + u16 id; + struct uverbs_attr_spec attr; +}; + +struct uverbs_method_def { + u16 id; + /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ + u32 flags; + size_t num_attrs; + const struct uverbs_attr_def * const (*attrs)[]; + int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, + struct uverbs_attr_bundle *ctx); +}; + struct uverbs_object_def { + u16 id; const struct uverbs_obj_type *type_attrs; + size_t num_methods; + const struct uverbs_method_def * const (*methods)[]; +}; + +struct uverbs_object_tree_def { + size_t num_objects; + const struct uverbs_object_def * const (*objects)[]; }; #define _UVERBS_OBJECT(_id, _type_attrs, ...) \ ((const struct uverbs_object_def) { \ + .id = _id, \ .type_attrs = _type_attrs}) #define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \ const struct uverbs_object_def _name = \ _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__) +#define _UVERBS_TREE_OBJECTS_SZ(...) \ + (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \ + sizeof(const struct uverbs_object_def *)) +#define _UVERBS_OBJECT_TREE(...) \ + ((const struct uverbs_object_tree_def) { \ + .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \ + .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} }) +#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \ + const struct uverbs_object_tree_def _name = \ + _UVERBS_OBJECT_TREE(__VA_ARGS__) + /* ================================================= * Parsing infrastructure * ================================================= diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index eda271b4aa6c..bef74099b7c5 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -35,6 +35,23 @@ #include +enum uverbs_default_objects { + UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ + UVERBS_OBJECT_PD, + UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_OBJECT_CQ, + UVERBS_OBJECT_QP, + UVERBS_OBJECT_SRQ, + UVERBS_OBJECT_AH, + UVERBS_OBJECT_MR, + UVERBS_OBJECT_MW, + UVERBS_OBJECT_FLOW, + UVERBS_OBJECT_XRCD, + UVERBS_OBJECT_RWQ_IND_TBL, + UVERBS_OBJECT_WQ, + UVERBS_OBJECT_LAST, +}; + extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; extern const struct uverbs_object_def uverbs_object_qp; @@ -47,6 +64,7 @@ extern const struct uverbs_object_def uverbs_object_mr; extern const struct uverbs_object_def uverbs_object_mw; extern const struct uverbs_object_def uverbs_object_pd; extern const struct uverbs_object_def uverbs_object_xrcd; +extern const struct uverbs_object_def uverbs_object_device; static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, -- cgit v1.2.3 From 118620d3686b2d624f9a5019f2f14c64cf50d21a Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:00 +0300 Subject: IB/core: Add uverbs merge trees functionality Different drivers support different features and even subset of the common uverbs implementation. Currently, this is handled as bitmask in every driver that represents which kind of methods it supports, but doesn't go down to attributes granularity. Moreover, drivers might want to add their specific types, methods and attributes to let their user-space counter-parts be exposed to some more efficient abstractions. It means that existence of different features is validated syntactically via the parsing infrastructure rather than using a complex in-handler logic. In order to do that, we allow defining features and abstractions as parsing trees. These per-feature parsing tree could be merged to an efficient (perfect-hash based) parsing tree, which is later used by the parsing infrastructure. To sum it up, this makes a parse tree unique for a device and represents only the features this particular device supports. This is done by having a root specification tree per feature. Before a device registers itself as an IB device, it merges all these trees into one parsing tree. This parsing tree is used to parse all user-space commands. A future user-space application could read this parse tree. This tree represents which objects, methods and attributes are supported by this device. This is based on the idea of Jason Gunthorpe Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/Makefile | 3 +- drivers/infiniband/core/uverbs_ioctl_merge.c | 665 +++++++++++++++++++++++++++ include/rdma/uverbs_ioctl.h | 40 +- 3 files changed, 706 insertions(+), 2 deletions(-) create mode 100644 drivers/infiniband/core/uverbs_ioctl_merge.c (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 746756dc9877..b4df164f71a6 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -32,4 +32,5 @@ ib_umad-y := user_mad.o ib_ucm-y := ucm.o ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ - rdma_core.o uverbs_std_types.o uverbs_ioctl.o + rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ + uverbs_ioctl_merge.o diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c new file mode 100644 index 000000000000..76ddb6564578 --- /dev/null +++ b/drivers/infiniband/core/uverbs_ioctl_merge.c @@ -0,0 +1,665 @@ +/* + * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "uverbs.h" + +#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT) +#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT) +#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK) + +#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \ + buckets_offset) \ + for (tmpj = 0, \ + elem = (*(const void ***)((hashes)[tmpi] + \ + (buckets_offset)))[0]; \ + tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \ + tmpj++) \ + if ((elem = ((*(const void ***)(hashes[tmpi] + \ + (buckets_offset)))[tmpj]))) + +/* + * Iterate all elements of a few @hashes. The number of given hashes is + * indicated by @num_hashes. The offset of the number of buckets in the hash is + * represented by @num_buckets_offset, while the offset of the buckets array in + * the hash structure is represented by @buckets_offset. tmpi and tmpj are two + * short (or int) based indices that are given by the user. tmpi iterates over + * the different hashes. @elem points the current element in the hashes[tmpi] + * bucket we are looping on. To be honest, @hashes representation isn't exactly + * a hash, but more a collection of elements. These elements' ids are treated + * in a hash like manner, where the first upper bits are the bucket number. + * These elements are later mapped into a perfect-hash. + */ +#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \ + num_buckets_offset, buckets_offset) \ + for (tmpi = 0; tmpi < (num_hashes); tmpi++) \ + _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\ + buckets_offset) + +#define get_elements_iterators_entry_above(iters, num_elements, elements, \ + num_objects_fld, objects_fld, bucket,\ + min_id) \ + get_elements_above_id((const void **)iters, num_elements, \ + (const void **)(elements), \ + offsetof(typeof(**elements), \ + num_objects_fld), \ + offsetof(typeof(**elements), objects_fld),\ + offsetof(typeof(***(*elements)->objects_fld), id),\ + bucket, min_id) + +#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \ + get_elements_iterators_entry_above(iters, num_trees, trees, \ + num_objects, objects, bucket, min_id) + +#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\ + get_elements_iterators_entry_above(method_iters, num_iters, iters, \ + num_methods, methods, bucket, min_id) + +#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\ + get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \ + num_attrs, attrs, bucket, min_id) + +/* + * get_elements_above_id get a few hashes represented by @elements and + * @num_elements. The hashes fields are described by @num_offset, @data_offset + * and @id_offset in the same way as required by for_each_element. The function + * returns an array of @iters, represents an array of elements in the hashes + * buckets, which their ids are the smallest ids in all hashes but are all + * larger than the id given by min_id. Elements are only added to the iters + * array if their id belongs to the bucket @bucket. The number of elements in + * the returned array is returned by the function. @min_id is also updated to + * reflect the new min_id of all elements in iters. + */ +static size_t get_elements_above_id(const void **iters, + unsigned int num_elements, + const void **elements, + size_t num_offset, + size_t data_offset, + size_t id_offset, + u16 bucket, + short *min_id) +{ + size_t num_iters = 0; + short min = SHRT_MAX; + const void *elem; + int i, j, last_stored = -1; + + for_each_element(elem, i, j, elements, num_elements, num_offset, + data_offset) { + u16 id = *(u16 *)(elem + id_offset); + + if (GET_NS_ID(id) != bucket) + continue; + + if (GET_ID(id) < *min_id || + (min != SHRT_MAX && GET_ID(id) > min)) + continue; + + /* + * We first iterate all hashes represented by @elements. When + * we do, we try to find an element @elem in the bucket @bucket + * which its id is min. Since we can't ensure the user sorted + * the elements in increasing order, we override this hash's + * minimal id element we found, if a new element with a smaller + * id was just found. + */ + iters[last_stored == i ? num_iters - 1 : num_iters++] = elem; + last_stored = i; + min = GET_ID(id); + } + + /* + * We only insert to our iters array an element, if its id is smaller + * than all previous ids. Therefore, the final iters array is sorted so + * that smaller ids are in the end of the array. + * Therefore, we need to clean the beginning of the array to make sure + * all ids of final elements are equal to min. + */ + for (i = num_iters - 1; i >= 0 && + GET_ID(*(u16 *)(iters[i] + id_offset)) == min; i--) + ; + + num_iters -= i + 1; + memmove(iters, iters + i + 1, sizeof(*iters) * num_iters); + + *min_id = min; + return num_iters; +} + +#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ + objects_fld, bucket) \ + find_max_element_id(num_elements, (const void **)(elements), \ + offsetof(typeof(**elements), num_objects_fld), \ + offsetof(typeof(**elements), objects_fld), \ + offsetof(typeof(***(*elements)->objects_fld), id),\ + bucket) + +static short find_max_element_ns_id(unsigned int num_elements, + const void **elements, + size_t num_offset, + size_t data_offset, + size_t id_offset) +{ + short max_ns = SHRT_MIN; + const void *elem; + int i, j; + + for_each_element(elem, i, j, elements, num_elements, num_offset, + data_offset) { + u16 id = *(u16 *)(elem + id_offset); + + if (GET_NS_ID(id) > max_ns) + max_ns = GET_NS_ID(id); + } + + return max_ns; +} + +static short find_max_element_id(unsigned int num_elements, + const void **elements, + size_t num_offset, + size_t data_offset, + size_t id_offset, + u16 bucket) +{ + short max_id = SHRT_MIN; + const void *elem; + int i, j; + + for_each_element(elem, i, j, elements, num_elements, num_offset, + data_offset) { + u16 id = *(u16 *)(elem + id_offset); + + if (GET_NS_ID(id) == bucket && + GET_ID(id) > max_id) + max_id = GET_ID(id); + } + return max_id; +} + +#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \ + objects_fld, bucket) \ + find_max_element_id(num_elements, (const void **)(elements), \ + offsetof(typeof(**elements), num_objects_fld), \ + offsetof(typeof(**elements), objects_fld), \ + offsetof(typeof(***(*elements)->objects_fld), id),\ + bucket) + +#define find_max_element_ns_entry_id(num_elements, elements, \ + num_objects_fld, objects_fld) \ + find_max_element_ns_id(num_elements, (const void **)(elements), \ + offsetof(typeof(**elements), num_objects_fld),\ + offsetof(typeof(**elements), objects_fld), \ + offsetof(typeof(***(*elements)->objects_fld), id)) + +/* + * find_max_xxxx_ns_id gets a few elements. Each element is described by an id + * which its upper bits represents a namespace. It finds the max namespace. This + * could be used in order to know how many buckets do we need to allocate. If no + * elements exist, SHRT_MIN is returned. Namespace represents here different + * buckets. The common example is "common bucket" and "driver bucket". + * + * find_max_xxxx_id gets a few elements and a bucket. Each element is described + * by an id which its upper bits represent a namespace. It returns the max id + * which is contained in the same namespace defined in @bucket. This could be + * used in order to know how many elements do we need to allocate in the bucket. + * If no elements exist, SHRT_MIN is returned. + */ + +#define find_max_object_id(num_trees, trees, bucket) \ + find_max_element_entry_id(num_trees, trees, num_objects,\ + objects, bucket) +#define find_max_object_ns_id(num_trees, trees) \ + find_max_element_ns_entry_id(num_trees, trees, \ + num_objects, objects) + +#define find_max_method_id(num_iters, iters, bucket) \ + find_max_element_entry_id(num_iters, iters, num_methods,\ + methods, bucket) +#define find_max_method_ns_id(num_iters, iters) \ + find_max_element_ns_entry_id(num_iters, iters, \ + num_methods, methods) + +#define find_max_attr_id(num_iters, iters, bucket) \ + find_max_element_entry_id(num_iters, iters, num_attrs, \ + attrs, bucket) +#define find_max_attr_ns_id(num_iters, iters) \ + find_max_element_ns_entry_id(num_iters, iters, \ + num_attrs, attrs) + +static void free_method(struct uverbs_method_spec *method) +{ + unsigned int i; + + if (!method) + return; + + for (i = 0; i < method->num_buckets; i++) + kfree(method->attr_buckets[i]); + + kfree(method); +} + +#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \ + (attr)->type == UVERBS_ATTR_TYPE_FD) + +/* + * This function gets array of size @num_method_defs which contains pointers to + * method definitions @method_defs. The function allocates an + * uverbs_method_spec structure and initializes its number of buckets and the + * elements in buckets to the correct attributes. While doing that, it + * validates that there aren't conflicts between attributes of different + * method_defs. + */ +static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs, + size_t num_method_defs) +{ + int bucket_idx; + int max_attr_buckets = 0; + size_t num_attr_buckets = 0; + int res = 0; + struct uverbs_method_spec *method = NULL; + const struct uverbs_attr_def **attr_defs; + unsigned int num_of_singularities = 0; + + max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs); + if (max_attr_buckets >= 0) + num_attr_buckets = max_attr_buckets + 1; + + method = kzalloc(sizeof(*method) + + num_attr_buckets * sizeof(*method->attr_buckets), + GFP_KERNEL); + if (!method) + return ERR_PTR(-ENOMEM); + + method->num_buckets = num_attr_buckets; + attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL); + if (!attr_defs) { + res = -ENOMEM; + goto free_method; + } + for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) { + short min_id = SHRT_MIN; + int attr_max_bucket = 0; + struct uverbs_attr_spec_hash *hash = NULL; + + attr_max_bucket = find_max_attr_id(num_method_defs, method_defs, + bucket_idx); + if (attr_max_bucket < 0) + continue; + + hash = kzalloc(sizeof(*hash) + + ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1), + sizeof(long)) + + BITS_TO_LONGS(attr_max_bucket) * sizeof(long), + GFP_KERNEL); + if (!hash) { + res = -ENOMEM; + goto free; + } + hash->num_attrs = attr_max_bucket + 1; + method->num_child_attrs += hash->num_attrs; + hash->mandatory_attrs_bitmask = (void *)(hash + 1) + + ALIGN(sizeof(*hash->attrs) * + (attr_max_bucket + 1), + sizeof(long)); + + method->attr_buckets[bucket_idx] = hash; + + do { + size_t num_attr_defs; + struct uverbs_attr_spec *attr; + bool attr_obj_with_special_access; + + num_attr_defs = + get_attrs_above_id(attr_defs, + num_method_defs, + method_defs, + bucket_idx, + &min_id); + /* Last attr in bucket */ + if (!num_attr_defs) + break; + + if (num_attr_defs > 1) { + /* + * We don't allow two attribute definitions for + * the same attribute. This is usually a + * programmer error. If required, it's better to + * just add a new attribute to capture the new + * semantics. + */ + res = -EEXIST; + goto free; + } + + attr = &hash->attrs[min_id]; + memcpy(attr, &attr_defs[0]->attr, sizeof(*attr)); + + attr_obj_with_special_access = IS_ATTR_OBJECT(attr) && + (attr->obj.access == UVERBS_ACCESS_NEW || + attr->obj.access == UVERBS_ACCESS_DESTROY); + num_of_singularities += !!attr_obj_with_special_access; + if (WARN(num_of_singularities > 1, + "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n", + min_id) || + WARN(attr_obj_with_special_access && + !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY), + "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy aceess but isn't mandatory\n", + min_id) || + WARN(IS_ATTR_OBJECT(attr) && + attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ, + "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n", + min_id)) { + res = -EINVAL; + goto free; + } + + if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY) + set_bit(min_id, hash->mandatory_attrs_bitmask); + min_id++; + + } while (1); + } + kfree(attr_defs); + return method; + +free: + kfree(attr_defs); +free_method: + free_method(method); + return ERR_PTR(res); +} + +static void free_object(struct uverbs_object_spec *object) +{ + unsigned int i, j; + + if (!object) + return; + + for (i = 0; i < object->num_buckets; i++) { + struct uverbs_method_spec_hash *method_buckets = + object->method_buckets[i]; + + if (!method_buckets) + continue; + + for (j = 0; j < method_buckets->num_methods; j++) + free_method(method_buckets->methods[j]); + + kfree(method_buckets); + } + + kfree(object); +} + +/* + * This function gets array of size @num_object_defs which contains pointers to + * object definitions @object_defs. The function allocated an + * uverbs_object_spec structure and initialize its number of buckets and the + * elements in buckets to the correct methods. While doing that, it + * sorts out the correct relationship between conflicts in the same method. + */ +static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs, + size_t num_object_defs) +{ + u16 bucket_idx; + int max_method_buckets = 0; + u16 num_method_buckets = 0; + int res = 0; + struct uverbs_object_spec *object = NULL; + const struct uverbs_method_def **method_defs; + + max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs); + if (max_method_buckets >= 0) + num_method_buckets = max_method_buckets + 1; + + object = kzalloc(sizeof(*object) + + num_method_buckets * + sizeof(*object->method_buckets), GFP_KERNEL); + if (!object) + return ERR_PTR(-ENOMEM); + + object->num_buckets = num_method_buckets; + method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL); + if (!method_defs) { + res = -ENOMEM; + goto free_object; + } + + for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) { + short min_id = SHRT_MIN; + int methods_max_bucket = 0; + struct uverbs_method_spec_hash *hash = NULL; + + methods_max_bucket = find_max_method_id(num_object_defs, object_defs, + bucket_idx); + if (methods_max_bucket < 0) + continue; + + hash = kzalloc(sizeof(*hash) + + sizeof(*hash->methods) * (methods_max_bucket + 1), + GFP_KERNEL); + if (!hash) { + res = -ENOMEM; + goto free; + } + + hash->num_methods = methods_max_bucket + 1; + object->method_buckets[bucket_idx] = hash; + + do { + size_t num_method_defs; + struct uverbs_method_spec *method; + int i; + + num_method_defs = + get_methods_above_id(method_defs, + num_object_defs, + object_defs, + bucket_idx, + &min_id); + /* Last method in bucket */ + if (!num_method_defs) + break; + + method = build_method_with_attrs(method_defs, + num_method_defs); + if (IS_ERR(method)) { + res = PTR_ERR(method); + goto free; + } + + /* + * The last tree which is given as an argument to the + * merge overrides previous method handler. + * Therefore, we iterate backwards and search for the + * first handler which != NULL. This also defines the + * set of flags used for this handler. + */ + for (i = num_object_defs - 1; + i >= 0 && !method_defs[i]->handler; i--) + ; + hash->methods[min_id++] = method; + /* NULL handler isn't allowed */ + if (WARN(i < 0, + "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n", + min_id)) { + res = -EINVAL; + goto free; + } + method->handler = method_defs[i]->handler; + method->flags = method_defs[i]->flags; + + } while (1); + } + kfree(method_defs); + return object; + +free: + kfree(method_defs); +free_object: + free_object(object); + return ERR_PTR(res); +} + +void uverbs_free_spec_tree(struct uverbs_root_spec *root) +{ + unsigned int i, j; + + if (!root) + return; + + for (i = 0; i < root->num_buckets; i++) { + struct uverbs_object_spec_hash *object_hash = + root->object_buckets[i]; + + if (!object_hash) + continue; + + for (j = 0; j < object_hash->num_objects; j++) + free_object(object_hash->objects[j]); + + kfree(object_hash); + } + + kfree(root); +} +EXPORT_SYMBOL(uverbs_free_spec_tree); + +struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_object_tree_def **trees) +{ + u16 bucket_idx; + short max_object_buckets = 0; + size_t num_objects_buckets = 0; + struct uverbs_root_spec *root_spec = NULL; + const struct uverbs_object_def **object_defs; + int i; + int res = 0; + + max_object_buckets = find_max_object_ns_id(num_trees, trees); + /* + * Devices which don't want to support ib_uverbs, should just allocate + * an empty parsing tree. Every user-space command won't hit any valid + * entry in the parsing tree and thus will fail. + */ + if (max_object_buckets >= 0) + num_objects_buckets = max_object_buckets + 1; + + root_spec = kzalloc(sizeof(*root_spec) + + num_objects_buckets * sizeof(*root_spec->object_buckets), + GFP_KERNEL); + if (!root_spec) + return ERR_PTR(-ENOMEM); + root_spec->num_buckets = num_objects_buckets; + + object_defs = kcalloc(num_trees, sizeof(*object_defs), + GFP_KERNEL); + if (!object_defs) { + res = -ENOMEM; + goto free_root; + } + + for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) { + short min_id = SHRT_MIN; + short objects_max_bucket; + struct uverbs_object_spec_hash *hash = NULL; + + objects_max_bucket = find_max_object_id(num_trees, trees, + bucket_idx); + if (objects_max_bucket < 0) + continue; + + hash = kzalloc(sizeof(*hash) + + sizeof(*hash->objects) * (objects_max_bucket + 1), + GFP_KERNEL); + if (!hash) { + res = -ENOMEM; + goto free; + } + hash->num_objects = objects_max_bucket + 1; + root_spec->object_buckets[bucket_idx] = hash; + + do { + size_t num_object_defs; + struct uverbs_object_spec *object; + + num_object_defs = get_objects_above_id(object_defs, + num_trees, + trees, + bucket_idx, + &min_id); + /* Last object in bucket */ + if (!num_object_defs) + break; + + object = build_object_with_methods(object_defs, + num_object_defs); + if (IS_ERR(object)) { + res = PTR_ERR(object); + goto free; + } + + /* + * The last tree which is given as an argument to the + * merge overrides previous object's type_attrs. + * Therefore, we iterate backwards and search for the + * first type_attrs which != NULL. + */ + for (i = num_object_defs - 1; + i >= 0 && !object_defs[i]->type_attrs; i--) + ; + /* + * NULL is a valid type_attrs. It means an object we + * can't instantiate (like DEVICE). + */ + object->type_attrs = i < 0 ? NULL : + object_defs[i]->type_attrs; + + hash->objects[min_id++] = object; + } while (1); + } + + kfree(object_defs); + return root_spec; + +free: + kfree(object_defs); +free_root: + uverbs_free_spec_tree(root_spec); + return ERR_PTR(res); +} +EXPORT_SYMBOL(uverbs_alloc_spec_tree); diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 2e8925434d74..cf5b238d2d81 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -235,5 +235,43 @@ static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_ return test_bit(idx, attrs_hash->valid_bitmap); } -#endif +/* ================================================= + * Definitions -> Specs infrastructure + * ================================================= + */ + +/* + * uverbs_alloc_spec_tree - Merges different common and driver specific feature + * into one parsing tree that every uverbs command will be parsed upon. + * + * @num_trees: Number of trees in the array @trees. + * @trees: Array of pointers to tree root definitions to merge. Each such tree + * possibly contains objects, methods and attributes definitions. + * + * Returns: + * uverbs_root_spec *: The root of the merged parsing tree. + * On error, we return an error code. Error is checked via IS_ERR. + * + * The following merges could take place: + * a. Two trees representing the same method with different handler + * -> We take the handler of the tree that its handler != NULL + * and its index in the trees array is greater. The incentive for that + * is that developers are expected to first merge common trees and then + * merge trees that gives specialized the behaviour. + * b. Two trees representing the same object with different + * type_attrs (struct uverbs_obj_type): + * -> We take the type_attrs of the tree that its type_attr != NULL + * and its index in the trees array is greater. This could be used + * in order to override the free function, allocation size, etc. + * c. Two trees representing the same method attribute (same id but possibly + * different attributes): + * -> ERROR (-ENOENT), we believe that's not the programmer's intent. + * + * An object without any methods is considered invalid and will abort the + * function with -ENOENT error. + */ +struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_object_tree_def **trees); +void uverbs_free_spec_tree(struct uverbs_root_spec *root); +#endif -- cgit v1.2.3 From 4da70da23e9ba03f7f9e067fbe0eec6ebbfee401 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:02 +0300 Subject: IB/core: Explicitly destroy an object while keeping uobject When some objects are destroyed, we need to extract their status at destruction. After object's destruction, this status (e.g. events_reported) relies in the uobject. In order to have the latest and correct status, the underlying object should be destroyed, but we should keep the uobject alive and read this information off the uobject. We introduce a rdma_explicit_destroy function. This function destroys the class type object (for example, the IDR class type which destroys the underlying object as well) and then convert the uobject to be of a null class type. This uobject will then be destroyed as any other uobject once uverbs_finalize_object[s] is called. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/rdma_core.c | 35 +++++++++++++++++++++++++++++++++++ include/rdma/uverbs_types.h | 1 + 2 files changed, 36 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 2a2f002ac7cb..85b5ee4defa4 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -451,6 +451,41 @@ int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) return ret; } +static int null_obj_type_class_remove_commit(struct ib_uobject *uobj, + enum rdma_remove_reason why) +{ + return 0; +} + +static const struct uverbs_obj_type null_obj_type = { + .type_class = &((const struct uverbs_obj_type_class){ + .remove_commit = null_obj_type_class_remove_commit, + /* be cautious */ + .needs_kfree_rcu = true}), +}; + +int rdma_explicit_destroy(struct ib_uobject *uobject) +{ + int ret; + struct ib_ucontext *ucontext = uobject->context; + + /* Cleanup is running. Calling this should have been impossible */ + if (!down_read_trylock(&ucontext->cleanup_rwsem)) { + WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n"); + return 0; + } + lockdep_check(uobject, true); + ret = uobject->type->type_class->remove_commit(uobject, + RDMA_REMOVE_DESTROY); + if (ret) + return ret; + + uobject->type = &null_obj_type; + + up_read(&ucontext->cleanup_rwsem); + return 0; +} + static void alloc_commit_idr_uobject(struct ib_uobject *uobj) { uverbs_uobject_add(uobj); diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h index 9760b6d70744..cc04ec65588d 100644 --- a/include/rdma/uverbs_types.h +++ b/include/rdma/uverbs_types.h @@ -129,6 +129,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, void rdma_alloc_abort_uobject(struct ib_uobject *uobj); int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); int rdma_alloc_commit_uobject(struct ib_uobject *uobj); +int rdma_explicit_destroy(struct ib_uobject *uobject); struct uverbs_obj_fd_type { /* -- cgit v1.2.3 From d70724f149b107f8e4062320270d3d8b6713a1bb Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:04 +0300 Subject: IB/core: Add legacy driver's user-data In this phase, we don't want to change all the drivers to use flexible driver's specific attributes. Therefore, we add two default attributes: UHW_IN and UHW_OUT. These attributes are optional in some methods and they encode the driver specific command data. We add a function that extract this data and creates the legacy udata over it. Driver's data should start from UVERBS_UDATA_DRIVER_DATA_FLAG. This turns on the first bit of the namespace, indicating this attribute belongs to the driver's namespace. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 40 ++++++++++++++++++++++++++ include/rdma/uverbs_ioctl.h | 46 ++++++++++++++++++++++++++++++ include/uapi/rdma/ib_user_ioctl_verbs.h | 10 +++++++ 3 files changed, 96 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 5f90978bda8d..db66c18857e4 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -209,6 +209,46 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_ return 0; }; +/* + * This spec is used in order to pass information to the hardware driver in a + * legacy way. Every verb that could get driver specific data should get this + * spec. + */ +static const struct uverbs_attr_def uverbs_uhw_compat_in = + UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); +static const struct uverbs_attr_def uverbs_uhw_compat_out = + UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ)); + +static void create_udata(struct uverbs_attr_bundle *ctx, + struct ib_udata *udata) +{ + /* + * This is for ease of conversion. The purpose is to convert all drivers + * to use uverbs_attr_bundle instead of ib_udata. + * Assume attr == 0 is input and attr == 1 is output. + */ + void __user *inbuf; + size_t inbuf_len = 0; + void __user *outbuf; + size_t outbuf_len = 0; + const struct uverbs_attr *uhw_in = + uverbs_attr_get(ctx, UVERBS_UHW_IN); + const struct uverbs_attr *uhw_out = + uverbs_attr_get(ctx, UVERBS_UHW_OUT); + + if (!IS_ERR(uhw_in)) { + inbuf = uhw_in->ptr_attr.ptr; + inbuf_len = uhw_in->ptr_attr.len; + } + + if (!IS_ERR(uhw_out)) { + outbuf = uhw_out->ptr_attr.ptr; + outbuf_len = uhw_out->ptr_attr.len; + } + + INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); +} + DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, UVERBS_OBJECT_COMP_CHANNEL, &UVERBS_TYPE_ALLOC_FD(0, diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9a8d217cdc1d..759afa0621ea 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -36,6 +36,7 @@ #include #include #include +#include /* * ======================================= @@ -338,6 +339,51 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b idx & ~UVERBS_ID_NS_MASK); } +static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, + u16 idx) +{ + u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; + + if (!uverbs_attr_is_valid(attrs_bundle, idx)) + return ERR_PTR(-ENOENT); + + return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; +} + +static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, + size_t idx, const void *from) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + u16 flags; + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT; + return (!copy_to_user(attr->ptr_attr.ptr, from, attr->ptr_attr.len) && + !put_user(flags, &attr->uattr->flags)) ? 0 : -EFAULT; +} + +static inline int _uverbs_copy_from(void *to, size_t to_size, + const struct uverbs_attr_bundle *attrs_bundle, + size_t idx) +{ + const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); + + if (IS_ERR(attr)) + return PTR_ERR(attr); + + if (to_size <= sizeof(((struct ib_uverbs_attr *)0)->data)) + memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); + else if (copy_from_user(to, attr->ptr_attr.ptr, attr->ptr_attr.len)) + return -EFAULT; + + return 0; +} + +#define uverbs_copy_from(to, attrs_bundle, idx) \ + _uverbs_copy_from(to, sizeof(*(to)), attrs_bundle, idx) + /* ================================================= * Definitions -> Specs infrastructure * ================================================= diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 78a2e5be4d6e..90f81eeca35b 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -33,6 +33,11 @@ #ifndef IB_USER_IOCTL_VERBS_H #define IB_USER_IOCTL_VERBS_H +#include + +#define UVERBS_UDATA_DRIVER_DATA_NS 1 +#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) + enum uverbs_default_objects { UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ UVERBS_OBJECT_PD, @@ -50,5 +55,10 @@ enum uverbs_default_objects { UVERBS_OBJECT_LAST, }; +enum { + UVERBS_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_UHW_OUT, +}; + #endif -- cgit v1.2.3 From 9ee79fce364216df35ec46e26d20780c3c1644cc Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:05 +0300 Subject: IB/core: Add completion queue (cq) object actions Adding CQ ioctl actions: 1. create_cq 2. destroy_cq This requires adding the following: 1. A specification describing the method a. Handler b. Attributes specification Each attribute is one of the following: a. PTR_IN - input data Note: This could be encoded inlined for data < 64bit b. PTR_OUT - response data c. IDR - idr based object d. FD - fd based object Blobs attributes (clauses a and b) contain their type, while objects specifications (clauses c and d) contains the expected object type (for example, the given id should be UVERBS_TYPE_PD) and the required access (READ, WRITE, NEW or DESTROY). If a NEW is required, the new object's id will be assigned to this attribute. All attributes could get UA_FLAGS attribute. Currently we support stating that an attribute is mandatory or that the specification size corresponds to a lower bound (and that this attribute could be extended). We currently add both default attributes and the two generic UHW_IN and UHW_OUT driver specific attributes. 2. Handler A handler gets a uverbs_attr_bundle. The handler developer uses uverbs_attr_get to fetch an attribute of a given id. Each of these attribute groups correspond to the specification group defined in the action (clauses 1.b and 1.c respectively). The indices of these arrays corresponds to the attribute ids declared in the specifications (clause 2). The handler is quite simple. It assumes the infrastructure fetched all objects and locked, created or destroyed them as required by the specification. Pointer (or blob) attributes were validated to match their required sizes. After the handler finished, the infrastructure commits or rollbacks the objects. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_std_types.c | 138 ++++++++++++++++++++++++++++- include/uapi/rdma/ib_user_ioctl_verbs.h | 20 +++++ 2 files changed, 157 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index db66c18857e4..0a98579700ec 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -249,6 +249,140 @@ static void create_udata(struct uverbs_attr_bundle *ctx, INIT_UDATA_BUF_OR_NULL(udata, inbuf, outbuf, inbuf_len, outbuf_len); } +static int uverbs_create_cq_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_ucontext *ucontext = file->ucontext; + struct ib_ucq_object *obj; + struct ib_udata uhw; + int ret; + u64 user_handle; + struct ib_cq_init_attr attr = {}; + struct ib_cq *cq; + struct ib_uverbs_completion_event_file *ev_file = NULL; + const struct uverbs_attr *ev_file_attr; + struct ib_uobject *ev_file_uobj; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) + return -EOPNOTSUPP; + + ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR); + if (!ret) + ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE); + if (!ret) + ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE); + if (ret) + return ret; + + /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ + if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT) + return -EFAULT; + + ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL); + if (!IS_ERR(ev_file_attr)) { + ev_file_uobj = ev_file_attr->obj_attr.uobject; + + ev_file = container_of(ev_file_uobj, + struct ib_uverbs_completion_event_file, + uobj_file.uobj); + uverbs_uobject_get(ev_file_uobj); + } + + if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { + ret = -EINVAL; + goto err_event_file; + } + + obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject, + typeof(*obj), uobject); + obj->uverbs_file = ucontext->ufile; + obj->comp_events_reported = 0; + obj->async_events_reported = 0; + INIT_LIST_HEAD(&obj->comp_list); + INIT_LIST_HEAD(&obj->async_list); + + /* Temporary, only until drivers get the new uverbs_attr_bundle */ + create_udata(attrs, &uhw); + + cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err_event_file; + } + + cq->device = ib_dev; + cq->uobject = &obj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = &ev_file->ev_queue; + obj->uobject.object = cq; + obj->uobject.user_handle = user_handle; + atomic_set(&cq->usecnt, 0); + + ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe); + if (ret) + goto err_cq; + + return 0; +err_cq: + ib_destroy_cq(cq); + +err_event_file: + if (ev_file) + uverbs_uobject_put(ev_file_uobj); + return ret; +}; + +static DECLARE_UVERBS_METHOD( + uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler, + &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL, + UVERBS_ACCESS_READ), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32), + &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &uverbs_uhw_compat_in, &uverbs_uhw_compat_out); + +static int uverbs_destroy_cq_handler(struct ib_device *ib_dev, + struct ib_uverbs_file *file, + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_destroy_cq_resp resp; + struct ib_uobject *uobj = + uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject; + struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object, + uobject); + int ret; + + if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ)) + return -EOPNOTSUPP; + + ret = rdma_explicit_destroy(uobj); + if (ret) + return ret; + + resp.comp_events_reported = obj->comp_events_reported; + resp.async_events_reported = obj->async_events_reported; + + return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp); +} + +static DECLARE_UVERBS_METHOD( + uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler, + &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, + UVERBS_ACCESS_DESTROY, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), + &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp, + UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); + DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, UVERBS_OBJECT_COMP_CHANNEL, &UVERBS_TYPE_ALLOC_FD(0, @@ -259,7 +393,9 @@ DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel, DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ, &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, - uverbs_free_cq)); + uverbs_free_cq), + &uverbs_method_cq_create, + &uverbs_method_cq_destroy); DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP, &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 90f81eeca35b..842792eae383 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -60,5 +60,25 @@ enum { UVERBS_UHW_OUT, }; +enum uverbs_create_cq_cmd_attr_ids { + CREATE_CQ_HANDLE, + CREATE_CQ_CQE, + CREATE_CQ_USER_HANDLE, + CREATE_CQ_COMP_CHANNEL, + CREATE_CQ_COMP_VECTOR, + CREATE_CQ_FLAGS, + CREATE_CQ_RESP_CQE, +}; + +enum uverbs_destroy_cq_cmd_attr_ids { + DESTROY_CQ_HANDLE, + DESTROY_CQ_RESP, +}; + +enum uverbs_actions_cq_ops { + UVERBS_CQ_CREATE, + UVERBS_CQ_DESTROY, +}; + #endif -- cgit v1.2.3 From 524271129401ed896dc76e49acdbafc506cb41ac Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:06 +0300 Subject: IB/core: Assign root to all drivers In order to use the parsing tree, we need to assign the root to all drivers. Currently, we just assign the default parsing tree via ib_uverbs_add_one. The driver could override this by assigning a parsing tree prior to registering the device. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_main.c | 18 ++++++++++++++++++ include/rdma/uverbs_ioctl.h | 12 ++++++++++++ include/rdma/uverbs_std_types.h | 14 ++++++++++++++ 4 files changed, 45 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 64d494a64daf..0f6f768f687e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -100,6 +100,7 @@ struct ib_uverbs_device { struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; + struct uverbs_root_spec *specs_root; }; struct ib_uverbs_event_queue { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index defeda33e27f..872fec910c16 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -49,6 +49,7 @@ #include #include +#include #include "uverbs.h" #include "core_priv.h" @@ -1097,6 +1098,18 @@ static void ib_uverbs_add_one(struct ib_device *device) if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; + if (!device->specs_root) { + const struct uverbs_object_tree_def *default_root[] = { + uverbs_default_get_objects()}; + + uverbs_dev->specs_root = uverbs_alloc_spec_tree(1, + default_root); + if (IS_ERR(uverbs_dev->specs_root)) + goto err_class; + + device->specs_root = uverbs_dev->specs_root; + } + ib_set_client_data(device, &uverbs_client, uverbs_dev); return; @@ -1228,6 +1241,11 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); + if (uverbs_dev->specs_root) { + uverbs_free_spec_tree(uverbs_dev->specs_root); + device->specs_root = NULL; + } + kobject_put(&uverbs_dev->kobj); } diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 759afa0621ea..6da44079aa58 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -419,8 +419,20 @@ static inline int _uverbs_copy_from(void *to, size_t to_size, * An object without any methods is considered invalid and will abort the * function with -ENOENT error. */ +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, const struct uverbs_object_tree_def **trees); void uverbs_free_spec_tree(struct uverbs_root_spec *root); +#else +static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, + const struct uverbs_object_tree_def **trees) +{ + return NULL; +} + +static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root) +{ +} +#endif #endif diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h index 400efe2a4d3c..5f8e20bbd67c 100644 --- a/include/rdma/uverbs_std_types.h +++ b/include/rdma/uverbs_std_types.h @@ -34,8 +34,10 @@ #define _UVERBS_STD_TYPES__ #include +#include #include +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) extern const struct uverbs_object_def uverbs_object_comp_channel; extern const struct uverbs_object_def uverbs_object_cq; extern const struct uverbs_object_def uverbs_object_qp; @@ -50,6 +52,18 @@ extern const struct uverbs_object_def uverbs_object_pd; extern const struct uverbs_object_def uverbs_object_xrcd; extern const struct uverbs_object_def uverbs_object_device; +extern const struct uverbs_object_tree_def uverbs_default_objects; +static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) +{ + return &uverbs_default_objects; +} +#else +static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(void) +{ + return NULL; +} +#endif + static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, bool write, struct ib_ucontext *ucontext, -- cgit v1.2.3 From 8eb19e8e7c8658226d8b7e75728e6dfa2ef32717 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 3 Aug 2017 16:07:07 +0300 Subject: IB/core: Expose ioctl interface through experimental Kconfig Add CONFIG_INFINIBAND_EXP_USER_ACCESS that enables the ioctl interface. This interface is experimental and is subject to change. Signed-off-by: Matan Barak Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/Kconfig | 9 +++++++++ drivers/infiniband/core/uverbs.h | 2 ++ drivers/infiniband/core/uverbs_main.c | 6 ++++++ 3 files changed, 17 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 234fe01904e7..3726205c8704 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -34,6 +34,15 @@ config INFINIBAND_USER_ACCESS libibverbs, libibcm and a hardware driver library from . +config INFINIBAND_EXP_USER_ACCESS + bool "Allow experimental support for Infiniband ABI" + depends on INFINIBAND_USER_ACCESS + ---help--- + IOCTL based ABI support for Infiniband. This allows userspace + to invoke the experimental IOCTL based ABI. + These commands are parsed via per-device parsing tree and + enables per-device features. + config INFINIBAND_USER_MEM bool depends on INFINIBAND_USER_ACCESS != n diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 0f6f768f687e..37c8903e7fd0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -219,6 +219,8 @@ int uverbs_dealloc_mw(struct ib_mw *mw); void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj); +long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); + struct ib_uverbs_flow_spec { union { union { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 872fec910c16..dc2aed6fb21b 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -939,6 +939,9 @@ static const struct file_operations uverbs_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, +#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) + .unlocked_ioctl = ib_uverbs_ioctl, +#endif }; static const struct file_operations uverbs_mmap_fops = { @@ -948,6 +951,9 @@ static const struct file_operations uverbs_mmap_fops = { .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, +#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS) + .unlocked_ioctl = ib_uverbs_ioctl, +#endif }; static struct ib_client uverbs_client = { -- cgit v1.2.3