diff options
Diffstat (limited to 'drivers/infiniband')
104 files changed, 2274 insertions, 1591 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 8954792f1acc..36bf50ebb187 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 49ecde98a3d9..e51b739f6ea3 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -106,7 +106,6 @@ struct mcast_group { atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; - int query_id; u16 pkey_index; u8 leave_state; int retries; @@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member) member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) @@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state) IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 23f38cf2c5cd..afe8b28e0878 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T4 and T5 diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e11cf7299945..fa40b685831b 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,5 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c9661d8f11dc..f1510cc76d2d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -49,6 +49,7 @@ #include <rdma/ib_addr.h> +#include <libcxgb_cm.h> #include "iw_cxgb4.h" #include "clip_tbl.h" @@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } @@ -333,6 +332,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } @@ -464,72 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev) return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -704,56 +639,32 @@ static int send_flowc(struct c4iw_ep *ep) static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); -} + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static int send_connect(struct c4iw_ep *ep) @@ -768,7 +679,7 @@ static int send_connect(struct c4iw_ep *ep) u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@ -815,10 +726,10 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -1445,9 +1356,9 @@ static void established_upcall(struct c4iw_ep *ep) static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -1464,15 +1375,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } @@ -1827,8 +1735,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) @@ -1966,7 +1878,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@ -1991,10 +1903,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2048,15 +1960,6 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@ -2113,8 +2016,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, @@ -2210,16 +2115,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@ -2291,7 +2201,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@ -2418,7 +2328,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@ -2439,10 +2349,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2514,42 +2424,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -2578,8 +2452,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2587,18 +2461,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@ -2831,18 +2706,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -2935,11 +2810,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) @@ -3136,7 +3009,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; @@ -3371,9 +3244,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3392,10 +3267,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); @@ -4037,8 +3914,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); @@ -4313,7 +4191,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index bc522a9b2bfa..867b8cf82be8 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1033,15 +1033,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d7332ec06..93e3d270a98a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { @@ -1475,6 +1480,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f83604b2f82d..7e7f79e55006 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -881,15 +882,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; -} - static inline int ocqp_supported(const struct cxgb4_lld_info *infop) { #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 3467b906cff8..f57deba6717c 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -718,7 +718,7 @@ static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, return 0; } -void _free_qp(struct kref *kref) +static void _free_qp(struct kref *kref) { struct c4iw_qp *qhp; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index b2bfbb1eef1a..862381aa83c8 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -636,6 +636,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 79575ee873f2..a26a9a0bfc41 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,7 +47,7 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> -#include <linux/cpumask.h> +#include <linux/interrupt.h> #include "hfi.h" #include "affinity.h" @@ -56,7 +56,7 @@ struct hfi1_affinity_node_list node_affinity = { .list = LIST_HEAD_INIT(node_affinity.list), - .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), + .lock = __MUTEX_INITIALIZER(node_affinity.lock) }; /* Name of IRQ types, indexed by enum irq_type */ @@ -160,14 +160,14 @@ void node_affinity_destroy(void) struct list_head *pos, *q; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); list_for_each_safe(pos, q, &node_affinity.list) { entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); kfree(entry); } - spin_unlock(&node_affinity.lock); + mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); } @@ -234,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); /* * If this is the first time this NUMA node's affinity is used, @@ -247,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); + mutex_unlock(&node_affinity.lock); return -ENOMEM; } init_cpu_mask_set(&entry->def_intr); @@ -303,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - spin_lock(&node_affinity.lock); node_affinity_add_tail(entry); - spin_unlock(&node_affinity.lock); } - + mutex_unlock(&node_affinity.lock); return 0; } -int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +/* + * Function updates the irq affinity hint for msix after it has been changed + * by the user using the /proc/irq interface. This function only accepts + * one cpu in the mask. + */ +static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) +{ + struct sdma_engine *sde = msix->arg; + struct hfi1_devdata *dd = sde->dd; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set; + int i, old_cpu; + + if (cpu > num_online_cpus() || cpu == sde->cpu) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + old_cpu = sde->cpu; + sde->cpu = cpu; + cpumask_clear(&msix->mask); + cpumask_set_cpu(cpu, &msix->mask); + dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", + msix->msix.vector, irq_type_names[msix->type], + sde->this_idx, cpu); + irq_set_affinity_hint(msix->msix.vector, &msix->mask); + + /* + * Set the new cpu in the hfi1_affinity_node and clean + * the old cpu if it is not used by any other IRQ + */ + set = &entry->def_intr; + cpumask_set_cpu(cpu, &set->mask); + cpumask_set_cpu(cpu, &set->used); + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *other_msix; + + other_msix = &dd->msix_entries[i]; + if (other_msix->type != IRQ_SDMA || other_msix == msix) + continue; + + if (cpumask_test_cpu(old_cpu, &other_msix->mask)) + goto unlock; + } + cpumask_clear_cpu(old_cpu, &set->mask); + cpumask_clear_cpu(old_cpu, &set->used); +unlock: + mutex_unlock(&node_affinity.lock); +} + +static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + int cpu = cpumask_first(mask); + struct hfi1_msix_entry *msix = container_of(notify, + struct hfi1_msix_entry, + notify); + + /* Only one CPU configuration supported currently */ + hfi1_update_sdma_affinity(msix, cpu); +} + +static void hfi1_irq_notifier_release(struct kref *ref) +{ + /* + * This is required by affinity notifier. We don't have anything to + * free here. + */ +} + +static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + notify->irq = msix->msix.vector; + notify->notify = hfi1_irq_notifier_notify; + notify->release = hfi1_irq_notifier_release; + + if (irq_set_affinity_notifier(notify->irq, notify)) + pr_err("Failed to register sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + if (irq_set_affinity_notifier(notify->irq, NULL)) + pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +/* + * Function sets the irq affinity for msix. + * It *must* be called with node_affinity.lock held. + */ +static int get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; @@ -329,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; - spin_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: @@ -361,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) * finds its CPU here. */ if (cpu == -1 && set) { - spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -373,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&node_affinity.lock); - } - - switch (msix->type) { - case IRQ_SDMA: - sde->cpu = cpu; - break; - case IRQ_GENERAL: - case IRQ_RCVCTXT: - case IRQ_OTHER: - break; } cpumask_set_cpu(cpu, &msix->mask); @@ -392,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) extra, cpu); irq_set_affinity_hint(msix->msix.vector, &msix->mask); + if (msix->type == IRQ_SDMA) { + sde->cpu = cpu; + hfi1_setup_sdma_notifier(msix); + } + free_cpumask_var(diff); return 0; } +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +{ + int ret; + + mutex_lock(&node_affinity.lock); + ret = get_irq_affinity(dd, msix); + mutex_unlock(&node_affinity.lock); + return ret; +} + void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { @@ -403,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: set = &entry->def_intr; + hfi1_cleanup_sdma_notifier(msix); break; case IRQ_GENERAL: /* Don't do accounting for general contexts */ @@ -421,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; break; default: + mutex_unlock(&node_affinity.lock); return; } if (set) { - spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); + mutex_unlock(&node_affinity.lock); } /* This should be called with node_affinity.lock held */ @@ -536,7 +635,7 @@ int hfi1_get_proc_affinity(int node) if (!ret) goto free_available_mask; - spin_lock(&affinity->lock); + mutex_lock(&affinity->lock); /* * If we've used all available HW threads, clear the mask and start * overloading. @@ -644,7 +743,8 @@ int hfi1_get_proc_affinity(int node) cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&affinity->lock); + + mutex_unlock(&affinity->lock); hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); free_cpumask_var(intrs_mask); @@ -665,49 +765,53 @@ void hfi1_put_proc_affinity(int cpu) if (cpu < 0) return; - spin_lock(&affinity->lock); + + mutex_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&affinity->lock); + mutex_unlock(&affinity->lock); } -/* Prevents concurrent reads and writes of the sdma_affinity attrib */ -static DEFINE_MUTEX(sdma_affinity_mutex); - int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { struct hfi1_affinity_node *entry; - struct cpumask mask; + cpumask_var_t mask; int ret, i; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) - return -EINVAL; + if (!entry) { + ret = -EINVAL; + goto unlock; + } - ret = cpulist_parse(buf, &mask); + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) { + ret = -ENOMEM; + goto unlock; + } + + ret = cpulist_parse(buf, mask); if (ret) - return ret; + goto out; - if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { dd_dev_warn(dd, "Invalid CPU mask\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } - mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, &mask); - /* - * Reassign the affinity for each SDMA interrupt. - */ + cpumask_copy(&entry->def_intr.mask, mask); + + /* Reassign the affinity for each SDMA interrupt. */ for (i = 0; i < dd->num_msix_entries; i++) { struct hfi1_msix_entry *msix; @@ -715,13 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (msix->type != IRQ_SDMA) continue; - ret = hfi1_get_irq_affinity(dd, msix); + ret = get_irq_affinity(dd, msix); if (ret) break; } - - mutex_unlock(&sdma_affinity_mutex); +out: + free_cpumask_var(mask); +unlock: + mutex_unlock(&node_affinity.lock); return ret ? ret : strnlen(buf, PAGE_SIZE); } @@ -729,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) { struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) + if (!entry) { + mutex_unlock(&node_affinity.lock); return -EINVAL; + } - mutex_lock(&sdma_affinity_mutex); cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&sdma_affinity_mutex); + mutex_unlock(&node_affinity.lock); return strnlen(buf, PAGE_SIZE); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 8879cf7a8cac..b89ea3c0ee1a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -121,8 +121,7 @@ struct hfi1_affinity_node_list { int num_core_siblings; int num_online_nodes; int num_online_cpus; - /* protect affinity node list */ - spinlock_t lock; + struct mutex lock; /* protects affinity nodes */ }; int node_affinity_init(void); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b32638d58ae8..9bf5f23544d4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), - FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT), + FLAG_ENTRY0("External Device Request Timeout", + EXTERNAL_DEVICE_REQ_TIMEOUT), }; /* @@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work) set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work) * If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. */ - if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) { + if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) dc_shutdown(ppd->dd); - } else { - tune_serdes(ppd); + else start_link(ppd); - } } void handle_link_bounce(struct work_struct *work) @@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work) */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", @@ -7531,7 +7529,6 @@ done: set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -9161,6 +9158,12 @@ set_local_link_attributes_fail: */ int start_link(struct hfi1_pportdata *ppd) { + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + if (!ppd->link_enabled) { dd_dev_info(ppd->dd, "%s: stopping link start because link is disabled\n", @@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work) */ set_qsfp_int_n(ppd, 1); - tune_serdes(ppd); - start_link(ppd); } @@ -9490,6 +9491,73 @@ static void init_lcb(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00); } +/* + * Perform a test read on the QSFP. Return 0 on success, -ERRNO + * on error. + */ +static int test_qsfp_read(struct hfi1_pportdata *ppd) +{ + int ret; + u8 status; + + /* report success if not a QSFP */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; + + /* read byte 2, the status byte */ + ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1); + if (ret < 0) + return ret; + if (ret != 1) + return -EIO; + + return 0; /* success */ +} + +/* + * Values for QSFP retry. + * + * Give up after 10s (20 x 500ms). The overall timeout was empirically + * arrived at from experience on a large cluster. + */ +#define MAX_QSFP_RETRIES 20 +#define QSFP_RETRY_WAIT 500 /* msec */ + +/* + * Try a QSFP read. If it fails, schedule a retry for later. + * Called on first link activation after driver load. + */ +static void try_start_link(struct hfi1_pportdata *ppd) +{ + if (test_qsfp_read(ppd)) { + /* read failed */ + if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) { + dd_dev_err(ppd->dd, "QSFP not responding, giving up\n"); + return; + } + dd_dev_info(ppd->dd, + "QSFP not responding, waiting and retrying %d\n", + (int)ppd->qsfp_retry_count); + ppd->qsfp_retry_count++; + queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + msecs_to_jiffies(QSFP_RETRY_WAIT)); + return; + } + ppd->qsfp_retry_count = 0; + + start_link(ppd); +} + +/* + * Workqueue function to start the link after a delay. + */ +void handle_start_link(struct work_struct *work) +{ + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + start_link_work.work); + try_start_link(ppd); +} + int bringup_serdes(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; @@ -9525,14 +9593,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd) set_qsfp_int_n(ppd, 1); } - /* - * Tune the SerDes to a ballpark setting for - * optimal signal and bit error rate - * Needs to be done before starting the link - */ - tune_serdes(ppd); - - return start_link(ppd); + try_start_link(ppd); + return 0; } void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) @@ -9549,6 +9611,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->link_enabled = 0; + ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */ + flush_delayed_work(&ppd->start_link_work); + cancel_delayed_work_sync(&ppd->start_link_work); + ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, @@ -9648,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct hfi1_message_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) +struct ib_header *hfi1_get_msgheader( + struct hfi1_devdata *dd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (struct hfi1_message_header *) + return (struct ib_header *) (rhf_addr - dd->rhf_offset + offset); } @@ -11489,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; /* reset the cached receive header queue head value */ @@ -11557,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * update with a dummy tail address and then disable * receive context. */ - if (dd->rcvhdrtail_dummy_physaddr) { + if (dd->rcvhdrtail_dummy_dma) { write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); /* Enabling RcvCtxtCtrl.TailUpd is intentional. */ rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; } @@ -11570,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys) + if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -11642,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * so it doesn't contain an address that is invalid. */ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); } u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) @@ -12865,7 +12931,7 @@ fail: */ static int set_up_context_variables(struct hfi1_devdata *dd) { - int num_kernel_contexts; + unsigned long num_kernel_contexts; int total_contexts; int ret; unsigned ngroups; @@ -12894,9 +12960,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { dd_dev_err(dd, - "Reducing # kernel rcv contexts to: %d, from %d\n", + "Reducing # kernel rcv contexts to: %d, from %lu\n", (int)(dd->chip_send_contexts - num_vls - 1), - (int)num_kernel_contexts); + num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } /* @@ -13319,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd) /* * Give up after 1ms - maximum wait time. * - * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at + * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at * 250MB/s bandwidth. Lower rate to 66% for overhead to get: - * 148 KB / (66% * 250MB/s) = 920us + * 136 KB / (66% * 250MB/s) = 844us */ if (count++ > 500) { dd_dev_err(dd, @@ -14500,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* call before get_platform_config(), after init_chip_resources() */ + ret = eprom_init(dd); + if (ret) + goto bail_free_rcverr; + /* Needs to be called before hfi1_firmware_init */ get_platform_config(dd); @@ -14620,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; - ret = eprom_init(dd); - if (ret) - goto bail_free_rcverr; - goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index ed11107c50fe..92345259a8f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -82,7 +82,7 @@ */ #define CM_VAU 3 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */ -#define CM_GLOBAL_CREDITS 0x940 +#define CM_GLOBAL_CREDITS 0x880 /* Number of PKey entries in the HW */ #define MAX_PKEY_VALUES 16 @@ -254,12 +254,14 @@ #define FAILED_LNI_VERIFY_CAP2 BIT(10) #define FAILED_LNI_CONFIGLT BIT(11) #define HOST_HANDSHAKE_TIMEOUT BIT(12) +#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ | FAILED_LNI_VERIFY_CAP1 \ | FAILED_LNI_VERIFY_CAP2 \ - | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT) + | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \ + | EXTERNAL_DEVICE_REQ_TIMEOUT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ #define HOST_REQ_DONE BIT(0) @@ -706,6 +708,7 @@ void handle_link_up(struct work_struct *work); void handle_link_down(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); +void handle_start_link(struct work_struct *work); void handle_sma_message(struct work_struct *work); void reset_qsfp(struct hfi1_pportdata *ppd); void qsfp_event(struct work_struct *work); @@ -1335,7 +1338,7 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct hfi1_message_header *hfi1_get_msgheader( +struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index fcc9c217a97a..da7be21bedb4 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -320,14 +320,6 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 -/* - * This structure contains the first field common to all protocols - * that employ this chip. - */ -struct hfi1_message_header { - __be16 lrh[4]; -}; - /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..632ba21759ab 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -59,6 +59,40 @@ static struct dentry *hfi1_dbg_root; +/* wrappers to enforce srcu in seq file */ +static ssize_t hfi1_seq_read( + struct file *file, + char __user *buf, + size_t size, + loff_t *ppos) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + ssize_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_read(file, buf, size, ppos); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static loff_t hfi1_seq_lseek( + struct file *file, + loff_t offset, + int whence) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + loff_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_lseek(file, offset, whence); + debugfs_use_file_finish(srcu_idx); + return r; +} + #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) @@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \ static const struct file_operations _##name##_file_ops = { \ .owner = THIS_MODULE, \ .open = _##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ .release = seq_release \ } @@ -105,11 +139,9 @@ do { \ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_opcode_stats_perctx *opstats; - rcu_read_lock(); if (*pos >= ARRAY_SIZE(opstats->stats)) return NULL; return pos; @@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _opcode_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _opcode_stats_seq_show(struct seq_file *s, void *v) @@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats) DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) + __acquires(RCU) { struct qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qp_iter *iter = iter_ptr; @@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) -__releases(RCU) + __releases(RCU) { rcu_read_unlock(); } @@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats) DEBUGFS_FILE_OPS(qp_stats); static void *_sdes_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_ibdev *ibd; struct hfi1_devdata *dd; - rcu_read_lock(); ibd = (struct hfi1_ibdev *)s->private; dd = dd_from_dev(ibd); if (!dd->per_sdma || *pos >= dd->num_sdma) @@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _sdes_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _sdes_seq_show(struct seq_file *s, void *v) @@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_portcntrs(dd->pport, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, struct hfi1_pportdata *ppd; ssize_t rval; - rcu_read_lock(); ppd = private2ppd(file); avail = hfi1_read_portcntrs(ppd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, int used; int i; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; size = PAGE_SIZE; used = 0; tmp = kmalloc(size, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); used += scnprintf(tmp + used, size - used, @@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, used += scnprintf(tmp + used, size - used, "Write bits to clear\n"); ret = simple_read_from_buffer(buf, count, ppos, tmp, used); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, u64 scratch0; u64 clear; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; buff = kmalloc(count + 1, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto do_return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, do_free: kfree(buff); - do_return: - rcu_read_unlock(); return ret; } @@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, char *tmp; int ret; - rcu_read_lock(); ppd = private2ppd(file); tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } ret = qsfp_dump(ppd, tmp, PAGE_SIZE); if (ret > 0) ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, int offset; int total_written; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, int offset; int total_read; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { @@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, int ret; int total_written; - rcu_read_lock(); - if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ - ret = -EINVAL; - goto _return; - } + if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */ + return -EINVAL; ppd = private2ppd(file); buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { ret = -EFAULT; goto _free; } - total_written = qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; @@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, int ret; int total_read; - rcu_read_lock(); if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ ret = -EINVAL; goto _return; @@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); _return: - rcu_read_unlock(); return ret; } @@ -948,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), }; +static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + ++*pos; + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + loff_t *spos = v; + loff_t i = *spos; + + sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); +DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) +DEBUGFS_FILE_OPS(sdma_cpu_list); + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -976,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) DEBUGFS_FILE_CREATE(cntr_ops[i].name, @@ -1006,7 +1029,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: ibd->hfi1_ibdev_dbg = NULL; - synchronize_rcu(); } /* @@ -1031,9 +1053,7 @@ static const char * const hfi1_statnames[] = { }; static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1051,9 +1071,7 @@ static void *_driver_stats_names_seq_next( } static void _driver_stats_names_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _driver_stats_names_seq_show(struct seq_file *s, void *v) @@ -1069,9 +1087,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names) DEBUGFS_FILE_OPS(driver_stats_names); static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1086,9 +1102,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _driver_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static u64 hfi1_sps_ints(void) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8246dc7d0573..6563e4d38b80 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded) static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, struct hfi1_packet *packet) { - struct hfi1_message_header *rhdr = packet->hdr; + struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr; - struct hfi1_other_headers *ohdr = NULL; + struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(hdr->lrh[1]); + u16 lid = be16_to_cpu(rhdr->lrh[1]); u32 qp_num; u32 rcv_flags = 0; @@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Check for GRH */ if (lnh == HFI1_LRH_BTH) { - ohdr = &hdr->u.oth; + ohdr = &rhdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { u32 vtf; - ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + ohdr = &rhdr->u.l.oth; + if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; rcv_flags |= HFI1_HAS_GRH; @@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, case IB_QPT_RC: hfi1_rc_hdrerr( rcd, - hdr, + rhdr, rcv_flags, qp); break; @@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_ib_header *hdr = pkt->hdr; - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_header *hdr = pkt->hdr; + struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); @@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); + sc = hdr2sc(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { @@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; struct rvt_qp *qp; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr; + struct ib_header *hdr; + struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = (struct hfi1_ib_header *) - hfi1_get_msgheader(dd, rhf_addr); + hdr = hfi1_get_msgheader(dd, rhf_addr); + lnh = be16_to_cpu(hdr->lrh[0]) & 3; if (lnh == HFI1_LRH_BTH) @@ -888,14 +887,15 @@ void set_all_slowpath(struct hfi1_devdata *dd) } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet packet, + struct hfi1_packet *packet, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, - packet.rhf_addr); + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + u8 etype = rhf_rcv_type(packet->rhf); - if (hdr2sc(hdr, packet.rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* Auto activate link on non-SC15 packet receive */ if (unlikely(rcd->ppd->host_link_state == HLS_UP_ARMED) && - set_armed_to_active(rcd, packet, dd)) + set_armed_to_active(rcd, &packet, dd)) goto bail; last = process_rcv_packet(&packet, thread); } diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 36b77943cbfd..e70c223801b4 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -49,7 +49,26 @@ #include "common.h" #include "eprom.h" +/* + * The EPROM is logically divided into three partitions: + * partition 0: the first 128K, visible from PCI ROM BAR + * partition 1: 4K config file (sector size) + * partition 2: the rest + */ +#define P0_SIZE (128 * 1024) +#define P1_SIZE (4 * 1024) +#define P1_START P0_SIZE +#define P2_START (P0_SIZE + P1_SIZE) + +/* controller page size, in bytes */ +#define EP_PAGE_SIZE 256 +#define EP_PAGE_MASK (EP_PAGE_SIZE - 1) +#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32)) + +/* controller commands */ #define CMD_SHIFT 24 +#define CMD_NOP (0) +#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) #define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) /* controller interface speeds */ @@ -61,6 +80,90 @@ * Double it for safety. */ #define EPROM_TIMEOUT 80000 /* ms */ + +/* + * Read a 256 byte (64 dword) EPROM page. + * All callers have verified the offset is at a page boundary. + */ +static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) +{ + int i; + + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); + for (i = 0; i < EP_PAGE_DWORDS; i++) + result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ +} + +/* + * Read length bytes starting at offset from the start of the EPROM. + */ +static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest) +{ + u32 buffer[EP_PAGE_DWORDS]; + u32 end; + u32 start_offset; + u32 read_start; + u32 bytes; + + if (len == 0) + return 0; + + end = start + len; + + /* + * Make sure the read range is not outside of the controller read + * command address range. Note that '>' is correct below - the end + * of the range is OK if it stops at the limit, but no higher. + */ + if (end > (1 << CMD_SHIFT)) + return -EINVAL; + + /* read the first partial page */ + start_offset = start & EP_PAGE_MASK; + if (start_offset) { + /* partial starting page */ + + /* align and read the page that contains the start */ + read_start = start & ~EP_PAGE_MASK; + read_page(dd, read_start, buffer); + + /* the rest of the page is available data */ + bytes = EP_PAGE_SIZE - start_offset; + + if (len <= bytes) { + /* end is within this page */ + memcpy(dest, (u8 *)buffer + start_offset, len); + return 0; + } + + memcpy(dest, (u8 *)buffer + start_offset, bytes); + + start += bytes; + len -= bytes; + dest += bytes; + } + /* start is now page aligned */ + + /* read whole pages */ + while (len >= EP_PAGE_SIZE) { + read_page(dd, start, buffer); + memcpy(dest, buffer, EP_PAGE_SIZE); + + start += EP_PAGE_SIZE; + len -= EP_PAGE_SIZE; + dest += EP_PAGE_SIZE; + } + + /* read the last partial page */ + if (len) { + read_page(dd, start, buffer); + memcpy(dest, buffer, len); + } + + return 0; +} + /* * Initialize the EPROM handler. */ @@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd) done_asic: return ret; } + +/* magic character sequence that trails an image */ +#define IMAGE_TRAIL_MAGIC "egamiAPO" + +/* + * Read all of partition 1. The actual file is at the front. Adjust + * the returned size if a trailing image magic is found. + */ +static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, + u32 *size) +{ + void *buffer; + void *p; + u32 length; + int ret; + + buffer = kmalloc(P1_SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + ret = read_length(dd, P1_START, P1_SIZE, buffer); + if (ret) { + kfree(buffer); + return ret; + } + + /* scan for image magic that may trail the actual data */ + p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); + if (p) + length = p - buffer; + else + length = P1_SIZE; + + *data = buffer; + *size = length; + return 0; +} + +/* + * Read the platform configuration file from the EPROM. + * + * On success, an allocated buffer containing the data and its size are + * returned. It is up to the caller to free this buffer. + * + * Return value: + * 0 - success + * -ENXIO - no EPROM is available + * -EBUSY - not able to acquire access to the EPROM + * -ENOENT - no recognizable file written + * -ENOMEM - buffer could not be allocated + */ +int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) +{ + u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */ + int ret; + + if (!dd->eprom_available) + return -ENXIO; + + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) + return -EBUSY; + + /* read the last page of P0 for the EPROM format magic */ + ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + if (ret) + goto done; + + /* last dword of P0 contains a magic indicator */ + if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* partition format */ + ret = read_partition_platform_config(dd, data, size); + goto done; + } + + /* nothing recognized */ + ret = -ENOENT; + +done: + release_chip_resource(dd, CR_EPROM); + return ret; +} diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..e774184f1643 100644 --- a/drivers/infiniband/hw/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h @@ -45,8 +45,8 @@ * */ -struct hfi1_cmd; struct hfi1_devdata; int eprom_init(struct hfi1_devdata *dd); -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd); +int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret, + u32 *size_ret); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1ecbec192358..677efa0e8cd6 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,7 +58,6 @@ #include "trace.h" #include "user_sdma.h" #include "user_exp_rcv.h" -#include "eprom.h" #include "aspm.h" #include "mmu_rb.h" @@ -183,6 +182,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; + atomic_inc(&fd->mm->mm_count); } fp->private_data = fd; @@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - setup_ctxt(fp); + ret = setup_ctxt(fp); if (ret) return ret; ret = user_init(fp); @@ -439,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd; - unsigned long flags, pfn; + unsigned long flags; u64 token = vma->vm_pgoff << PAGE_SHIFT, memaddr = 0; + void *memvirt = NULL; u8 subctxt, mapio = 0, vmf = 0, type; ssize_t memlen = 0; int ret = 0; @@ -492,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * second or third page allocated for credit returns (if number * of enabled contexts > 64 and 128 respectively). */ - memaddr = dd->cr_base[uctxt->numa_id].pa + + memvirt = dd->cr_base[uctxt->numa_id].va; + memaddr = virt_to_phys(memvirt) + (((u64)uctxt->sc->hw_free - (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK); memlen = PAGE_SIZE; @@ -507,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memaddr = uctxt->rcvhdrq_phys; memlen = uctxt->rcvhdrq_size; + memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { unsigned long addr; @@ -532,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_flags &= ~VM_MAYWRITE; addr = vma->vm_start; for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) { + memlen = uctxt->egrbufs.buffers[i].len; + memvirt = uctxt->egrbufs.buffers[i].addr; ret = remap_pfn_range( vma, addr, - uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT, - uctxt->egrbufs.buffers[i].len, + /* + * virt_to_pfn() does the same, but + * it's not available on x86_64 + * when CONFIG_MMU is enabled. + */ + PFN_DOWN(__pa(memvirt)), + memlen, vma->vm_page_prot); if (ret < 0) goto done; - addr += uctxt->egrbufs.buffers[i].len; + addr += memlen; } ret = 0; goto done; @@ -595,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EPERM; goto done; } - memaddr = uctxt->rcvhdrqtailaddr_phys; memlen = PAGE_SIZE; + memvirt = (void *)uctxt->rcvhdrtail_kvaddr; flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -649,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n", ctxt, subctxt, type, mapio, vmf, memaddr, memlen, vma->vm_end - vma->vm_start, vma->vm_flags); - pfn = (unsigned long)(memaddr >> PAGE_SHIFT); if (vmf) { - vma->vm_pgoff = pfn; + vma->vm_pgoff = PFN_DOWN(memaddr); vma->vm_ops = &vm_ops; ret = 0; } else if (mapio) { - ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = io_remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); + } else if (memvirt) { + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(memvirt)), + memlen, + vma->vm_page_prot); } else { - ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); } done: @@ -779,6 +796,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + mmdrop(fdata->mm); kobject_put(&dd->kobj); kfree(fdata); return 0; @@ -959,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, */ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, uctxt->dd->node); - if (!uctxt->sc) - return -ENOMEM; - + if (!uctxt->sc) { + ret = -ENOMEM; + goto ctxdata_free; + } hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index, uctxt->sc->hw_context); ret = sc_enable(uctxt->sc); if (ret) - return ret; + goto ctxdata_free; + /* * Setup shared context resources if the user-level has requested * shared contexts and this is the 'master' process. @@ -980,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, * send context because it will be done during file close */ if (ret) - return ret; + goto ctxdata_free; } uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ @@ -1000,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, fd->uctxt = uctxt; return 0; + +ctxdata_free: + dd->rcd[ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + return ret; } static int init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1258,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) uctxt->rcvhdrq); binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt, fd->subctxt, - uctxt->egrbufs.rcvtids[0].phys); + uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, fd->subctxt, 0); /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1000e0fd96d9..7eef11b316ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,8 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <rdma/ib_hdrs.h> +#include <linux/rhashtable.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -171,12 +173,12 @@ struct ctxt_eager_bufs { u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; - dma_addr_t phys; + dma_addr_t dma; ssize_t len; } *buffers; struct { void *addr; - dma_addr_t phys; + dma_addr_t dma; } *rcvtids; }; @@ -207,8 +209,8 @@ struct hfi1_ctxtdata { /* size of each of the rcvhdrq entries */ u16 rcvhdrqentsize; /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t rcvhdrq_phys; - dma_addr_t rcvhdrqtailaddr_phys; + dma_addr_t rcvhdrq_dma; + dma_addr_t rcvhdrqtailaddr_dma; struct ctxt_eager_bufs egrbufs; /* this receive context's assigned PIO ACK send context */ struct send_context *sc; @@ -350,7 +352,7 @@ struct hfi1_packet { struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -529,6 +531,7 @@ struct hfi1_msix_entry { void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; + struct irq_affinity_notify notify; }; /* per-SL CCA information */ @@ -605,6 +608,7 @@ struct hfi1_pportdata { struct work_struct freeze_work; struct work_struct link_downgrade_work; struct work_struct link_bounce_work; + struct delayed_work start_link_work; /* host link state variables */ struct mutex hls_lock; u32 host_link_state; @@ -659,6 +663,7 @@ struct hfi1_pportdata { u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ u8 last_pstate; /* info only */ + u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ u8 overrun_threshold; @@ -1058,8 +1063,6 @@ struct hfi1_devdata { u8 psxmitwait_supported; /* cycle length of PS* counters in HW (in picoseconds) */ u16 psxmitwait_check_rate; - /* high volume overflow errors deferred to tasklet */ - struct tasklet_struct error_tasklet; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1162,7 +1165,7 @@ struct hfi1_devdata { /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; - dma_addr_t rcvhdrtail_dummy_physaddr; + dma_addr_t rcvhdrtail_dummy_dma; bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ @@ -1173,6 +1176,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct rhashtable sdma_rht; struct kobject kobj; }; @@ -1266,15 +1270,32 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) +static inline int hdr2sc(struct ib_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | ((!!(rhf_dc_info(rhf))) << 4); } +#define HFI1_JKEY_WIDTH 16 +#define HFI1_JKEY_MASK (BIT(16) - 1) +#define HFI1_ADMIN_JKEY_RANGE 32 + +/* + * J_KEYs are split and allocated in the following groups: + * 0 - 31 - users with administrator privileges + * 32 - 63 - kernel protocols using KDETH packets + * 64 - 65535 - all other users using KDETH packets + */ static inline u16 generate_jkey(kuid_t uid) { - return from_kuid(current_user_ns(), uid) & 0xffff; + u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK; + + if (capable(CAP_SYS_ADMIN)) + jkey &= HFI1_ADMIN_JKEY_RANGE - 1; + else if (jkey < 64) + jkey |= BIT(HFI1_JKEY_WIDTH - 1); + + return jkey; } /* @@ -1584,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_other_headers *ohdr = pkt->ohdr; u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); @@ -1656,7 +1677,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, const struct pci_device_id *); void hfi1_free_devdata(struct hfi1_devdata *); -void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1788,7 +1808,7 @@ extern unsigned int hfi1_max_mtu; extern unsigned int hfi1_cu; extern unsigned int user_credit_return_threshold; extern int num_user_contexts; -extern unsigned n_krcvqs; +extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; extern uint kdeth_qp; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a358d23ecd54..60db61536fed 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); /* computed based on above array */ -unsigned n_krcvqs; +unsigned long n_krcvqs; static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); @@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } return rcd; bail: + dd->rcd[ctxt] = NULL; kfree(rcd->egrbufs.rcvtids); kfree(rcd->egrbufs.buffers); kfree(rcd); @@ -500,6 +501,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); + INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link); INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); @@ -708,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) /* allocate dummy tail memory for all receive contexts */ dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent( &dd->pcidev->dev, sizeof(u64), - &dd->rcvhdrtail_dummy_physaddr, + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); if (!dd->rcvhdrtail_dummy_kvaddr) { @@ -941,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (rcd->rcvhdrq) { dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, - rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, (void *)rcd->rcvhdrtail_kvaddr, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } } @@ -955,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.rcvtids); for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].phys) + if (rcd->egrbufs.buffers[e].dma) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, - rcd->egrbufs.buffers[e].phys); + rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); @@ -1333,7 +1335,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_unlock(&ppd->cc_state_lock); if (cc_state) - call_rcu(&cc_state->rcu, cc_state_reclaim); + kfree_rcu(cc_state, rcu); } free_credit_return(dd); @@ -1353,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); dd->rcvhdrtail_dummy_kvaddr = NULL; } @@ -1576,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t phys_hdrqtail; + dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1589,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; rcd->rcvhdrq = dma_zalloc_coherent( - &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { @@ -1601,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; @@ -1633,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) * before enabling any receive context */ write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); return 0; @@ -1644,7 +1646,7 @@ bail_free: vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; bail: return -ENOMEM; @@ -1705,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[idx].addr = dma_zalloc_coherent(&dd->pcidev->dev, rcd->egrbufs.rcvtid_size, - &rcd->egrbufs.buffers[idx].phys, + &rcd->egrbufs.buffers[idx].dma, gfp_flags); if (rcd->egrbufs.buffers[idx].addr) { rcd->egrbufs.buffers[idx].len = rcd->egrbufs.rcvtid_size; rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr = rcd->egrbufs.buffers[idx].addr; - rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys = - rcd->egrbufs.buffers[idx].phys; + rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma = + rcd->egrbufs.buffers[idx].dma; rcd->egrbufs.alloced++; alloced_bytes += rcd->egrbufs.rcvtid_size; idx++; @@ -1754,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (i = 0, j = 0, offset = 0; j < idx; i++) { if (i >= rcd->egrbufs.count) break; - rcd->egrbufs.rcvtids[i].phys = - rcd->egrbufs.buffers[j].phys + offset; + rcd->egrbufs.rcvtids[i].dma = + rcd->egrbufs.buffers[j].dma + offset; rcd->egrbufs.rcvtids[i].addr = rcd->egrbufs.buffers[j].addr + offset; rcd->egrbufs.alloced++; - if ((rcd->egrbufs.buffers[j].phys + offset + + if ((rcd->egrbufs.buffers[j].dma + offset + new_size) == - (rcd->egrbufs.buffers[j].phys + + (rcd->egrbufs.buffers[j].dma + rcd->egrbufs.buffers[j].len)) { j++; offset = 0; @@ -1813,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, - rcd->egrbufs.rcvtids[idx].phys, order); + rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } goto bail; @@ -1825,9 +1827,9 @@ bail_rcvegrbuf_phys: dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[idx].len, rcd->egrbufs.buffers[idx].addr, - rcd->egrbufs.buffers[idx].phys); + rcd->egrbufs.buffers[idx].dma); rcd->egrbufs.buffers[idx].addr = NULL; - rcd->egrbufs.buffers[idx].phys = 0; + rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } bail: diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1263abe01999..9487c9bb8920 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * offline. */ set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { set_link_state(ppd, link_state); @@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* - * Don't update pkeys[2], if an HFI port without MgmtAllowed - * by neighbor is a switch. - */ - if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) - continue; - /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. @@ -1819,6 +1812,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; + if (dd->pport->port_type != PORT_TYPE_QSFP) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) @@ -2599,7 +2597,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; - unsigned long port_num; + u8 port_num; unsigned long vl; u32 vl_select_mask; int vfi; @@ -2633,9 +2631,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } @@ -2837,7 +2835,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3010,7 +3008,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3247,7 +3245,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3398,7 +3396,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + kfree_rcu(old_cc_state, rcu); } static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, @@ -3553,13 +3551,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } -void cc_state_reclaim(struct rcu_head *rcu) -{ - struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu); - - kfree(cc_state); -} - static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ac1bf4a73571..50a3a36d9363 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -551,11 +551,11 @@ static inline u32 group_size(u32 group) } /* - * Obtain the credit return addresses, kernel virtual and physical, for the + * Obtain the credit return addresses, kernel virtual and bus, for the * given sc. * * To understand this routine: - * o va and pa are arrays of struct credit_return. One for each physical + * o va and dma are arrays of struct credit_return. One for each physical * send context, per NUMA. * o Each send context always looks in its relative location in a struct * credit_return for its credit return. @@ -563,14 +563,14 @@ static inline u32 group_size(u32 group) * with the same value. Use the address of the first send context in the * group. */ -static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa) +static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) { u32 gc = group_context(sc->hw_context, sc->group); u32 index = sc->hw_context & 0x7; sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; - *pa = (unsigned long) - &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc]; + *dma = (unsigned long) + &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; } /* @@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - dma_addr_t pa; + dma_addr_t dma; unsigned long flags; u64 reg; u32 thresh; @@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->sw_index = sw_index; sc->hw_context = hw_context; - cr_group_addresses(sc, &pa); + cr_group_addresses(sc, &dma); sc->credits = sci->credits; /* PIO Send Memory Address details */ @@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); /* set up credit return */ - reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); + reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); /* @@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd) dd->cr_base[i].va = dma_zalloc_coherent( &dd->pcidev->dev, bytes, - &dd->cr_base[i].pa, + &dd->cr_base[i].dma, GFP_KERNEL); if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd) TXE_NUM_CONTEXTS * sizeof(struct credit_return), dd->cr_base[i].va, - dd->cr_base[i].pa); + dd->cr_base[i].dma); } } kfree(dd->cr_base); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 464cbd27b975..e709eaf743b5 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -154,7 +154,7 @@ struct credit_return { /* NUMA indexed credit return array */ struct credit_return_base { struct credit_return *va; - dma_addr_t pa; + dma_addr_t dma; }; /* send context configuration sizes (one per type) */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..aa7773643107 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, preempt_enable(); } -/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ -#define USE_SHIFTS 1 -#ifdef USE_SHIFTS /* * Handle carry bytes using shifts and masks. * @@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, #define mshift(x) (8 * (x)) /* - * Read nbytes bytes from "from" and return them in the LSB bytes - * of pbuf->carry. Other bytes are zeroed. Any previous value - * pbuf->carry is lost. - * - * NOTES: - * o do not read from from if nbytes is zero - * o from may _not_ be u64 aligned - * o nbytes must not span a QW boundary - */ -static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) -{ - unsigned long off; - - if (nbytes == 0) { - pbuf->carry.val64 = 0; - } else { - /* align our pointer */ - off = (unsigned long)from & 0x7; - from = (void *)((unsigned long)from & ~0x7l); - pbuf->carry.val64 = ((*(u64 *)from) - << zshift(nbytes + off))/* zero upper bytes */ - >> zshift(nbytes); /* place at bottom */ - } - pbuf->carry_bytes = nbytes; -} - -/* - * Read nbytes bytes from "from" and put them at the next significant bytes - * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra - * read does not overfill carry. - * - * NOTES: - * o from may _not_ be u64 aligned - * o nbytes may span a QW boundary - */ -static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) -{ - unsigned long off = (unsigned long)from & 0x7; - unsigned int room, xbytes; - - /* align our pointer */ - from = (void *)((unsigned long)from & ~0x7l); - - /* check count first - don't read anything if count is zero */ - while (nbytes) { - /* find the number of bytes in this u64 */ - room = 8 - off; /* this u64 has room for this many bytes */ - xbytes = min(room, nbytes); - - /* - * shift down to zero lower bytes, shift up to zero upper - * bytes, shift back down to move into place - */ - pbuf->carry.val64 |= (((*(u64 *)from) - >> mshift(off)) - << zshift(xbytes)) - >> zshift(xbytes + pbuf->carry_bytes); - off = 0; - pbuf->carry_bytes += xbytes; - nbytes -= xbytes; - from += sizeof(u64); - } -} - -/* - * Zero extra bytes from the end of pbuf->carry. - * - * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - unsigned int remaining; - - if (zbytes == 0) /* nothing to do */ - return; - - remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ - - /* NOTE: zshift only guaranteed to work if remaining != 0 */ - if (remaining) - pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) - >> zshift(remaining); - else - pbuf->carry.val64 = 0; - pbuf->carry_bytes = remaining; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the LSB bytes of - * pbuf->carry with the upper bytes zeroed.. - * - * NOTES: - * o result must keep unused bytes zeroed - * o src must be u64 aligned - */ -static inline void merge_write8( - struct pio_buf *pbuf, - void __iomem *dest, - const void *src) -{ - u64 new, temp; - - new = *(u64 *)src; - temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); - writeq(temp, dest); - pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); -} - -/* - * Write a quad word using all bytes of carry. - */ -static inline void carry8_write8(union mix carry, void __iomem *dest) -{ - writeq(carry.val64, dest); -} - -/* - * Write a quad word using all the valid bytes of carry. If carry - * has zero valid bytes, nothing is written. - * Returns 0 on nothing written, non-zero on quad word written. - */ -static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) -{ - if (pbuf->carry_bytes) { - /* unused bytes are always kept zeroed, so just write */ - writeq(pbuf->carry.val64, dest); - return 1; - } - - return 0; -} - -#else /* USE_SHIFTS */ -/* - * Handle carry bytes using byte copies. - * - * NOTE: the value the unused portion of carry is left uninitialized. - */ - -/* * Jump copy - no-loop copy for < 8 bytes. */ static inline void jcopy(u8 *dest, const u8 *src, u32 n) @@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; + /* fall through */ case 6: *dest++ = *src++; + /* fall through */ case 5: *dest++ = *src++; + /* fall through */ case 4: *dest++ = *src++; + /* fall through */ case 3: *dest++ = *src++; + /* fall through */ case 2: *dest++ = *src++; + /* fall through */ case 1: *dest++ = *src++; + /* fall through */ } } @@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, unsigned int nbytes) { + pbuf->carry.val64 = 0; jcopy(&pbuf->carry.val8[0], from, nbytes); pbuf->carry_bytes = nbytes; } @@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf, } /* - * Zero extra bytes from the end of pbuf->carry. - * - * We do not care about the value of unused bytes in carry, so just - * reduce the byte count. + * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. + * Put the unused part of the next 8 bytes of src into the LSB bytes of + * pbuf->carry with the upper bytes zeroed.. * * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - pbuf->carry_bytes -= zbytes; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the low bytes of - * pbuf->carry. + * o result must keep unused bytes zeroed + * o src must be u64 aligned */ static inline void merge_write8( struct pio_buf *pbuf, - void *dest, + void __iomem *dest, const void *src) { - u32 remainder = 8 - pbuf->carry_bytes; + u64 new, temp; - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); - writeq(pbuf->carry.val64, dest); - jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); + new = *(u64 *)src; + temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); + writeq(temp, dest); + pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); } /* * Write a quad word using all bytes of carry. */ -static inline void carry8_write8(union mix carry, void *dest) +static inline void carry8_write8(union mix carry, void __iomem *dest) { writeq(carry.val64, dest); } @@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest) * has zero valid bytes, nothing is written. * Returns 0 on nothing written, non-zero on quad word written. */ -static inline int carry_write8(struct pio_buf *pbuf, void *dest) +static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) { if (pbuf->carry_bytes) { - u64 zero = 0; - - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, - 8 - pbuf->carry_bytes); + /* unused bytes are always kept zeroed, so just write */ writeq(pbuf->carry.val64, dest); return 1; } return 0; } -#endif /* USE_SHIFTS */ /* * Segmented PIO Copy - start @@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); void __iomem *dend; /* 8-byte data end */ - unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3; - unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7; + unsigned long qw_to_write = nbytes >> 3; + unsigned long bytes_left = nbytes & 0x7; /* calculate 8-byte data end */ dend = dest + (qw_to_write * sizeof(u64)); @@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - /* adjust carry */ - if (pbuf->carry_bytes < bytes_left) { - /* need to read more */ - read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes); + pbuf->qw_written += qw_to_write; + + /* handle carry and left-over bytes */ + if (pbuf->carry_bytes + bytes_left >= 8) { + unsigned long nread; + + /* there is enough to fill another qw - fill carry */ + nread = 8 - pbuf->carry_bytes; + read_extra_bytes(pbuf, from, nread); + + /* + * One more write - but need to make sure dest is correct. + * Check for wrap and the possibility the write + * should be in SOP space. + * + * The two checks immediately below cannot both be true, hence + * the else. If we have wrapped, we cannot still be within the + * first block. Conversely, if we are still in the first block, + * we cannot have wrapped. We do the wrap check first as that + * is more likely. + */ + /* adjust if we have wrapped */ + if (dest >= pbuf->end) + dest -= pbuf->size; + /* jump to the SOP range if within the first block */ + else if (pbuf->qw_written < PIO_BLOCK_QWS) + dest += SOP_DISTANCE; + + /* flush out full carry */ + carry8_write8(pbuf->carry, dest); + pbuf->qw_written++; + + /* now adjust and read the rest of the bytes into carry */ + bytes_left -= nread; + from += nread; /* from is now not aligned */ + read_low_bytes(pbuf, from, bytes_left); } else { - /* remove invalid bytes */ - zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left); + /* not enough to fill another qw, append the rest to carry */ + read_extra_bytes(pbuf, from, bytes_left); } - - pbuf->qw_written += qw_to_write; } /* @@ -771,6 +649,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_extra_bytes(pbuf, from, to_fill); from += to_fill; nbytes -= to_fill; + /* may not be enough valid bytes left to align */ + if (extra > nbytes) + extra = nbytes; /* ...now write carry */ dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); @@ -798,6 +679,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_low_bytes(pbuf, from, extra); from += extra; nbytes -= extra; + /* + * If no bytes are left, return early - we are done. + * NOTE: This short-circuit is *required* because + * "extra" may have been reduced in size and "from" + * is not aligned, as required when leaving this + * if block. + */ + if (nbytes == 0) + return; } /* at this point, from is QW aligned */ diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 965c8aef0c60..202433178864 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -47,29 +47,39 @@ #include "hfi.h" #include "efivar.h" +#include "eprom.h" void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; unsigned long size = 0; u8 *temp_platform_config = NULL; + u32 esize; + + ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + size = esize; + goto success; + } + /* fail, try EFI variable */ ret = read_hfi1_efi_var(dd, "configuration", &size, (void **)&temp_platform_config); - if (ret) { - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); - /* fall back to request firmware */ - platform_config_load = 1; - goto bail; - } + if (!ret) + goto success; + + dd_dev_info(dd, + "%s: Failed to get platform config from UEFI, falling back to request firmware\n", + __func__); + /* fall back to request firmware */ + platform_config_load = 1; + return; +success: dd->platform_config.data = temp_platform_config; dd->platform_config.size = size; - -bail: - /* exit */; } void free_platform_config(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index a5aa3517e7d5..9fc75e7e8781 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp) write_seqlock_irqsave(&dev->iowait_lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock_irqrestore(&dev->iowait_lock, flags); } @@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp) */ void hfi1_schedule_send(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); if (hfi1_send_ok(qp)) _hfi1_schedule_send(qp); } /** - * hfi1_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush + * hfi1_get_credit - handle credit in aeth + * @qp: the qp * @aeth: the Acknowledge Extended Transport Header * * The QP s_lock should be held. @@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK; + lockdep_assert_held(&qp->s_lock); /* * If the credit is invalid, we can send * as many packets as we like. Otherwise, we have to @@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) } spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } static int iowait_sleep( @@ -544,7 +544,7 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) iter->dev = dev; iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - if (qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } @@ -812,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, kfree(priv); return ERR_PTR(-ENOMEM); } + iowait_init( + &priv->s_iowait, + 1, + _hfi1_do_send, + iowait_sleep, + iowait_wakeup, + iowait_sdma_drained); setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); qp->s_timer.function = hfi1_rc_timeout; return priv; @@ -852,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); flush_iowait(qp); hfi1_stop_rc_timers(qp); } @@ -877,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - iowait_init( - &priv->s_iowait, - 1, - _hfi1_do_send, - iowait_sleep, - iowait_wakeup, - iowait_sdma_drained); priv->r_adefered = 0; clear_ahg(qp); } @@ -967,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index a207717ade2a..1869f639c3ae 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, bus->algo.getsda = hfi1_getsda; bus->algo.getscl = hfi1_getscl; bus->algo.udelay = 5; - bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.timeout = usecs_to_jiffies(100000); bus->algo.data = bus; bus->adapter.owner = THIS_MODULE; @@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, u8 *data) { struct hfi1_pportdata *ppd; - u32 excess_len = 0; - int ret = 0; + u32 excess_len = len; + int ret = 0, offset = 0; if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", @@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, } memcpy(data, &ppd->qsfp_info.cache[addr], len); + + if (addr <= QSFP_MONITOR_VAL_END && + (addr + len) >= QSFP_MONITOR_VAL_START) { + /* Overlap with the dynamic channel monitor range */ + if (addr < QSFP_MONITOR_VAL_START) { + if (addr + len <= QSFP_MONITOR_VAL_END) + len = addr + len - QSFP_MONITOR_VAL_START; + else + len = QSFP_MONITOR_RANGE; + offset = QSFP_MONITOR_VAL_START - addr; + addr = QSFP_MONITOR_VAL_START; + } else if (addr == QSFP_MONITOR_VAL_START) { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_RANGE; + } else { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_VAL_END - addr + 1; + } + /* Refresh the values of the dynamic monitors from the cable */ + ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len); + if (ret != len) { + ret = -EAGAIN; + goto set_zeroes; + } + } + return 0; set_zeroes: diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index 69275ebd9597..36cf52359848 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -74,6 +74,9 @@ /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ +#define QSFP_MONITOR_VAL_START 22 +#define QSFP_MONITOR_VAL_END 81 +#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1) #define QSFP_TX_CTRL_BYTE_OFFS 86 #define QSFP_PWR_CTRL_BYTE_OFFS 93 #define QSFP_CDR_CTRL_BYTE_OFFS 98 diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5da190e6011b..8bc5013f39a1 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -55,7 +55,7 @@ #include "trace.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x +#define OP(x) RC_OP(x) /** * hfi1_add_retry_timer - add/start a retry timer @@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies + @@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); @@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + @@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) { int rval = 0; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry */ if (qp->s_flags & RVT_S_TIMER) { qp->s_flags &= ~RVT_S_TIMER; @@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); @@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) int rval = 0; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; @@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } -/* only opcode mask for adaptive pio */ -const u32 rc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_READ_REQUEST & 0x1f)) | - BIT(OP(ACKNOWLEDGE & 0x1f)) | - BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | - BIT(OP(COMPARE_SWAP & 0x1f)) | - BIT(OP(FETCH_ADD & 0x1f)); - static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * Note the QP s_lock must be held. */ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; @@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 pmtu = qp->pmtu; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; @@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = mask_psn(e->psn); e->sent = 1; @@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; int delta; + lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) goto bail_no_tx; @@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * See restart_rc(). */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr + len); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr + len, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); @@ -841,7 +835,7 @@ bail_no_tx: * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. + * send side QP state and send engine. */ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, int is_fecn) @@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ @@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, if (!pbuf) { /* * We have no room to send at the moment. Pass - * responsibility for sending the ACK to the send tasklet + * responsibility for sending the ACK to the send engine * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ @@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, return; queue_ack: - this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; if (is_fecn) qp->s_flags |= RVT_S_ECN; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ hfi1_schedule_send(qp); +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; + lockdep_assert_held(&qp->s_lock); qp->s_cur = n; /* @@ -1027,7 +1025,7 @@ done: qp->s_psn = psn; /* * Set RVT_S_WAIT_PSN as rc_complete() may start the timer - * asynchronously before the send tasklet can get scheduled. + * asynchronously before the send engine can get scheduled. * Doing it in hfi1_make_rc_req() is too late. */ if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && @@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->s_retry == 0) { if (qp->s_mig_state == IB_MIG_ARMED) { hfi1_migrate_qp(qp); @@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe; u32 n = qp->s_last; + lockdep_assert_held(&qp->s_lock); /* Find the work request corresponding to the given PSN. */ for (;;) { wqe = rvt_get_swqe_ptr(qp, n); @@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; u32 psn; + lockdep_assert_held(&qp->s_lock); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct ib_wc wc; unsigned i; + lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; unsigned long to; + lockdep_assert_held(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, { struct rvt_swqe *wqe; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry timer */ hfi1_stop_rc_timers(qp); @@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, * Called at interrupt level. */ static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct hfi1_ctxtdata *rcd) @@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64)be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else { + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; - } if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; @@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, { if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) return; list_del_init(&qp->rspwait); qp->r_flags &= ~RVT_R_RSP_NAK; - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } /** @@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, +static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct hfi1_ctxtdata *rcd) { @@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, } if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, @@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, case OP(FETCH_ADD): { /* * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an + * or the send engine is already backed up to send an * earlier entry, we can ignore this request. */ if (!e || e->opcode != (u8)opcode || old_req) @@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; @@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int copy_last = 0; u32 rkey; + lockdep_assert_held(&qp->r_lock); bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; @@ -2342,7 +2343,7 @@ send_last: qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2397,7 +2398,7 @@ send_last: len = be32_to_cpu(reth->length); if (len) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2432,7 +2433,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2469,8 +2470,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2481,11 +2481,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *)qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64)atomic64_add_return(sdata, maddr) - sdata : (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2499,7 +2499,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2575,12 +2575,12 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp) { int has_grh = rcv_flags & HFI1_HAS_GRH; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); int diff; u32 opcode; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 48d5094f98e2..a1576aea4756 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -352,7 +352,7 @@ err: * * This is called from hfi1_do_send() to * forward a WQE addressed to the same HFI. - * Note that although we are single threaded due to the tasklet, we still + * Note that although we are single threaded due to the send engine, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. @@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps) { @@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work) * @work: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ void hfi1_do_send(struct rvt_qp *qp) @@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp) spin_unlock_irqrestore(&qp->s_lock, ps.flags); /* * If the packet cannot be sent now, return and - * the send tasklet will be woken up later. + * the send engine will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) return; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f9befc05b349..fd39bcaa062d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void) } /** + * sdma_engine_get_vl() - return vl for a given sdma engine + * @sde: sdma engine + * + * This function returns the vl mapped to a given engine, or an error if + * the mapping can't be found. The mapping fields are protected by RCU. + */ +int sdma_engine_get_vl(struct sdma_engine *sde) +{ + struct hfi1_devdata *dd = sde->dd; + struct sdma_vl_map *m; + u8 vl; + + if (sde->this_idx >= TXE_NUM_SDMA_ENGINES) + return -EINVAL; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + if (unlikely(!m)) { + rcu_read_unlock(); + return -EINVAL; + } + vl = m->engine_to_vl[sde->this_idx]; + rcu_read_unlock(); + + return vl; +} + +/** * sdma_select_engine_vl() - select sdma engine * @dd: devdata * @selector: a spreading factor @@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc( return sdma_select_engine_vl(dd, selector, vl); } +struct sdma_rht_map_elem { + u32 mask; + u8 ctr; + struct sdma_engine *sde[0]; +}; + +struct sdma_rht_node { + unsigned long cpu_id; + struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED]; + struct rhash_head node; +}; + +#define NR_CPUS_HINT 192 + +static const struct rhashtable_params sdma_rht_params = { + .nelem_hint = NR_CPUS_HINT, + .head_offset = offsetof(struct sdma_rht_node, node), + .key_offset = offsetof(struct sdma_rht_node, cpu_id), + .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .max_size = NR_CPUS, + .min_size = 8, + .automatic_shrinking = true, +}; + +/* + * sdma_select_user_engine() - select sdma engine based on user setup + * @dd: devdata + * @selector: a spreading factor + * @vl: this vl + * + * This function returns an sdma engine for a user sdma request. + * User defined sdma engine affinity setting is honored when applicable, + * otherwise system default sdma engine mapping is used. To ensure correct + * ordering, the mapping from <selector, vl> to sde must remain unchanged. + */ +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl) +{ + struct sdma_rht_node *rht_node; + struct sdma_engine *sde = NULL; + const struct cpumask *current_mask = tsk_cpus_allowed(current); + unsigned long cpu_id; + + /* + * To ensure that always the same sdma engine(s) will be + * selected make sure the process is pinned to this CPU only. + */ + if (cpumask_weight(current_mask) != 1) + goto out; + + cpu_id = smp_processor_id(); + rcu_read_lock(); + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + sdma_rht_params); + + if (rht_node && rht_node->map[vl]) { + struct sdma_rht_map_elem *map = rht_node->map[vl]; + + sde = map->sde[selector & map->mask]; + } + rcu_read_unlock(); + + if (sde) + return sde; + +out: + return sdma_select_engine_vl(dd, selector, vl); +} + +static void sdma_populate_sde_map(struct sdma_rht_map_elem *map) +{ + int i; + + for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++) + map->sde[map->ctr + i] = map->sde[i]; +} + +static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map, + struct sdma_engine *sde) +{ + unsigned int i, pow; + + /* only need to check the first ctr entries for a match */ + for (i = 0; i < map->ctr; i++) { + if (map->sde[i] == sde) { + memmove(&map->sde[i], &map->sde[i + 1], + (map->ctr - i - 1) * sizeof(map->sde[0])); + map->ctr--; + pow = roundup_pow_of_two(map->ctr ? : 1); + map->mask = pow - 1; + sdma_populate_sde_map(map); + break; + } + } +} + +/* + * Prevents concurrent reads and writes of the sdma engine cpu_mask + */ +static DEFINE_MUTEX(process_to_sde_mutex); + +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count) +{ + struct hfi1_devdata *dd = sde->dd; + cpumask_var_t mask, new_mask; + unsigned long cpu; + int ret, vl, sz; + + vl = sdma_engine_get_vl(sde); + if (unlikely(vl < 0)) + return -EINVAL; + + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) { + free_cpumask_var(mask); + return -ENOMEM; + } + ret = cpulist_parse(buf, mask); + if (ret) + goto out_free; + + if (!cpumask_subset(mask, cpu_online_mask)) { + dd_dev_warn(sde->dd, "Invalid CPU mask\n"); + ret = -EINVAL; + goto out_free; + } + + sz = sizeof(struct sdma_rht_map_elem) + + (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *)); + + mutex_lock(&process_to_sde_mutex); + + for_each_cpu(cpu, mask) { + struct sdma_rht_node *rht_node; + + /* Check if we have this already mapped */ + if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { + cpumask_set_cpu(cpu, new_mask); + continue; + } + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (!rht_node) { + rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); + if (!rht_node) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + if (!rht_node->map[vl]) { + kfree(rht_node); + ret = -ENOMEM; + goto out; + } + rht_node->cpu_id = cpu; + rht_node->map[vl]->mask = 0; + rht_node->map[vl]->ctr = 1; + rht_node->map[vl]->sde[0] = sde; + + ret = rhashtable_insert_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + if (ret) { + kfree(rht_node->map[vl]); + kfree(rht_node); + dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n", + cpu); + goto out; + } + + } else { + int ctr, pow; + + /* Add new user mappings */ + if (!rht_node->map[vl]) + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + + if (!rht_node->map[vl]) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl]->ctr++; + ctr = rht_node->map[vl]->ctr; + rht_node->map[vl]->sde[ctr - 1] = sde; + pow = roundup_pow_of_two(ctr); + rht_node->map[vl]->mask = pow - 1; + + /* Populate the sde map table */ + sdma_populate_sde_map(rht_node->map[vl]); + } + cpumask_set_cpu(cpu, new_mask); + } + + /* Clean up old mappings */ + for_each_cpu(cpu, cpu_online_mask) { + struct sdma_rht_node *rht_node; + + /* Don't cleanup sdes that are set in the new mask */ + if (cpumask_test_cpu(cpu, mask)) + continue; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (rht_node) { + bool empty = true; + int i; + + /* Remove mappings for old sde */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + if (rht_node->map[i]) + sdma_cleanup_sde_map(rht_node->map[i], + sde); + + /* Free empty hash table entries */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i]) + continue; + + if (rht_node->map[i]->ctr) { + empty = false; + break; + } + } + + if (empty) { + ret = rhashtable_remove_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + WARN_ON(ret); + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); + } + } + } + + cpumask_copy(&sde->cpu_mask, new_mask); +out: + mutex_unlock(&process_to_sde_mutex); +out_free: + free_cpumask_var(mask); + free_cpumask_var(new_mask); + return ret ? : strnlen(buf, PAGE_SIZE); +} + +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + mutex_lock(&process_to_sde_mutex); + if (cpumask_empty(&sde->cpu_mask)) + snprintf(buf, PAGE_SIZE, "%s\n", "empty"); + else + cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask); + mutex_unlock(&process_to_sde_mutex); + return strnlen(buf, PAGE_SIZE); +} + +static void sdma_rht_free(void *ptr, void *arg) +{ + struct sdma_rht_node *rht_node = ptr; + int i; + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); +} + +/** + * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings + * @s: seq file + * @dd: hfi1_devdata + * @cpuid: cpu id + * + * This routine dumps the process to sde mappings per cpu + */ +void sdma_seqfile_dump_cpu_list(struct seq_file *s, + struct hfi1_devdata *dd, + unsigned long cpuid) +{ + struct sdma_rht_node *rht_node; + int i, j; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + sdma_rht_params); + if (!rht_node) + return; + + seq_printf(s, "cpu%3lu: ", cpuid); + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i] || !rht_node->map[i]->ctr) + continue; + + seq_printf(s, " vl%d: [", i); + + for (j = 0; j < rht_node->map[i]->ctr; j++) { + if (!rht_node->map[i]->sde[j]) + continue; + + if (j > 0) + seq_puts(s, ","); + + seq_printf(s, " sdma%2d", + rht_node->map[i]->sde[j]->this_idx); + } + seq_puts(s, " ]"); + } + + seq_puts(s, "\n"); +} + /* * Free the indicated map struct */ @@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->num_sdma = num_engines; if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) goto bail; + + if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + goto bail; + dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; @@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); + rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* @@ -2086,6 +2439,11 @@ nodesc: * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit + * @count: pointer to a u32 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * @@ -2100,18 +2458,18 @@ nodesc: * side locking. * * Return: - * > 0 - Success (value is number of sdma_txreq's submitted), + * 0 - Success, * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list) + struct list_head *tx_list, u32 *count_out) { struct sdma_txreq *tx, *tx_next; int ret = 0; unsigned long flags; u16 tail = INVALID_TAIL; - int count = 0; + u32 submit_count = 0, flush_count = 0, total_count; spin_lock_irqsave(&sde->tail_lock, flags); retry: @@ -2127,33 +2485,34 @@ retry: } list_del_init(&tx->list); tail = submit_tx(sde, tx); - count++; + submit_count++; if (tail != INVALID_TAIL && - (count & SDMA_TAIL_UPDATE_THRESH) == 0) { + (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) { sdma_update_tail(sde, tail); tail = INVALID_TAIL; } } update_tail: + total_count = submit_count + flush_count; if (wait) - iowait_sdma_add(wait, count); + iowait_sdma_add(wait, total_count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); - return ret == 0 ? count : ret; + *count_out = total_count; + return ret; unlock_noconn: spin_lock(&sde->flushlist_lock); list_for_each_entry_safe(tx, tx_next, tx_list, list) { tx->wait = wait; list_del_init(&tx->list); - if (wait) - iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; trace_hfi1_sdma_in_sn(sde, tx->sn); #endif list_add_tail(&tx->list, &sde->flushlist); + flush_count++; if (wait) { wait->tx_count++; wait->count += tx->num_desc; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..56257ea3598f 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -413,6 +413,8 @@ struct sdma_engine { spinlock_t flushlist_lock; /* private: */ struct list_head flushlist; + struct cpumask cpu_mask; + struct kobject kobj; }; int sdma_init(struct hfi1_devdata *dd, u8 port); @@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde, struct sdma_txreq *tx); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list); + struct list_head *tx_list, + u32 *count); int sdma_ahg_alloc(struct sdma_engine *sde); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); @@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl( u32 selector, u8 vl); +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl); +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf); +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count); +int sdma_engine_get_vl(struct sdma_engine *sde); void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *); +void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd, + unsigned long cpuid); #ifdef CONFIG_SDMA_VERBOSITY void sdma_dumpstate(struct sdma_engine *); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 74c84c655f7e..edba22461a9c 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -766,13 +766,95 @@ bail: return ret; } +struct sde_attribute { + struct attribute attr; + ssize_t (*show)(struct sdma_engine *sde, char *buf); + ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt); +}; + +static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!sde_attr->show) + return -EINVAL; + + return sde_attr->show(sde, buf); +} + +static ssize_t sde_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!sde_attr->store) + return -EINVAL; + + return sde_attr->store(sde, buf, count); +} + +static const struct sysfs_ops sde_sysfs_ops = { + .show = sde_show, + .store = sde_store, +}; + +static struct kobj_type sde_ktype = { + .sysfs_ops = &sde_sysfs_ops, +}; + +#define SDE_ATTR(_name, _mode, _show, _store) \ + struct sde_attribute sde_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + return sdma_get_cpu_to_sde_map(sde, buf); +} + +static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde, + const char *buf, size_t count) +{ + return sdma_set_cpu_to_sde_map(sde, buf, count); +} + +static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) +{ + int vl; + + vl = sdma_engine_get_vl(sde); + if (vl < 0) + return vl; + + return snprintf(buf, PAGE_SIZE, "%d\n", vl); +} + +static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, + sde_show_cpu_to_sde_map, + sde_store_cpu_to_sde_map); +static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL); + +static struct sde_attribute *sde_attribs[] = { + &sde_attr_cpu_list, + &sde_attr_vl +}; + /* * Register and create our files in /sys/class/infiniband. */ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) { struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; - int i, ret; + struct device *class_dev = &dev->dev; + int i, j, ret; for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { ret = device_create_file(&dev->dev, hfi1_attributes[i]); @@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) goto bail; } + for (i = 0; i < dd->num_sdma; i++) { + ret = kobject_init_and_add(&dd->per_sdma[i].kobj, + &sde_ktype, &class_dev->kobj, + "sdma%d", i); + if (ret) + goto bail; + + for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) { + ret = sysfs_create_file(&dd->per_sdma[i].kobj, + &sde_attribs[j]->attr); + if (ret) + goto bail; + } + } + return 0; bail: for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); + + for (i = 0; i < dd->num_sdma; i++) + kobject_del(&dd->per_sdma[i].kobj); + return ret; } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 4cfb13771897..01f525cd985a 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,9 +47,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) +u8 ibhdr_exhdr_len(struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u8 opcode; u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); @@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" #define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %lld" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" +#define ATOMICACKETH_PRN "origdata %llx" +#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op -static u64 ib_u64_get(__be32 *p) -{ - return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); -} - static const char *parse_syndrome(u8 syndrome) { switch (syndrome >> 5) { @@ -113,8 +108,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, RETH_PRN " " IMM_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length), be32_to_cpu(eh->rc.imm_data)); @@ -126,8 +120,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY): case OP(UC, RDMA_WRITE_ONLY): trace_seq_printf(p, RETH_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length)); break; @@ -145,20 +138,16 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->at.aeth) >> 24, parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24), be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK, - (unsigned long long) - ib_u64_get(eh->at.atomic_ack_eth)); + ib_u64_get(&eh->at.atomic_ack_eth)); break; /* atomiceth */ case OP(RC, COMPARE_SWAP): case OP(RC, FETCH_ADD): trace_seq_printf(p, ATOMICETH_PRN, - (unsigned long long)ib_u64_get( - eh->atomic_eth.vaddr), + get_ib_ateth_vaddr(&eh->atomic_eth), eh->atomic_eth.rkey, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.compare_data)); + get_ib_ateth_swap(&eh->atomic_eth), + get_ib_ateth_compare(&eh->atomic_eth)); break; /* deth */ case OP(UD, SEND_ONLY): diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 31654bbac1cf..26ae789e47cf 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, hw_free) __field(void __iomem *, piobase) __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) + __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) - __field(u64, rcvegr_phys) + __field(u64, rcvegr_dma) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; @@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; + __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; ), TP_printk("[%s] ctxt %u " UCTXT_FMT, __get_str(dev), @@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, + __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_phys + __entry->rcvegr_dma ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index c3e41aed0034..382fcda3a5f6 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,7 +55,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -74,7 +74,7 @@ __print_symbolic(lrh, \ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), + struct ib_header *hdr), TP_ARGS(dd, hdr), TP_STRUCT__entry( DD_DEV_ENTRY(dd) @@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); /* LRH */ @@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, ); DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 9ba1f615ec95..11e02b228922 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate, TRACE_EVENT(snoop_capture, TP_PROTO(struct hfi1_devdata *dd, int hdr_len, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, int data_len, void *data), TP_ARGS(dd, hdr_len, hdr, data_len, data), @@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture, __dynamic_array(u8, raw_pkt, data_len) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); if (__entry->lnh == HFI1_LRH_BTH) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index a726d96d185f..5e6d1bac4914 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -50,14 +50,7 @@ #include "qp.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/* only opcode mask for adaptive pio */ -const u32 uc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); +#define OP(x) UC_OP(x) /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) @@ -70,7 +63,7 @@ const u32 uc_only_opcode = int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords = 5; u32 bth0 = 0; @@ -304,12 +297,12 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f01e8e1d62d3..97ae24b6314c 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -271,7 +271,7 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; @@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, /* * opa_smp_check() - Do the regular pkey checking, and the additional - * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26 - * ("SMA Packet Checks"). + * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section + * 9.10.25 ("SMA Packet Checks"). * * Note that: * - Checks are done using the pkey directly from the packet's BTH, @@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /* * SMPs fall into one of four (disjoint) categories: - * SMA request, SMA response, trap, or trap repress. - * Our response depends, in part, on which type of - * SMP we're processing. + * SMA request, SMA response, SMA trap, or SMA trap repress. + * Our response depends, in part, on which type of SMP we're + * processing. * - * If this is not an SMA request, or trap repress: - * - accept MAD if the port is running an SM - * - pkey == FULL_MGMT_P_KEY => - * reply with unsupported method (i.e., just mark - * the smp's status field here, and let it be - * processed normally) - * - pkey != LIM_MGMT_P_KEY => - * increment port recv constraint errors, drop MAD - * If this is an SMA request or trap repress: + * If this is an SMA response, skip the check here. + * + * If this is an SMA request or SMA trap repress: * - pkey != FULL_MGMT_P_KEY => * increment port recv constraint errors, drop MAD + * + * Otherwise: + * - accept if the port is running an SM + * - drop MAD if it's an SMA trap + * - pkey == FULL_MGMT_P_KEY => + * reply with unsupported method + * - pkey != FULL_MGMT_P_KEY => + * increment port recv constraint errors, drop MAD */ switch (smp->method) { + case IB_MGMT_METHOD_GET_RESP: + case IB_MGMT_METHOD_REPORT_RESP: + break; case IB_MGMT_METHOD_GET: case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_REPORT: @@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, return 1; } break; - case IB_MGMT_METHOD_SEND: - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_GET_RESP: - case IB_MGMT_METHOD_REPORT_RESP: + default: if (ibp->rvp.port_cap_flags & IB_PORT_SM) return 0; + if (smp->method == IB_MGMT_METHOD_TRAP) + return 1; if (pkey == FULL_MGMT_P_KEY) { smp->status |= IB_SMP_UNSUP_METHOD; return 0; } - if (pkey != LIM_MGMT_P_KEY) { - ingress_pkey_table_fail(ppd, pkey, slid); - return 1; - } - break; - default: - break; + ingress_pkey_table_fail(ppd, pkey, slid); + return 1; } return 0; } @@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; @@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + u8 sc5 = hdr2sc(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0ecf27903dc2..a761f804111e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_SHIFT 24 #define KDETH_HCRC_LOWER_MASK 0xff +#define AHG_KDETH_INTR_SHIFT 12 + #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -546,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, u8 opcode, sc, vl; int req_queued = 0; u16 dlid; - u8 selector; + u32 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -751,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, dlid = be16_to_cpu(req->hdr.lrh[1]); selector = dlid_to_selector(dlid); + selector += uctxt->ctxt + fd->subctxt; + req->sde = sdma_select_user_engine(dd, selector, vl); - /* Have to select the engine */ - req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt + - selector), - vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; goto free_req; @@ -892,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { - int ret = 0; + int ret = 0, count; unsigned npkts = 0; struct user_sdma_txreq *tx = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL; @@ -1088,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) npkts++; } dosend: - ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps); - if (list_empty(&req->txps)) { - req->seqsubmitted = req->seqnum; - if (req->seqnum == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); - /* - * The txreq has already been submitted to the HW queue - * so we can free the AHG entry now. Corruption will not - * happen due to the sequential manner in which - * descriptors are processed. - */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) - sdma_ahg_free(req->sde, req->ahg_idx); - } - } else if (ret > 0) { - req->seqsubmitted += ret; - ret = 0; + ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); + req->seqsubmitted += count; + if (req->seqsubmitted == req->info.npkts) { + set_bit(SDMA_REQ_SEND_DONE, &req->flags); + /* + * The txreq has already been submitted to the HW queue + * so we can free the AHG entry now. Corruption will not + * happen due to the sequential manner in which + * descriptors are processed. + */ + if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + sdma_ahg_free(req->sde, req->ahg_idx); } return ret; @@ -1480,7 +1474,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* Clear KDETH.SH on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) >> 16); + INTR) << + AHG_KDETH_INTR_SHIFT); val &= cpu_to_le16(~(1U << 13)); AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } else { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index f803f7b5ef5d..4b7a16ceb362 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF; module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); -unsigned int hfi1_max_cqes = 0x2FFFF; +unsigned int hfi1_max_cqes = 0x2FFFFF; module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); MODULE_PARM_DESC(max_cqes, "Maximum number of completion queue entries to support"); @@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF; module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); -unsigned int hfi1_max_qps = 16384; +unsigned int hfi1_max_qps = 32768; module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); @@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, - [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, @@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_CNP] = &hfi1_cnp_rcv }; +#define OPMASK 0x1f + +static const u32 pio_opmask[BIT(3)] = { + /* RC */ + [IB_OPCODE_RC >> 5] = + BIT(RC_OP(SEND_ONLY) & OPMASK) | + BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | + BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(COMPARE_SWAP) & OPMASK) | + BIT(RC_OP(FETCH_ADD) & OPMASK), + /* UC */ + [IB_OPCODE_UC >> 5] = + BIT(UC_OP(SEND_ONLY) & OPMASK) | + BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), +}; + /* * System image GUID. */ @@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -719,7 +741,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct hfi1_ib_header *hdr; + struct ib_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); @@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ib_header *h = &tx->phdr.hdr; + struct ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_GSI: case IB_QPT_UD: break; - case IB_QPT_RC: - if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0 && - !sdma_txreq_built(&tx->txreq)) - return dd->process_pio_send; - break; case IB_QPT_UC: + case IB_QPT_RC: { + u8 op = get_opcode(h); + if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; + } default: break; } @@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; - struct hfi1_ib_header *hdr; + struct ib_other_headers *ohdr; + struct ib_header *hdr; send_routine sr; int ret; u8 lnh; @@ -1755,7 +1773,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; @@ -1782,7 +1800,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + sc5 = hdr2sc(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d1b101c54828..1c3815d89eb7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -60,6 +60,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_mad.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> #include <rdma/rdmavt_cq.h> @@ -80,16 +81,6 @@ struct hfi1_packet; */ #define HFI1_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -104,80 +95,16 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK BIT(31) -#define IB_BTH_SOLICITED BIT(23) -#define IB_BTH_MIG_REQ BIT(22) - -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define RC_OP(x) IB_OPCODE_RC_##x +#define UC_OP(x) IB_OPCODE_UC_##x + /* flags passed by hfi1_ib_rcv() */ enum { HFI1_HAS_GRH = (1 << 0), }; -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -union ib_ehdrs { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; -} __packed; - -struct hfi1_other_headers { - __be32 bth[3]; - union ib_ehdrs u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct hfi1_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct hfi1_other_headers oth; - } l; - struct hfi1_other_headers oth; - } u; -} __packed; - struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -187,7 +114,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct hfi1_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp); @@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_stop_rc_timers(struct rvt_qp *qp); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err); @@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct hfi1_ib_header *h) +static inline u8 get_opcode(struct ib_header *h) { u16 lnh = be16_to_cpu(h->lrh[0]) & 3; @@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index d8fb056526f8..094ab829ec42 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index b738acdb9b02..8ec09e470f84 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -232,7 +232,7 @@ struct i40iw_device { struct i40e_client *client; struct i40iw_hw hw; struct i40iw_cm_core cm_core; - unsigned long *mem_resources; + u8 *mem_resources; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; @@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev, *next = resource_num + 1; if (*next == max_resources) *next = 0; - spin_unlock_irqrestore(&iwdev->resource_lock, flags); *req_resource_num = resource_num; + spin_unlock_irqrestore(&iwdev->resource_lock, flags); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index c490f8d49864..85637696f6e9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); @@ -3350,26 +3350,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp) } /** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - -/** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ @@ -3641,7 +3621,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 3ee0cad96bc6..0c92a40b3e86 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp, info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR)) info.reset_tcp_conn = true; + iwqp->hw_iwarp_state = state; i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 798335fa3105..ac2f3cd9478c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = { .notifier_call = i40iw_net_event }; -static int i40iw_notifiers_registered; +static atomic_t i40iw_notifiers_registered; /** * i40iw_find_i40e_handler - find a handler given a client info @@ -1342,12 +1342,11 @@ exit: */ static void i40iw_register_notifiers(void) { - if (!i40iw_notifiers_registered) { + if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { register_inetaddr_notifier(&i40iw_inetaddr_notifier); register_inet6addr_notifier(&i40iw_inetaddr6_notifier); register_netevent_notifier(&i40iw_net_notifier); } - i40iw_notifiers_registered++; } /** @@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: - if (i40iw_notifiers_registered > 0) { - i40iw_notifiers_registered--; + if (!atomic_dec_return(&i40iw_notifiers_registered)) { unregister_netevent_notifier(&i40iw_net_notifier); unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); @@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0e8db0a35141..6fd043b1d714 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, { if (!mem) return I40IW_ERR_PARAM; + /* + * mem->va points to the parent of mem, so both mem and mem->va + * can not be touched once mem->va is freed + */ kfree(mem->va); - mem->va = NULL; return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338877bf..6329c971c22f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return &iwqp->ibqp; error: i40iw_free_qp_resources(iwdev, iwqp, qp_num); - kfree(mem); return ERR_PTR(err_code); } @@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) } if (iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); - kfree(iwpbl->iwmr); - iwpbl->iwmr = NULL; + kfree(iwmr); return 0; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 2f0b4eed7eae..1ea686b9e0f9 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@ -689,12 +687,6 @@ repoll: is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && - is_send)) { - pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; - } - /* Resize CQ in progress */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { if (cq->resize_buf) { @@ -720,12 +712,6 @@ repoll: */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); - if (unlikely(!mqp)) { - pr_warn("CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -738,11 +724,6 @@ repoll: /* SRQ is also in the radix tree */ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, srq_num); - if (unlikely(!msrq)) { - pr_warn("CQ %06x with entry for unknown SRQN %06x\n", - cq->mcq.cqn, srq_num); - return -EINVAL; - } } if (is_send) { @@ -852,9 +833,11 @@ repoll: if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); @@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; - int err = 0; struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); @@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } for (npolled = 0; npolled < num_entries; ++npolled) { - err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) break; } @@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return npolled; } int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1301a1db958c..1672907ff219 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1171,6 +1171,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2263,6 +2284,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1811eb5b6aab..b597e8227591 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2287,6 +2287,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 7d30be0f287b..a21d37f02f35 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 8db7cb1a3716..35141f451e5c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 16f654dc8a46..570bc866b1d6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2509,24 +2509,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 1188fef08450..79d017baf6f4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -552,12 +552,6 @@ repoll: * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -618,13 +612,6 @@ repoll: read_lock(&dev->mdev->priv.mkey_table.lock); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmkey)) { - read_unlock(&dev->mdev->priv.mkey_table.lock); - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", - cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); - return -EINVAL; - } - mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -675,7 +662,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -687,8 +673,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); - if (err) + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } @@ -697,10 +682,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return soft_polled + npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f4160d56dc4f..22174774dbb8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -37,7 +37,6 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> -#include <linux/io-mapping.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif @@ -328,7 +327,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { @@ -1547,6 +1548,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); @@ -2050,6 +2058,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio_tx = NULL; @@ -2085,7 +2094,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 40df2cca0609..996b54e366b0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; i = 0; @@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; - m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp))); + m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); skip = 1 << m; mask = skip - 1; base = pfn; @@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, } else { if (base + p != pfn) { tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; base = pfn; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1df8a67d4f02..dcdcd195fe53 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -406,6 +406,7 @@ enum mlx5_ib_qp_flags { /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, + MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 9d97a71a1335..41f4c2afbcdd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1467,6 +1467,7 @@ create_tir: kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; + qp->flags |= MLX5_IB_QP_RSS; return 0; err: @@ -3744,12 +3745,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { - int err = 0; - - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { - err = -ENOMEM; - return err; - } + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); @@ -3765,7 +3762,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; - return err; + return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, @@ -3844,7 +3841,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; + err = -EINVAL; *bad_wr = wr; goto out; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16740dcb876b..67fc0b6857e1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq = (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; - attr->max_send_sge = ((rsp->max_write_send_sge & + attr->max_send_sge = ((rsp->max_recv_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); - attr->max_recv_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_recv_sge = (rsp->max_recv_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT; attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; - attr->max_rdma_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_rdma_sge = (rsp->max_wr_rd_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0efc9662c6d8..37df4481bb8f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -554,9 +554,9 @@ enum { OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, @@ -612,6 +612,8 @@ enum { OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF, }; struct ocrdma_mbx_query_config { @@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config { struct ocrdma_mbx_rsp rsp; u32 qp_srq_cq_ird_ord; u32 max_pd_ca_ack_delay; - u32 max_write_send_sge; + u32 max_recv_send_sge; u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; @@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; + u32 max_wr_rd_sge; + u32 ird_pgsz_num_pages; }; struct ocrdma_fw_ver_rsp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 71d0534960d6..6af44f8db3d5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index bbf0a163aeab..a3e21a25cea5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include "qib_common.h" @@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern u16 qpt_mask; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5e75b43c596b..5bad8e3b40bb 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) + __acquires(RCU) { struct qib_qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qib_qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qib_qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qib_qp_iter *iter = iter_ptr; @@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 67ee6438cf59..728e0a030d2e 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, ret = 1; else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { /* For TIDERR and RC QPs premptively schedule a NAK */ - struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; - struct qib_other_headers *ohdr = NULL; + struct ib_header *hdr = (struct ib_header *)rhdr; + struct ib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -588,8 +588,7 @@ move_along: qib_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } bail: diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fcdf37913a26..c3edc033f7c4 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct qib_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } + if (pos != 0 || count != sizeof(struct qib_flash)) + return -EINVAL; - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); dd = private2dd(file); if (qib_eeprom_write(dd, pos, tmp, count)) { @@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf, bail_tmp: kfree(tmp); - -bail: return ret; } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ce4034071f9c..ded27172320e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd) u32 *hdr; u64 pbc; const unsigned hdrwords = 7; - static struct qib_ib_header ibhdr = { + static struct ib_header ibhdr = { .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), .lrh[1] = IB_LID_PERMISSIVE, .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cc0aae1d781..99d31efe4c2f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -41,14 +41,6 @@ #include "qib.h" -/* - * mask field which was present in now deleted qib_qpn_table - * is not present in rvt_qpn_table. Defining the same field - * as qpt_mask here instead of adding the mask field to - * rvt_qpn_table. - */ -u16 qpt_mask; - static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) { @@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, static inline unsigned find_next_offset(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off, - unsigned n) + unsigned n, u16 qpt_mask) { if (qpt_mask) { off++; @@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, verbs_dev); + u16 qpt_mask = dd->qpn_mask; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, goto bail; } offset = find_next_offset(qpt, map, offset, - dd->n_krcv_queues); + dd->n_krcv_queues, qpt_mask); qpn = mk_qpn(qpt, map, offset); /* * This test differs from alloc_pidmap(). @@ -573,10 +566,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) return NULL; iter->dev = dev; - if (qib_qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 444028a3582a..2097512e75aa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp) * Note the QP s_lock must be held. */ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, - struct qib_other_headers *ohdr, u32 pmtu) + struct ib_other_headers *ohdr, u32 pmtu) { struct rvt_ack_entry *e; u32 hwords; @@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = qib_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn & QIB_PSN_MASK; e->sent = 1; @@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; u32 hwords; @@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp) u32 hwords; u32 pbufn; u32 __iomem *piobuf; - struct qib_ib_header hdr; - struct qib_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; u32 control; unsigned long flags; @@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; @@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * Called at interrupt level. */ static void qib_rc_rcv_resp(struct qib_ibport *ibp, - struct qib_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, @@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) @@ -1608,7 +1600,7 @@ bail: * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static int qib_rc_rcv_error(struct qib_other_headers *ohdr, +static int qib_rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, @@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n) * for the given QP. * Called at interrupt level. */ -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; @@ -2177,8 +2169,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2189,11 +2180,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2233,7 +2224,7 @@ rnr_nak: /* Queue RNR NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2245,7 +2236,7 @@ nack_op_err: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2259,7 +2250,7 @@ nack_inv: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b67779256297..de1bde5950f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the qib_migrate_qp() call. */ -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2) { struct qib_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1d61bd04f449..5b2d483451ad 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -48,7 +48,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords; u32 bth0; @@ -236,10 +236,10 @@ bail: * for the given QP. * Called at interrupt level. */ -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 10d062561bd9..f45cad1198b0 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -245,7 +245,7 @@ drop: int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; struct qib_ibport *ibp; @@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) * for the given QP. * Called at interrupt level. */ -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; int opcode; u32 hdrsize; u32 pad; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 2d7e52619b55..954f15064514 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) * for the given QP. * Called at interrupt level. */ -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) { struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_ib_header *hdr = rhdr; + struct ib_header *hdr = rhdr; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; int lnh; @@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) if (tx->wqe) qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct qib_ib_header *hdr; + struct ib_header *hdr; if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) hdr = &tx->align_buf->hdr; @@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) return ret; } -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp) return ret; } -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1133,7 +1133,7 @@ done: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@ -1607,8 +1607,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); - qpt_mask = dd->qpn_mask; - INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 736ced684842..94fd30fdedac 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -45,6 +45,7 @@ #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> @@ -63,16 +64,6 @@ struct qib_verbs_txreq; */ #define QIB_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -87,22 +78,9 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK (1 << 31) -#define IB_BTH_SOLICITED (1 << 23) -#define IB_BTH_MIG_REQ (1 << 22) - /* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ #define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls) } } -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -struct qib_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; - } u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct qib_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct qib_other_headers oth; - } l; - struct qib_other_headers oth; - } u; -} __packed; - struct qib_pio_header { __le32 pbc[2]; - struct qib_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -191,7 +117,7 @@ struct qib_pio_header { * is made common. */ struct qib_qp_priv { - struct qib_ib_header *s_hdr; /* next packet header to send */ + struct ib_header *s_hdr; /* next packet header to send */ struct list_head iowait; /* link for wait PIO buf */ atomic_t s_dma_busy; struct qib_verbs_txreq *s_tx; @@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); void qib_put_txreq(struct qib_verbs_txreq *tx); -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len); void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, @@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release); -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr); +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err); int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); void mr_rcu_callback(struct rcu_head *list); @@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2); void _qib_do_send(struct work_struct *work); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f4a52d..0a89a955550b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -664,7 +664,8 @@ static int __init usnic_ib_init(void) return err; } - if (pci_register_driver(&usnic_ib_pci_driver)) { + err = pci_register_driver(&usnic_ib_pci_driver); + if (err) { usnic_err("Unable to register with PCI\n"); goto out_umem_fini; } diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 33076a5eee2f..01f71caa3ac4 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -138,6 +138,21 @@ static void rvt_unmap_sg(struct ib_device *dev, /* This is a stub, nothing to be done here */ } +static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_map_sg(dev, sgl, nents, direction); +} + +static void rvt_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_unmap_sg(dev, sg, nents, direction); +} + static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -177,6 +192,8 @@ struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = { .unmap_page = rvt_dma_unmap_page, .map_sg = rvt_map_sg, .unmap_sg = rvt_unmap_sg, + .map_sg_attrs = rvt_map_sg_attrs, + .unmap_sg_attrs = rvt_unmap_sg_attrs, .sync_single_for_cpu = rvt_sync_single_for_cpu, .sync_single_for_device = rvt_sync_single_for_device, .alloc_coherent = rvt_dma_alloc_coherent, diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b401b8..46b64970058e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bdb540f25a88..6500c3b5a89c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } /** - * reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset + * rvt_init_qp - initialize the QP state to the reset state + * @qp: the QP to init or reinit * @type: the QP type - * r and s lock are required to be held by the caller + * + * This function is called from both rvt_create_qp() and + * rvt_reset_qp(). The difference is that the reset + * patch the necessary locks to protect against concurent + * access. */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) - __releases(&qp->s_lock) - __releases(&qp->s_hlock) - __releases(&qp->r_lock) - __acquires(&qp->r_lock) - __acquires(&qp->s_hlock) - __acquires(&qp->s_lock) +static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - - /* Let drivers flush their waitlist */ - rdi->driver_f.flush_qp_waiters(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - - /* Stop the send queue and the retry timer */ - rdi->driver_f.stop_send_queue(qp); - - /* Wait for things to stop */ - rdi->driver_f.quiesce_qp(qp); - - /* take qp out the hash and wait for it to be unused */ - rvt_remove_qp(rdi, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* grab the lock b/c it was locked at call time */ - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - - rvt_clear_mr_refs(qp, 1); - } - - /* - * Let the driver do any tear down it needs to for a qp - * that has been reset - */ - rdi->driver_f.notify_qp_reset(qp); - qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; @@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** + * rvt_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + * + * r_lock, s_hlock, and s_lock are required to be held by the caller + */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) + __must_hold(&qp->s_lock) + __must_hold(&qp->s_hlock) + __must_hold(&qp->r_lock) +{ + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + /* + * Let the driver do any tear down or re-init it needs to for + * a qp that has been reset + */ + rdi->driver_f.notify_qp_reset(qp); + } + rvt_init_qp(rdi, qp, type); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - rvt_reset_qp(rdi, qp, init_attr->qp_type); + rvt_init_qp(rdi, qp, init_attr->qp_type); break; default: @@ -873,7 +890,8 @@ bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - vfree(qp->r_rq.wq); + if (!qp->ip) + vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); @@ -905,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) int ret = 0; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) goto bail; @@ -979,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; unsigned long flags; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { @@ -996,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** - * qib_modify_qp - modify the attributes of a queue pair + * rvt_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f0ca79..ab6c3c25d7ff 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -358,18 +358,15 @@ static int __init rxe_module_init(void) /* initialize slab caches for managed objects */ err = rxe_cache_init(); if (err) { - pr_err("rxe: unable to init object pools\n"); + pr_err("unable to init object pools\n"); return err; } err = rxe_net_init(); - if (err) { - pr_err("rxe: unable to init\n"); - rxe_cache_exit(); + if (err) return err; - } - pr_info("rxe: loaded\n"); + pr_info("loaded\n"); return 0; } @@ -379,8 +376,8 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_cache_exit(); - pr_info("rxe: unloaded\n"); + pr_info("unloaded\n"); } -module_init(rxe_module_init); +late_initcall(rxe_module_init); module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 12c71c549f97..a696af81e4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -34,6 +34,11 @@ #ifndef RXE_H #define RXE_H +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/skbuff.h> #include <linux/crc32.h> diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 5c9474212d4e..604f6fee96bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -39,7 +39,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) struct rxe_port *port; if (attr->port_num != 1) { - pr_info("rxe: invalid port_num = %d\n", attr->port_num); + pr_info("invalid port_num = %d\n", attr->port_num); return -EINVAL; } @@ -47,7 +47,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) if (attr->ah_flags & IB_AH_GRH) { if (attr->grh.sgid_index > port->attr.gid_tbl_len) { - pr_info("rxe: invalid sgid index = %d\n", + pr_info("invalid sgid index = %d\n", attr->grh.sgid_index); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de44095..6c5e29db88e3 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -567,7 +567,8 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("state = %s\n", comp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -689,7 +690,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -702,7 +710,8 @@ int rxe_completer(void *arg) qp->comp.rnr_retry--; qp->req.need_retry = 1; - pr_debug("set rnr nak timer\n"); + pr_debug("qp#%d set rnr nak timer\n", + qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); @@ -716,6 +725,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c index 7634c1a81b2b..a0f8af5851ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_dma.c +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -117,6 +117,21 @@ static void rxe_unmap_sg(struct ib_device *dev, WARN_ON(!valid_dma_direction(direction)); } +static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rxe_map_sg(dev, sgl, nents, direction); +} + +static void rxe_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + rxe_unmap_sg(dev, sg, nents, direction); +} + static void rxe_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -159,6 +174,8 @@ struct ib_dma_mapping_ops rxe_dma_mapping_ops = { .unmap_page = rxe_dma_unmap_page, .map_sg = rxe_map_sg, .unmap_sg = rxe_unmap_sg, + .map_sg_attrs = rxe_map_sg_attrs, + .unmap_sg_attrs = rxe_unmap_sg_attrs, .sync_single_for_cpu = rxe_sync_single_for_cpu, .sync_single_for_device = rxe_sync_single_for_device, .alloc_coherent = rxe_dma_alloc_coherent, diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4a5484ef604f..73849a5a91b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -198,7 +198,7 @@ void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) { qp->resp.res_head++; - if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic)) + if (unlikely(qp->resp.res_head == qp->attr.max_dest_rd_atomic)) qp->resp.res_head = 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 54b3c7c99eff..c572a4c09359 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -126,7 +126,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + pr_err("err %d from remap_vmalloc_range\n", ret); goto done; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index f3dab6574504..1869152f1d23 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -39,7 +39,7 @@ */ static u8 rxe_get_key(void) { - static unsigned key = 1; + static u32 key = 1; key = key << 1; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea8b41d..b8258e4f0aea 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -65,7 +65,7 @@ struct rxe_dev *net_to_rxe(struct net_device *ndev) return found; } -struct rxe_dev *get_rxe_by_name(const char* name) +struct rxe_dev *get_rxe_by_name(const char *name) { struct rxe_dev *rxe; struct rxe_dev *found = NULL; @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -349,14 +350,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route4(rxe->ndev, saddr, daddr); if (!dst) { @@ -375,12 +376,12 @@ static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) return 0; } -static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route6(rxe->ndev, saddr, daddr); if (!dst) { @@ -407,9 +408,9 @@ static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_av *av = rxe_get_av(pkt); if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(rxe, skb, av); + err = prepare4(rxe, pkt, skb, av); else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(rxe, skb, av); + err = prepare6(rxe, pkt, skb, av); *crc = rxe_icrc_hdr(pkt, skb); @@ -600,8 +601,7 @@ void rxe_port_up(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_UP; rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); - pr_info("rxe: set %s active\n", rxe->ib_dev.name); - return; + pr_info("set %s active\n", rxe->ib_dev.name); } /* Caller must hold net_info_lock */ @@ -614,8 +614,7 @@ void rxe_port_down(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; rxe_port_event(rxe, IB_EVENT_PORT_ERR); - pr_info("rxe: set %s down\n", rxe->ib_dev.name); - return; + pr_info("set %s down\n", rxe->ib_dev.name); } static int rxe_notify(struct notifier_block *not_blk, @@ -640,7 +639,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); + pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_REBOOT: @@ -650,7 +649,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("rxe: ignoring netdev event = %ld for %s\n", + pr_info("ignoring netdev event = %ld for %s\n", event, ndev->name); break; } @@ -658,51 +657,68 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); - return -1; - } - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); + htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } + return 0; +} - return err; +int rxe_net_ipv6_init(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + + spin_lock_init(&dev_list_lock); + + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("Failed to create IPv6 UDP tunnel\n"); + return -1; + } +#endif + return 0; } void rxe_net_exit(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); + unregister_netdevice_notifier(&rxe_net_notifier); +} - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); +int rxe_net_init(void) +{ + int err; - unregister_netdevice_notifier(&rxe_net_notifier); + recv_sockets.sk6 = NULL; + + err = rxe_net_ipv4_init(); + if (err) + return err; + err = rxe_net_ipv6_init(); + if (err) + goto err_out; + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("Failed to register netdev notifier\n"); + goto err_out; + } + return 0; +err_out: + rxe_net_exit(); + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76d16cc..1c06b3bfe1b6 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,6 +44,8 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 22ba24f2a2c1..b8036cfbce04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -146,7 +146,7 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { int i; - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); @@ -174,7 +174,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) struct resp_res *res; if (qp->resp.resources) { - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); } @@ -298,8 +298,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp->rq.max_wr, qp->rq.max_sge, wqe_size); + pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, @@ -596,14 +596,21 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + qp->attr.max_rd_atomic = max_rd_atomic; + atomic_set(&qp->req.rd_atomic, max_rd_atomic); + } + + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + int max_dest_rd_atomic = + __roundup_pow_of_two(attr->max_dest_rd_atomic); + + qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; + free_rd_atomic_resources(qp); - err = alloc_rd_atomic_resources(qp, max_rd_atomic); + err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic); if (err) return err; - - qp->attr.max_rd_atomic = max_rd_atomic; - atomic_set(&qp->req.rd_atomic, max_rd_atomic); } if (mask & IB_QP_CUR_STATE) @@ -673,24 +680,27 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("set retry count = %d\n", attr->retry_cnt); + pr_debug("qp#%d set retry count = %d\n", qp_num(qp), + attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("set rnr retry count = %d\n", attr->rnr_retry); + pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), + attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("set resp psn = 0x%x\n", qp->resp.psn); + pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), + qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("set min rnr timer = 0x%x\n", + pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), attr->min_rnr_timer); } @@ -698,12 +708,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("set req psn = 0x%x\n", qp->req.psn); - } - - if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - qp->attr.max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); + pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -717,38 +722,38 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp state -> RESET\n"); + pr_debug("qp#%d state -> RESET\n", qp_num(qp)); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp state -> INIT\n"); + pr_debug("qp#%d state -> INIT\n", qp_num(qp)); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp state -> RTR\n"); + pr_debug("qp#%d state -> RTR\n", qp_num(qp)); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp state -> RTS\n"); + pr_debug("qp#%d state -> RTS\n", qp_num(qp)); qp->req.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp state -> SQD\n"); + pr_debug("qp#%d state -> SQD\n", qp_num(qp)); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp state -> SQE !!?\n"); + pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); /* Not possible from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp state -> ERR\n"); + pr_debug("qp#%d state -> ERR\n", qp_num(qp)); rxe_qp_error(qp); break; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c23e08b..46f062842a9a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); @@ -387,7 +387,8 @@ int rxe_rcv(struct sk_buff *skb) pack_icrc = be32_to_cpu(*icrcp); calc_icrc = rxe_icrc_hdr(pkt, skb); - calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), + payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { char saddr[sizeof(struct in6_addr)]; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d77021..832846b73ea0 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -38,7 +38,7 @@ #include "rxe_queue.h" static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode); + u32 opcode); static inline void retry_first_write_send(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -121,7 +121,7 @@ void rnr_nak_timer(unsigned long data) { struct rxe_qp *qp = (struct rxe_qp *)data; - pr_debug("rnr nak timer fired\n"); + pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); rxe_run_task(&qp->req.task, 1); } @@ -187,7 +187,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) return wqe; } -static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -259,7 +259,7 @@ static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) return -EINVAL; } -static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -311,7 +311,7 @@ static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) } static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode) + u32 opcode) { int fits = (wqe->dma.resid <= qp->mtu); @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} - qp->req.opcode = pkt->opcode; +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -566,12 +588,13 @@ int rxe_requester(void *arg) struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; - unsigned mask; + enum rxe_hdr_mask mask; int payload; int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -603,7 +626,8 @@ next_wqe: rmr = rxe_pool_get_index(&rxe->mr_pool, wqe->wr.ex.invalidate_rkey >> 8); if (!rmr) { - pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); + pr_err("No mr for key %#x\n", + wqe->wr.ex.invalidate_rkey); wqe->state = wqe_state_error; wqe->status = IB_WC_MW_BIND_ERR; goto exit; @@ -679,22 +703,30 @@ next_wqe: skb = init_req_packet(qp, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("Failed allocating skb\n"); + pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); goto err; } if (fill_packet(qp, wqe, &pkt, skb, payload)) { - pr_debug("Error during fill packet\n"); + pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. + */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b46e2ad..dd3d88adc003 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -383,7 +383,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, * too many read/atomic ops, we just * recycle the responder resource queue */ - if (likely(qp->attr.max_rd_atomic > 0)) + if (likely(qp->attr.max_dest_rd_atomic > 0)) return RESPST_CHK_LENGTH; else return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; @@ -749,6 +749,18 @@ static enum resp_states read_reply(struct rxe_qp *qp, return state; } +static void build_rdma_network_hdr(union rdma_network_hdr *hdr, + struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(hdr, 0, sizeof(*hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh)); +} + /* Executes a new request. A retried request never reach that function (send * and writes are discarded, and reads and atomics are retried elsewhere. */ @@ -761,13 +773,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { union rdma_network_hdr hdr; - struct sk_buff *skb = PKT_TO_SKB(pkt); - memset(&hdr, 0, sizeof(hdr)); - if (skb->protocol == htons(ETH_P_IP)) - memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); - else if (skb->protocol == htons(ETH_P_IPV6)) - memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); + build_rdma_network_hdr(&hdr, pkt); err = send_data_in(qp, &hdr, sizeof(hdr)); if (err) @@ -881,7 +888,8 @@ static enum resp_states do_complete(struct rxe_qp *qp, rmr = rxe_pool_get_index(&rxe->mr_pool, wc->ex.invalidate_rkey >> 8); if (unlikely(!rmr)) { - pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); + pr_err("Bad rkey %#x invalidation\n", + wc->ex.invalidate_rkey); return RESPST_ERROR; } rmr->state = RXE_MEM_STATE_FREE; @@ -972,11 +980,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1126,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy); @@ -1207,7 +1216,8 @@ int rxe_responder(void *arg) } while (1) { - pr_debug("state = %s\n", resp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index cf8e77800046..d5ed7571128f 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -79,7 +79,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); err = -EINVAL; goto err; } @@ -92,20 +92,20 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) } if (net_to_rxe(ndev)) { - pr_err("rxe: already configured on %s\n", intf); + pr_err("already configured on %s\n", intf); err = -EINVAL; goto err; } rxe = rxe_net_add(ndev); if (!rxe) { - pr_err("rxe: failed to add %s\n", intf); + pr_err("failed to add %s\n", intf); err = -EINVAL; goto err; } rxe_set_port_state(ndev); - pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); + pr_info("added %s to %s\n", rxe->ib_dev.name, intf); err: if (ndev) dev_put(ndev); @@ -120,7 +120,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); return -EINVAL; } @@ -133,7 +133,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) rxe = get_rxe_by_name(intf); if (!rxe) { - pr_err("rxe: not configured on %s\n", intf); + pr_err("not configured on %s\n", intf); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4552be960c6a..19841c863daf 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -100,10 +100,12 @@ static int rxe_query_port(struct ib_device *dev, rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); speed = cmd.speed; } else { - pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); + pr_warn("%s speed is unknown, defaulting to 1000\n", + rxe->ndev->name); speed = 1000; } - rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); + rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, + &attr->active_width); mutex_unlock(&rxe->usdev_lock); return 0; @@ -761,7 +763,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, } static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, - unsigned mask, u32 length) + unsigned int mask, u32 length) { int err; struct rxe_sq *sq = &qp->sq; @@ -801,26 +803,15 @@ err1: return err; } -static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) { int err = 0; - struct rxe_qp *qp = to_rqp(ibqp); unsigned int mask; unsigned int length = 0; int i; int must_sched; - if (unlikely(!qp->valid)) { - *bad_wr = wr; - return -EINVAL; - } - - if (unlikely(qp->req.state < QP_STATE_READY)) { - *bad_wr = wr; - return -EINVAL; - } - while (wr) { mask = wr_opcode_mask(wr->opcode, qp); if (unlikely(!mask)) { @@ -861,6 +852,29 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } +static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + if (qp->is_user) { + /* Utilize process context to do protocol processing */ + rxe_run_task(&qp->req.task, 0); + return 0; + } else + return rxe_post_send_kernel(qp, wr, bad_wr); +} + static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { @@ -1133,8 +1147,8 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int *sg_offset) +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) { struct rxe_mem *mr = to_rmr(ibmr); int n; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7899167536e3..7b8d2d9e2263 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 951d9abcca8b..4ad297d3de89 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) } } +#define QPN_AND_OPTIONS_OFFSET 4 + static void ipoib_cm_tx_start(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, @@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ipoib_neigh *neigh; struct ipoib_cm_tx *p; unsigned long flags; + struct ipoib_path *path; int ret; struct ib_sa_path_rec pathrec; @@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; + qpn = IPOIB_QPN(neigh->daddr); + /* + * As long as the search is with these 2 locks, + * path existence indicates its validity. + */ + path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET); + if (!path) { + pr_info("%s ignore not valid path %pI6\n", + __func__, + neigh->daddr + QPN_AND_OPTIONS_OFFSET); + goto free_neigh; + } memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) spin_lock_irqsave(&priv->lock, flags); if (ret) { +free_neigh: neigh = p->neigh; if (neigh) { neigh->cm = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241b9406..be11d5d5b8c1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e95c02ee05c0..5636fc3da6b8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) return -EINVAL; } -static struct ipoib_path *__path_find(struct net_device *dev, void *gid) +struct ipoib_path *__path_find(struct net_device *dev, void *gid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 8df608ede366..6dd43f63238e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn) INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); + init_waitqueue_head(&isert_conn->rem_wait); kref_init(&isert_conn->kref); mutex_init(&isert_conn->mutex); INIT_WORK(&isert_conn->release_work, isert_release_work); @@ -448,7 +449,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } @@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn) BUG_ON(!device); isert_free_rx_descriptors(isert_conn); - if (isert_conn->cm_id) + if (isert_conn->cm_id && + !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); if (isert_conn->qp) { @@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn) isert_device_put(device); - kfree(isert_conn); + if (isert_conn->dev_removed) + wake_up_interruptible(&isert_conn->rem_wait); + else + kfree(isert_conn); } static void @@ -753,6 +758,7 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", @@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ - case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_conn = cma_id->qp->qp_context; + isert_conn->dev_removed = true; + isert_disconnected_handler(cma_id, event->event); + wait_event_interruptible(isert_conn->rem_wait, + isert_conn->state == ISER_CONN_DOWN); + kfree(isert_conn); + /* + * return non-zero from the callback to destroy + * the rdma cm id + */ + return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index fc791efe3a10..c02ada57d7f5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -158,6 +158,8 @@ struct isert_conn { struct work_struct release_work; bool logout_posted; bool snd_w_inv; + wait_queue_head_t rem_wait; + bool dev_removed; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 48a44af740a6..0b1f69ed2e92 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device) sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); |