summary | refs | log | tree | commit | diff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/cma.c18
-rw-r--r--drivers/infiniband/core/multicast.c13
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig1
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c300
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h10
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h5
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c220
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h3
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c139
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h9
-rw-r--r--drivers/infiniband/hw/hfi1/common.h8
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c184
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c42
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c185
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c63
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h46
-rw-r--r--drivers/infiniband/hw/hfi1/init.c50
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c33
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c258
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c36
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c34
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h3
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c146
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c377
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c103
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c31
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h13
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h14
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h4
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c15
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c45
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c60
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h93
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c26
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c12
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c5
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c4
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c40
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c23
-rw-r--r--drivers/infiniband/hw/mlx4/main.c3
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c14
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c37
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c22
-rw-r--r--drivers/infiniband/hw/mlx5/main.c18
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c6
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c13
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c14
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h12
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c4
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c26
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c17
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c73
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h94
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.c17
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c122
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_dma.c17
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c102
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c55
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c76
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c38
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c50
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c16
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c9
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c25
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c9
104 files changed, 2274 insertions, 1591 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8954792f1acc..36bf50ebb187 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
- if (!ndev)
- return -ENODEV;
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err2;
+ }
if (ndev->flags & IFF_LOOPBACK) {
dev_put(ndev);
- if (!id_priv->id.device->get_netdev)
- return -EOPNOTSUPP;
+ if (!id_priv->id.device->get_netdev) {
+ ret = -EOPNOTSUPP;
+ goto err2;
+ }
ndev = id_priv->id.device->get_netdev(id_priv->id.device,
id_priv->id.port_num);
- if (!ndev)
- return -ENODEV;
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err2;
+ }
}
route->path_rec->net = &init_net;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 49ecde98a3d9..e51b739f6ea3 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -106,7 +106,6 @@ struct mcast_group {
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
- int query_id;
u16 pkey_index;
u8 leave_state;
int retries;
@@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member)
member->multicast.comp_mask,
3000, GFP_KERNEL, join_handler, group,
&group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
+ return (ret > 0) ? 0 : ret;
}
static int send_leave(struct mcast_group *group, u8 leave_state)
@@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
IB_SA_MCMEMBER_REC_JOIN_STATE,
3000, GFP_KERNEL, leave_handler,
group, &group->query);
- if (ret >= 0) {
- group->query_id = ret;
- ret = 0;
- }
- return ret;
+ return (ret > 0) ? 0 : ret;
}
static void join_group(struct mcast_group *group, struct mcast_member *member,
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 23f38cf2c5cd..afe8b28e0878 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_CXGB4
tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+ select CHELSIO_LIB
select GENERIC_ALLOCATOR
---help---
This is an iWARP/RDMA driver for the Chelsio T4 and T5
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index e11cf7299945..fa40b685831b 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,4 +1,5 @@
ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index c9661d8f11dc..f1510cc76d2d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -49,6 +49,7 @@
#include <rdma/ib_addr.h>
+#include <libcxgb_cm.h>
#include "iw_cxgb4.h"
#include "clip_tbl.h"
@@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
{
- struct cpl_tid_release *req;
+ u32 len = roundup(sizeof(struct cpl_tid_release), 16);
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ skb = get_skb(skb, len, GFP_KERNEL);
if (!skb)
return;
- req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
- INIT_TP_WR(req, hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+
+ cxgb_mk_tid_release(skb, len, hwtid, 0);
c4iw_ofld_send(rdev, skb);
return;
}
@@ -333,6 +332,8 @@ static void remove_ep_tid(struct c4iw_ep *ep)
spin_lock_irqsave(&ep->com.dev->lock, flags);
_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+ if (idr_is_empty(&ep->com.dev->hwtid_idr))
+ wake_up(&ep->com.dev->wait);
spin_unlock_irqrestore(&ep->com.dev->lock, flags);
}
@@ -464,72 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
-static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
-{
- int i;
-
- egress_dev = get_real_dev(egress_dev);
- for (i = 0; i < dev->rdev.lldi.nports; i++)
- if (dev->rdev.lldi.ports[i] == egress_dev)
- return 1;
- return 0;
-}
-
-static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
- __u8 *peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos,
- __u32 sin6_scope_id)
-{
- struct dst_entry *dst = NULL;
-
- if (IS_ENABLED(CONFIG_IPV6)) {
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- memcpy(&fl6.daddr, peer_ip, 16);
- memcpy(&fl6.saddr, local_ip, 16);
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = sin6_scope_id;
- dst = ip6_route_output(&init_net, NULL, &fl6);
- if (!dst)
- goto out;
- if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
- !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
- dst_release(dst);
- dst = NULL;
- }
- }
-
-out:
- return dst;
-}
-
-static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
- struct neighbour *n;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- n = dst_neigh_lookup(&rt->dst, &peer_ip);
- if (!n)
- return NULL;
- if (!our_interface(dev, n->dev) &&
- !(n->dev->flags & IFF_LOOPBACK)) {
- neigh_release(n);
- dst_release(&rt->dst);
- return NULL;
- }
- neigh_release(n);
- return &rt->dst;
-}
-
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
pr_err(MOD "ARP failure\n");
@@ -704,56 +639,32 @@ static int send_flowc(struct c4iw_ep *ep)
static int send_halfclose(struct c4iw_ep *ep)
{
- struct cpl_close_con_req *req;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
- ep->hwtid));
+ cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+ NULL, arp_failure_discard);
+
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
static int send_abort(struct c4iw_ep *ep)
{
- struct cpl_abort_req *req;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
- set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
- t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
- req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
-}
+ cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+ ep, abort_arp_failure);
-static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- unsigned int *idx, int use_ts, int ipv6)
-{
- unsigned short hdr_size = (ipv6 ?
- sizeof(struct ipv6hdr) :
- sizeof(struct iphdr)) +
- sizeof(struct tcphdr) +
- (use_ts ?
- round_up(TCPOLEN_TIMESTAMP, 4) : 0);
- unsigned short data_size = mtu - hdr_size;
-
- cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+ return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
static int send_connect(struct c4iw_ep *ep)
@@ -768,7 +679,7 @@ static int send_connect(struct c4iw_ep *ep)
u64 opt0;
u32 opt2;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
int win, sizev4, sizev6, wrlen;
struct sockaddr_in *la = (struct sockaddr_in *)
&ep->com.local_addr;
@@ -815,10 +726,10 @@ static int send_connect(struct c4iw_ep *ep)
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -1445,9 +1356,9 @@ static void established_upcall(struct c4iw_ep *ep)
static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
{
- struct cpl_rx_data_ack *req;
struct sk_buff *skb;
- int wrlen = roundup(sizeof *req, 16);
+ u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+ u32 credit_dack;
PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -1464,15 +1375,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
- req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
- INIT_TP_WR(req, ep->hwtid);
- OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
- ep->hwtid));
- req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
- RX_DACK_CHANGE_F |
- RX_DACK_MODE_V(dack_mode));
- set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+ credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+ RX_DACK_MODE_V(dack_mode);
+
+ cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+ credit_dack);
+
c4iw_ofld_send(&ep->com.dev->rdev, skb);
return credits;
}
@@ -1827,8 +1735,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
(ep->mpa_pkt + sizeof(*mpa));
ep->ird = ntohs(mpa_v2_params->ird) &
MPA_V2_IRD_ORD_MASK;
+ ep->ird = min_t(u32, ep->ird,
+ cur_max_read_depth(ep->com.dev));
ep->ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
+ ep->ord = min_t(u32, ep->ord,
+ cur_max_read_depth(ep->com.dev));
PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
ep->ord);
if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
@@ -1966,7 +1878,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
struct sk_buff *skb;
struct fw_ofld_connection_wr *req;
unsigned int mtu_idx;
- int wscale;
+ u32 wscale;
struct sockaddr_in *sin;
int win;
@@ -1991,10 +1903,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -2048,15 +1960,6 @@ static inline int act_open_has_tid(int status)
status != CPL_ERR_CONN_EXIST);
}
-/* Returns whether a CPL status conveys negative advice.
- */
-static int is_neg_adv(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE ||
- status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
static char *neg_adv_str(unsigned int status)
{
switch (status) {
@@ -2113,8 +2016,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
}
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t)
+ if (!ep->l2t) {
+ dev_put(pdev);
goto out;
+ }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
@@ -2210,16 +2115,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
/* find a route */
if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
- ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, ep->com.cm_id->tos);
+ ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, ep->com.cm_id->tos);
iptype = 4;
ra = (__u8 *)&raddr->sin_addr;
} else {
- ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+ get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
}
@@ -2291,7 +2201,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
- if (is_neg_adv(status)) {
+ if (cxgb_is_neg_adv(status)) {
PDBG("%s Connection problems for atid %u status %u (%s)\n",
__func__, atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
@@ -2418,7 +2328,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
unsigned int mtu_idx;
u64 opt0;
u32 opt2;
- int wscale;
+ u32 wscale;
struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
@@ -2439,10 +2349,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
ep->hwtid));
- best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- enable_tcp_timestamps && req->tcpopt.tstamp,
- (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- wscale = compute_wscale(rcv_win);
+ cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp,
+ (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ wscale = cxgb_compute_wscale(rcv_win);
/*
* Specify the largest window that will fit in opt0. The
@@ -2514,42 +2424,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
return;
}
-static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
- int *iptype, __u8 *local_ip, __u8 *peer_ip,
- __be16 *local_port, __be16 *peer_port)
-{
- int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
- struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
- struct tcphdr *tcp = (struct tcphdr *)
- ((u8 *)(req + 1) + eth_len + ip_len);
-
- if (ip->version == 4) {
- PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
- ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 4;
- memcpy(peer_ip, &ip->saddr, 4);
- memcpy(local_ip, &ip->daddr, 4);
- } else {
- PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
- ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
- ntohs(tcp->dest));
- *iptype = 6;
- memcpy(peer_ip, ip6->saddr.s6_addr, 16);
- memcpy(local_ip, ip6->daddr.s6_addr, 16);
- }
- *peer_port = tcp->source;
- *local_port = tcp->dest;
-
- return;
-}
-
static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -2578,8 +2452,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
- local_ip, peer_ip, &local_port, &peer_port);
+ cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+ &iptype, local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
@@ -2587,18 +2461,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
- local_port, peer_port,
- tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ local_port, peer_port, tos);
} else {
PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
, __func__, parent_ep, hwtid,
local_ip, peer_ip, ntohs(local_port),
ntohs(peer_port), peer_mss);
- dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
- PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
- ((struct sockaddr_in6 *)
- &parent_ep->com.local_addr)->sin6_scope_id);
+ dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ local_ip, peer_ip, local_port, peer_port,
+ PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+ ((struct sockaddr_in6 *)
+ &parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@ -2831,18 +2706,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss *req = cplhdr(skb);
struct c4iw_ep *ep;
- struct cpl_abort_rpl *rpl;
struct sk_buff *rpl_skb;
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
unsigned int tid = GET_TID(req);
+ u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
@@ -2935,11 +2810,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
release = 1;
goto out;
}
- set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
+
+ cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+
c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
out:
if (release)
@@ -3136,7 +3009,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
if (conn_param->ord > ep->ird) {
if (RELAXED_IRD_NEGOTIATION) {
- ep->ord = ep->ird;
+ conn_param->ord = ep->ird;
} else {
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
@@ -3371,9 +3244,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
__func__, &laddr->sin_addr, ntohs(laddr->sin_port),
ra, ntohs(raddr->sin_port));
- ep->dst = find_route(dev, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, cm_id->tos);
+ ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ laddr->sin_addr.s_addr,
+ raddr->sin_addr.s_addr,
+ laddr->sin_port,
+ raddr->sin_port, cm_id->tos);
} else {
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
@@ -3392,10 +3267,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
__func__, laddr6->sin6_addr.s6_addr,
ntohs(laddr6->sin6_port),
raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
- ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
- raddr6->sin6_addr.s6_addr,
- laddr6->sin6_port, raddr6->sin6_port, 0,
- raddr6->sin6_scope_id);
+ ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ laddr6->sin6_addr.s6_addr,
+ raddr6->sin6_addr.s6_addr,
+ laddr6->sin6_port,
+ raddr6->sin6_port, 0,
+ raddr6->sin6_scope_id);
}
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@ -4037,8 +3914,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
ntohs(tcph->source), iph->tos);
- dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
- iph->tos);
+ dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ iph->daddr, iph->saddr, tcph->dest,
+ tcph->source, iph->tos);
if (!dst) {
pr_err("%s - failed to find dst entry!\n",
__func__);
@@ -4313,7 +4191,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
kfree_skb(skb);
return 0;
}
- if (is_neg_adv(req->status)) {
+ if (cxgb_is_neg_adv(req->status)) {
PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
__func__, ep->hwtid, req->status,
neg_adv_str(req->status));
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index bc522a9b2bfa..867b8cf82be8 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1033,15 +1033,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct c4iw_cq *chp;
- int ret;
+ int ret = 0;
unsigned long flag;
chp = to_c4iw_cq(ibcq);
spin_lock_irqsave(&chp->lock, flag);
- ret = t4_arm_cq(&chp->cq,
- (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+ t4_arm_cq(&chp->cq,
+ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+ if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+ ret = t4_cq_notempty(&chp->cq);
spin_unlock_irqrestore(&chp->lock, flag);
- if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
- ret = 0;
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d7332ec06..93e3d270a98a 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
idr_destroy(&ctx->dev->cqidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
idr_destroy(&ctx->dev->qpidr);
+ WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
idr_destroy(&ctx->dev->mmidr);
+ wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
INIT_LIST_HEAD(&devp->db_fc_list);
+ init_waitqueue_head(&devp->wait);
devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
if (c4iw_debugfs_root) {
@@ -1475,6 +1480,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
+ .nrxq = MAX_ULD_QSETS,
+ .rxq_size = 511,
+ .ciq = true,
+ .lro = false,
.add = c4iw_uld_add,
.rx_handler = c4iw_uld_rx_handler,
.state_change = c4iw_uld_state_change,
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f83604b2f82d..7e7f79e55006 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -263,6 +263,7 @@ struct c4iw_dev {
struct idr stid_idr;
struct list_head db_fc_list;
u32 avail_ird;
+ wait_queue_head_t wait;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -881,15 +882,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
return cm_id->provider_data;
}
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
{
#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 3467b906cff8..f57deba6717c 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -718,7 +718,7 @@ static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
return 0;
}
-void _free_qp(struct kref *kref)
+static void _free_qp(struct kref *kref)
{
struct c4iw_qp *qhp;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index b2bfbb1eef1a..862381aa83c8 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -636,6 +636,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
return (CQE_GENBIT(cqe) == cq->gen);
}
+static inline int t4_cq_notempty(struct t4_cq *cq)
+{
+ return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
+}
+
static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
{
int ret;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 79575ee873f2..a26a9a0bfc41 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,7 +47,7 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
-#include <linux/cpumask.h>
+#include <linux/interrupt.h>
#include "hfi.h"
#include "affinity.h"
@@ -56,7 +56,7 @@
struct hfi1_affinity_node_list node_affinity = {
.list = LIST_HEAD_INIT(node_affinity.list),
- .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+ .lock = __MUTEX_INITIALIZER(node_affinity.lock)
};
/* Name of IRQ types, indexed by enum irq_type */
@@ -160,14 +160,14 @@ void node_affinity_destroy(void)
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
list_for_each_safe(pos, q, &node_affinity.list) {
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
kfree(entry);
}
- spin_unlock(&node_affinity.lock);
+ mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
}
@@ -234,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
/*
* If this is the first time this NUMA node's affinity is used,
@@ -247,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
+ mutex_unlock(&node_affinity.lock);
return -ENOMEM;
}
init_cpu_mask_set(&entry->def_intr);
@@ -303,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- spin_lock(&node_affinity.lock);
node_affinity_add_tail(entry);
- spin_unlock(&node_affinity.lock);
}
-
+ mutex_unlock(&node_affinity.lock);
return 0;
}
-int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+/*
+ * Function updates the irq affinity hint for msix after it has been changed
+ * by the user using the /proc/irq interface. This function only accepts
+ * one cpu in the mask.
+ *
+ * @msix: SDMA MSI-X entry whose affinity changed; msix->arg is its sdma_engine
+ * @cpu: the new cpu for the engine
+ *
+ * Takes node_affinity.lock. Returns without doing anything when @cpu is out
+ * of range, when it equals the engine's current cpu, or when no affinity
+ * node exists for the device's NUMA node.
+ */
+static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
+{
+	struct sdma_engine *sde = msix->arg;
+	struct hfi1_devdata *dd = sde->dd;
+	struct hfi1_affinity_node *entry;
+	struct cpu_mask_set *set;
+	int i, old_cpu;
+
+	/*
+	 * Range-check against nr_cpu_ids, not the online cpu count: cpu ids
+	 * need not be dense, and cpumask_first() reports an empty mask as a
+	 * value >= nr_cpu_ids, which must never reach cpumask_set_cpu().
+	 */
+	if (cpu < 0 || cpu >= nr_cpu_ids || cpu == sde->cpu)
+		return;
+
+	mutex_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	if (!entry)
+		goto unlock;
+
+	/* Point the engine and its MSI-X hint at the new cpu only */
+	old_cpu = sde->cpu;
+	sde->cpu = cpu;
+	cpumask_clear(&msix->mask);
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
+		   msix->msix.vector, irq_type_names[msix->type],
+		   sde->this_idx, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	/*
+	 * Set the new cpu in the hfi1_affinity_node and clean
+	 * the old cpu if it is not used by any other IRQ
+	 */
+	set = &entry->def_intr;
+	cpumask_set_cpu(cpu, &set->mask);
+	cpumask_set_cpu(cpu, &set->used);
+	for (i = 0; i < dd->num_msix_entries; i++) {
+		struct hfi1_msix_entry *other_msix;
+
+		other_msix = &dd->msix_entries[i];
+		if (other_msix->type != IRQ_SDMA || other_msix == msix)
+			continue;
+
+		/* Another SDMA vector still sits on old_cpu: keep it */
+		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
+			goto unlock;
+	}
+	cpumask_clear_cpu(old_cpu, &set->mask);
+	cpumask_clear_cpu(old_cpu, &set->used);
+unlock:
+	mutex_unlock(&node_affinity.lock);
+}
+
+/*
+ * Affinity notifier callback. Recovers the MSI-X entry that embeds @notify
+ * and forwards the first cpu of the new @mask to the SDMA affinity update.
+ */
+static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct hfi1_msix_entry *msix;
+
+	msix = container_of(notify, struct hfi1_msix_entry, notify);
+
+	/* Only one CPU configuration supported currently */
+	hfi1_update_sdma_affinity(msix, cpumask_first(mask));
+}
+
+/*
+ * Release callback installed in the affinity notifier. Intentionally empty:
+ * the notifier needs a release hook, but there is nothing to free here.
+ */
+static void hfi1_irq_notifier_release(struct kref *ref)
+{
+}
+
+/*
+ * Fill in the affinity notifier embedded in @msix and register it for the
+ * entry's interrupt vector. A registration failure is only logged.
+ */
+static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *an = &msix->notify;
+	int rc;
+
+	an->irq = msix->msix.vector;
+	an->notify = hfi1_irq_notifier_notify;
+	an->release = hfi1_irq_notifier_release;
+
+	rc = irq_set_affinity_notifier(an->irq, an);
+	if (rc)
+		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
+		       an->irq);
+}
+
+/*
+ * Unregister the affinity notifier of @msix by installing a NULL notifier
+ * on its irq. A failure is only logged.
+ */
+static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *an = &msix->notify;
+	int rc;
+
+	rc = irq_set_affinity_notifier(an->irq, NULL);
+	if (rc)
+		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
+		       an->irq);
+}
+
+/*
+ * Function sets the irq affinity for msix.
+ * It *must* be called with node_affinity.lock held.
+ */
+static int get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
@@ -329,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
if (!ret)
return -ENOMEM;
- spin_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
@@ -361,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
* finds its CPU here.
*/
if (cpu == -1 && set) {
- spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -373,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&node_affinity.lock);
- }
-
- switch (msix->type) {
- case IRQ_SDMA:
- sde->cpu = cpu;
- break;
- case IRQ_GENERAL:
- case IRQ_RCVCTXT:
- case IRQ_OTHER:
- break;
}
cpumask_set_cpu(cpu, &msix->mask);
@@ -392,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
extra, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ if (msix->type == IRQ_SDMA) {
+ sde->cpu = cpu;
+ hfi1_setup_sdma_notifier(msix);
+ }
+
free_cpumask_var(diff);
return 0;
}
+/*
+ * Locked wrapper around get_irq_affinity(): takes node_affinity.lock for
+ * the duration of the call and hands back its return value unchanged.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int rc;
+
+	mutex_lock(&node_affinity.lock);
+	rc = get_irq_affinity(dd, msix);
+	mutex_unlock(&node_affinity.lock);
+
+	return rc;
+}
+
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
@@ -403,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
set = &entry->def_intr;
+ hfi1_cleanup_sdma_notifier(msix);
break;
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
@@ -421,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
break;
default:
+ mutex_unlock(&node_affinity.lock);
return;
}
if (set) {
- spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
+ mutex_unlock(&node_affinity.lock);
}
/* This should be called with node_affinity.lock held */
@@ -536,7 +635,7 @@ int hfi1_get_proc_affinity(int node)
if (!ret)
goto free_available_mask;
- spin_lock(&affinity->lock);
+ mutex_lock(&affinity->lock);
/*
* If we've used all available HW threads, clear the mask and start
* overloading.
@@ -644,7 +743,8 @@ int hfi1_get_proc_affinity(int node)
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&affinity->lock);
+
+ mutex_unlock(&affinity->lock);
hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
free_cpumask_var(intrs_mask);
@@ -665,49 +765,53 @@ void hfi1_put_proc_affinity(int cpu)
if (cpu < 0)
return;
- spin_lock(&affinity->lock);
+
+ mutex_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&affinity->lock);
+ mutex_unlock(&affinity->lock);
}
-/* Prevents concurrent reads and writes of the sdma_affinity attrib */
-static DEFINE_MUTEX(sdma_affinity_mutex);
-
int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
size_t count)
{
struct hfi1_affinity_node *entry;
- struct cpumask mask;
+ cpumask_var_t mask;
int ret, i;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
- return -EINVAL;
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
- ret = cpulist_parse(buf, &mask);
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = cpulist_parse(buf, mask);
if (ret)
- return ret;
+ goto out;
- if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
+ if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
dd_dev_warn(dd, "Invalid CPU mask\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- mutex_lock(&sdma_affinity_mutex);
/* reset the SDMA interrupt affinity details */
init_cpu_mask_set(&entry->def_intr);
- cpumask_copy(&entry->def_intr.mask, &mask);
- /*
- * Reassign the affinity for each SDMA interrupt.
- */
+ cpumask_copy(&entry->def_intr.mask, mask);
+
+ /* Reassign the affinity for each SDMA interrupt. */
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *msix;
@@ -715,13 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
if (msix->type != IRQ_SDMA)
continue;
- ret = hfi1_get_irq_affinity(dd, msix);
+ ret = get_irq_affinity(dd, msix);
if (ret)
break;
}
-
- mutex_unlock(&sdma_affinity_mutex);
+out:
+ free_cpumask_var(mask);
+unlock:
+ mutex_unlock(&node_affinity.lock);
return ret ? ret : strnlen(buf, PAGE_SIZE);
}
@@ -729,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
+ if (!entry) {
+ mutex_unlock(&node_affinity.lock);
return -EINVAL;
+ }
- mutex_lock(&sdma_affinity_mutex);
cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
- mutex_unlock(&sdma_affinity_mutex);
+ mutex_unlock(&node_affinity.lock);
return strnlen(buf, PAGE_SIZE);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 8879cf7a8cac..b89ea3c0ee1a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -121,8 +121,7 @@ struct hfi1_affinity_node_list {
int num_core_siblings;
int num_online_nodes;
int num_online_cpus;
- /* protect affinity node list */
- spinlock_t lock;
+ struct mutex lock; /* protects affinity nodes */
};
int node_affinity_init(void);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b32638d58ae8..9bf5f23544d4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT),
- FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT)
+ FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT),
+ FLAG_ENTRY0("External Device Request Timeout",
+ EXTERNAL_DEVICE_REQ_TIMEOUT),
};
/*
@@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work)
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
OPA_LINKDOWN_REASON_SPEED_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work)
* If there is no cable attached, turn the DC off. Otherwise,
* start the link bring up.
*/
- if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
+ if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd))
dc_shutdown(ppd->dd);
- } else {
- tune_serdes(ppd);
+ else
start_link(ppd);
- }
}
void handle_link_bounce(struct work_struct *work)
@@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work)
*/
if (ppd->host_link_state & HLS_UP) {
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
@@ -7531,7 +7529,6 @@ done:
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
OPA_LINKDOWN_REASON_WIDTH_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -9161,6 +9158,12 @@ set_local_link_attributes_fail:
*/
int start_link(struct hfi1_pportdata *ppd)
{
+ /*
+ * Tune the SerDes to a ballpark setting for optimal signal and bit
+ * error rate. Needs to be done before starting the link.
+ */
+ tune_serdes(ppd);
+
if (!ppd->link_enabled) {
dd_dev_info(ppd->dd,
"%s: stopping link start because link is disabled\n",
@@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work)
*/
set_qsfp_int_n(ppd, 1);
- tune_serdes(ppd);
-
start_link(ppd);
}
@@ -9490,6 +9491,73 @@ static void init_lcb(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}
+/*
+ * Probe the QSFP with a one-byte read of its status byte (byte 2).
+ *
+ * Returns 0 when the port is not a QSFP or exactly one byte was read,
+ * the negative value from one_qsfp_read() when it reports an error, and
+ * -EIO for any other byte count.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+	u8 status;
+	int nread;
+
+	/* nothing to probe on a non-QSFP port: report success */
+	if (ppd->port_type != PORT_TYPE_QSFP)
+		return 0;
+
+	nread = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+	if (nread == 1)
+		return 0;	/* success */
+
+	return nread < 0 ? nread : -EIO;
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms). The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Start the link if a test read of the QSFP succeeds. Otherwise queue
+ * start_link_work to try again after QSFP_RETRY_WAIT msec, giving up once
+ * MAX_QSFP_RETRIES attempts have been used.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+	if (!test_qsfp_read(ppd)) {
+		/* QSFP answered: reset the retry budget and bring up the link */
+		ppd->qsfp_retry_count = 0;
+		start_link(ppd);
+		return;
+	}
+
+	/* read failed */
+	if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+		dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+		return;
+	}
+
+	dd_dev_info(ppd->dd,
+		    "QSFP not responding, waiting and retrying %d\n",
+		    (int)ppd->qsfp_retry_count);
+	ppd->qsfp_retry_count++;
+	queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+			   msecs_to_jiffies(QSFP_RETRY_WAIT));
+}
+
+/*
+ * Delayed-work handler for start_link_work: recover the port data that
+ * embeds the work item and retry starting the link.
+ */
+void handle_start_link(struct work_struct *work)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = container_of(work, struct hfi1_pportdata, start_link_work.work);
+	try_start_link(ppd);
+}
+
int bringup_serdes(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9593,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
set_qsfp_int_n(ppd, 1);
}
- /*
- * Tune the SerDes to a ballpark setting for
- * optimal signal and bit error rate
- * Needs to be done before starting the link
- */
- tune_serdes(ppd);
-
- return start_link(ppd);
+ try_start_link(ppd);
+ return 0;
}
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9611,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;
+ ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+ flush_delayed_work(&ppd->start_link_work);
+ cancel_delayed_work_sync(&ppd->start_link_work);
+
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
@@ -9648,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct hfi1_message_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
+struct ib_header *hfi1_get_msgheader(
+ struct hfi1_devdata *dd, __le32 *rhf_addr)
{
u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
- return (struct hfi1_message_header *)
+ return (struct ib_header *)
(rhf_addr - dd->rhf_offset + offset);
}
@@ -11489,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
!(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
/* reset the cached receive header queue head value */
@@ -11557,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* update with a dummy tail address and then disable
* receive context.
*/
- if (dd->rcvhdrtail_dummy_physaddr) {
+ if (dd->rcvhdrtail_dummy_dma) {
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
}
@@ -11570,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
+ if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11642,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* so it doesn't contain an address that is invalid.
*/
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
}
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
@@ -12865,7 +12931,7 @@ fail:
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
- int num_kernel_contexts;
+ unsigned long num_kernel_contexts;
int total_contexts;
int ret;
unsigned ngroups;
@@ -12894,9 +12960,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
dd_dev_err(dd,
- "Reducing # kernel rcv contexts to: %d, from %d\n",
+ "Reducing # kernel rcv contexts to: %d, from %lu\n",
(int)(dd->chip_send_contexts - num_vls - 1),
- (int)num_kernel_contexts);
+ num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
@@ -13319,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd)
/*
* Give up after 1ms - maximum wait time.
*
- * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
+ * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at
* 250MB/s bandwidth. Lower rate to 66% for overhead to get:
- * 148 KB / (66% * 250MB/s) = 920us
+ * 136 KB / (66% * 250MB/s) = 844us
*/
if (count++ > 500) {
dd_dev_err(dd,
@@ -14500,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /* call before get_platform_config(), after init_chip_resources() */
+ ret = eprom_init(dd);
+ if (ret)
+ goto bail_free_rcverr;
+
/* Needs to be called before hfi1_firmware_init */
get_platform_config(dd);
@@ -14620,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
- ret = eprom_init(dd);
- if (ret)
- goto bail_free_rcverr;
-
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ed11107c50fe..92345259a8f4 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -82,7 +82,7 @@
*/
#define CM_VAU 3
/* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
-#define CM_GLOBAL_CREDITS 0x940
+#define CM_GLOBAL_CREDITS 0x880
/* Number of PKey entries in the HW */
#define MAX_PKEY_VALUES 16
@@ -254,12 +254,14 @@
#define FAILED_LNI_VERIFY_CAP2 BIT(10)
#define FAILED_LNI_CONFIGLT BIT(11)
#define HOST_HANDSHAKE_TIMEOUT BIT(12)
+#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13)
#define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
| FAILED_LNI_VERIFY_CAP1 \
| FAILED_LNI_VERIFY_CAP2 \
- | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
+ | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \
+ | EXTERNAL_DEVICE_REQ_TIMEOUT)
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
#define HOST_REQ_DONE BIT(0)
@@ -706,6 +708,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
@@ -1335,7 +1338,7 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct hfi1_message_header *hfi1_get_msgheader(
+struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index fcc9c217a97a..da7be21bedb4 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -320,14 +320,6 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
-/*
- * This structure contains the first field common to all protocols
- * that employ this chip.
- */
-struct hfi1_message_header {
- __be16 lrh[4];
-};
-
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9cc288..632ba21759ab 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -59,6 +59,40 @@
static struct dentry *hfi1_dbg_root;
+/* wrappers to enforce srcu in seq file */
+
+/*
+ * seq_read() bracketed by debugfs_use_file_start()/debugfs_use_file_finish().
+ * If the start call returns non-zero, that value is returned and seq_read()
+ * is not called; the finish call is made in either case.
+ */
+static ssize_t hfi1_seq_read(
+	struct file *file,
+	char __user *buf,
+	size_t size,
+	loff_t *ppos)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	ssize_t ret;
+	int idx;
+
+	ret = debugfs_use_file_start(dentry, &idx);
+	if (likely(ret == 0))
+		ret = seq_read(file, buf, size, ppos);
+	debugfs_use_file_finish(idx);
+
+	return ret;
+}
+
+/*
+ * seq_lseek() bracketed by debugfs_use_file_start()/debugfs_use_file_finish().
+ * If the start call returns non-zero, that value is returned and seq_lseek()
+ * is not called; the finish call is made in either case.
+ */
+static loff_t hfi1_seq_lseek(
+	struct file *file,
+	loff_t offset,
+	int whence)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	loff_t ret;
+	int idx;
+
+	ret = debugfs_use_file_start(dentry, &idx);
+	if (likely(ret == 0))
+		ret = seq_lseek(file, offset, whence);
+	debugfs_use_file_finish(idx);
+
+	return ret;
+}
+
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
@@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
- .read = seq_read, \
- .llseek = seq_lseek, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
.release = seq_release \
}
@@ -105,11 +139,9 @@ do { \
DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_opcode_stats_perctx *opstats;
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(opstats->stats))
return NULL;
return pos;
@@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats)
DEBUGFS_FILE_OPS(ctx_stats);
static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
+ __acquires(RCU)
{
struct qp_iter *iter;
loff_t n = *pos;
- rcu_read_lock();
iter = qp_iter_init(s->private);
+
+ /* stop calls rcu_read_unlock */
+ rcu_read_lock();
+
if (!iter)
return NULL;
- while (n--) {
+ do {
if (qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
- }
+ } while (n--);
return iter;
}
static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
loff_t *pos)
+ __must_hold(RCU)
{
struct qp_iter *iter = iter_ptr;
@@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
}
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
-__releases(RCU)
+ __releases(RCU)
{
rcu_read_unlock();
}
@@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats)
DEBUGFS_FILE_OPS(qp_stats);
static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
struct hfi1_ibdev *ibd;
struct hfi1_devdata *dd;
- rcu_read_lock();
ibd = (struct hfi1_ibdev *)s->private;
dd = dd_from_dev(ibd);
if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_cntrs(dd, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
struct hfi1_devdata *dd;
ssize_t rval;
- rcu_read_lock();
dd = private2dd(file);
avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
rval = simple_read_from_buffer(buf, count, ppos, names, avail);
- rcu_read_unlock();
return rval;
}
@@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
struct hfi1_pportdata *ppd;
ssize_t rval;
- rcu_read_lock();
ppd = private2ppd(file);
avail = hfi1_read_portcntrs(ppd, NULL, &counters);
rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
- rcu_read_unlock();
return rval;
}
@@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
int used;
int i;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
size = PAGE_SIZE;
used = 0;
tmp = kmalloc(size, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
used += scnprintf(tmp + used, size - used,
@@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf,
used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
u64 scratch0;
u64 clear;
- rcu_read_lock();
ppd = private2ppd(file);
dd = ppd->dd;
buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto do_return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
do_free:
kfree(buff);
- do_return:
- rcu_read_unlock();
return ret;
}
@@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
char *tmp;
int ret;
- rcu_read_lock();
ppd = private2ppd(file);
tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!tmp) {
- rcu_read_unlock();
+ if (!tmp)
return -ENOMEM;
- }
ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
if (ret > 0)
ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
- rcu_read_unlock();
kfree(tmp);
return ret;
}
@@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
int offset;
int total_written;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
@@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
int offset;
int total_read;
- rcu_read_lock();
ppd = private2ppd(file);
/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
offset = *ppos & 0xffff;
/* explicitly reject invalid address 0 to catch cp and cat */
- if (i2c_addr == 0) {
- ret = -EINVAL;
- goto _return;
- }
+ if (i2c_addr == 0)
+ return -EINVAL;
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
if (total_read < 0) {
@@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
int ret;
int total_written;
- rcu_read_lock();
- if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
- ret = -EINVAL;
- goto _return;
- }
+ if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+ return -EINVAL;
ppd = private2ppd(file);
buff = kmalloc(count, GFP_KERNEL);
- if (!buff) {
- ret = -ENOMEM;
- goto _return;
- }
+ if (!buff)
+ return -ENOMEM;
ret = copy_from_user(buff, buf, count);
if (ret > 0) {
ret = -EFAULT;
goto _free;
}
-
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
_free:
kfree(buff);
- _return:
- rcu_read_unlock();
return ret;
}
@@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
int ret;
int total_read;
- rcu_read_lock();
if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
ret = -EINVAL;
goto _return;
@@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
_free:
kfree(buff);
_return:
- rcu_read_unlock();
return ret;
}
@@ -948,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
};
+/*
+ * seq_file start: the iterator is the position itself. Returns NULL once
+ * *pos reaches num_online_cpus().
+ */
+static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
+{
+	return (*pos < num_online_cpus()) ? pos : NULL;
+}
+
+/*
+ * seq_file next: advance *pos by one and return it, or NULL once it
+ * reaches num_online_cpus().
+ */
+static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return (*pos < num_online_cpus()) ? pos : NULL;
+}
+
+/* seq_file stop: start/next allocate nothing, so there is nothing to undo */
+static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+/*
+ * seq_file show: @v is the current position; dump the SDMA cpu list entry
+ * for that index on the device behind s->private. Always returns 0.
+ */
+static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v)
+{
+	struct hfi1_ibdev *ibd = s->private;
+	struct hfi1_devdata *dd = dd_from_dev(ibd);
+	loff_t *spos = v;
+
+	sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)*spos);
+
+	return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
+DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
+DEBUGFS_FILE_OPS(sdma_cpu_list);
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -976,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
@@ -1006,7 +1029,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
ibd->hfi1_ibdev_dbg = NULL;
- synchronize_rcu();
}
/*
@@ -1031,9 +1053,7 @@ static const char * const hfi1_statnames[] = {
};
static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1051,9 +1071,7 @@ static void *_driver_stats_names_seq_next(
}
static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1069,9 +1087,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names)
DEBUGFS_FILE_OPS(driver_stats_names);
static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
{
- rcu_read_lock();
if (*pos >= ARRAY_SIZE(hfi1_statnames))
return NULL;
return pos;
@@ -1086,9 +1102,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
}
static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
{
- rcu_read_unlock();
}
static u64 hfi1_sps_ints(void)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 8246dc7d0573..6563e4d38b80 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
struct hfi1_packet *packet)
{
- struct hfi1_message_header *rhdr = packet->hdr;
+ struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
- struct hfi1_other_headers *ohdr = NULL;
+ struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(hdr->lrh[1]);
+ u16 lid = be16_to_cpu(rhdr->lrh[1]);
u32 qp_num;
u32 rcv_flags = 0;
@@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* Check for GRH */
if (lnh == HFI1_LRH_BTH) {
- ohdr = &hdr->u.oth;
+ ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
- ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ ohdr = &rhdr->u.l.oth;
+ if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
@@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
case IB_QPT_RC:
hfi1_rc_hdrerr(
rcd,
- hdr,
+ rhdr,
rcv_flags,
qp);
break;
@@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_ib_header *hdr = pkt->hdr;
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_header *hdr = pkt->hdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
@@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
+ sc = hdr2sc(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
@@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header *hdr;
+ struct ib_other_headers *ohdr;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- hdr = (struct hfi1_ib_header *)
- hfi1_get_msgheader(dd, rhf_addr);
+ hdr = hfi1_get_msgheader(dd, rhf_addr);
+
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_BTH)
@@ -888,14 +887,15 @@ void set_all_slowpath(struct hfi1_devdata *dd)
}
static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet packet,
+ struct hfi1_packet *packet,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
- packet.rhf_addr);
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ u8 etype = rhf_rcv_type(packet->rhf);
- if (hdr2sc(hdr, packet.rhf) != 0xf) {
+ if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
int hwstate = read_logical_state(dd);
if (hwstate != LSTATE_ACTIVE) {
@@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
/* Auto activate link on non-SC15 packet receive */
if (unlikely(rcd->ppd->host_link_state ==
HLS_UP_ARMED) &&
- set_armed_to_active(rcd, packet, dd))
+ set_armed_to_active(rcd, &packet, dd))
goto bail;
last = process_rcv_packet(&packet, thread);
}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 36b77943cbfd..e70c223801b4 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -49,7 +49,26 @@
#include "common.h"
#include "eprom.h"
+/*
+ * The EPROM is logically divided into three partitions:
+ * partition 0: the first 128K, visible from PCI ROM BAR
+ * partition 1: 4K config file (sector size)
+ * partition 2: the rest
+ */
+#define P0_SIZE (128 * 1024)
+#define P1_SIZE (4 * 1024)
+#define P1_START P0_SIZE
+#define P2_START (P0_SIZE + P1_SIZE)
+
+/* controller page size, in bytes */
+#define EP_PAGE_SIZE 256
+#define EP_PAGE_MASK (EP_PAGE_SIZE - 1)
+#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32))
+
+/* controller commands */
#define CMD_SHIFT 24
+#define CMD_NOP (0)
+#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr)
#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
/* controller interface speeds */
@@ -61,6 +80,90 @@
* Double it for safety.
*/
#define EPROM_TIMEOUT 80000 /* ms */
+
+/*
+ * Read a 256 byte (64 dword) EPROM page.
+ * All callers have verified the offset is at a page boundary.
+ *
+ * @dd: device whose ASIC_EEP_ADDR_CMD / ASIC_EEP_DATA CSRs are accessed
+ * @offset: byte address of the page; CMD_READ_DATA() ORs it into the
+ * command word written to ASIC_EEP_ADDR_CMD
+ * @result: destination; must have room for EP_PAGE_DWORDS u32 values
+ *
+ * Nothing is returned: the function writes the read command, performs
+ * EP_PAGE_DWORDS reads of the data CSR (each value cast to u32), then
+ * writes CMD_NOP.
+ */
+static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
+{
+ int i;
+
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); /* read command with the page address */
+ for (i = 0; i < EP_PAGE_DWORDS; i++) /* one data CSR read per dword */
+ result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
+ write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
+}
+
+/*
+ * Read len bytes of the EPROM, beginning at byte address start.
+ *
+ * @dd: device, passed through to read_page()
+ * @start: byte address of the first byte to read; need not be page
+ * aligned
+ * @len: number of bytes to read; 0 is accepted and reads nothing
+ * @dest: destination buffer of at least len bytes
+ *
+ * The EPROM is only ever read in whole EP_PAGE_SIZE pages, into an
+ * on-stack bounce buffer; just the requested bytes are copied to dest.
+ * The work is split into a leading partial page, a run of whole pages
+ * and a trailing partial page.
+ *
+ * Return value:
+ * 0 - success (including len == 0)
+ * -EINVAL - start + len is above (1 << CMD_SHIFT)
+ *
+ * NOTE(review): start + len is computed in u32, so a sum that wraps
+ * would not be caught by the range check - confirm no caller can pass
+ * such values.
+ */
+static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest)
+{
+ u32 buffer[EP_PAGE_DWORDS]; /* one-page bounce buffer */
+ u32 end;
+ u32 start_offset;
+ u32 read_start;
+ u32 bytes;
+
+ if (len == 0)
+ return 0;
+
+ end = start + len;
+
+ /*
+ * Make sure the read range is not outside of the controller read
+ * command address range. Note that '>' is correct below - the end
+ * of the range is OK if it stops at the limit, but no higher.
+ */
+ if (end > (1 << CMD_SHIFT))
+ return -EINVAL;
+
+ /* read the first partial page */
+ start_offset = start & EP_PAGE_MASK; /* offset of start within its page */
+ if (start_offset) {
+ /* partial starting page */
+
+ /* align and read the page that contains the start */
+ read_start = start & ~EP_PAGE_MASK;
+ read_page(dd, read_start, buffer);
+
+ /* the rest of the page is available data */
+ bytes = EP_PAGE_SIZE - start_offset;
+
+ if (len <= bytes) {
+ /* end is within this page */
+ memcpy(dest, (u8 *)buffer + start_offset, len);
+ return 0;
+ }
+
+ memcpy(dest, (u8 *)buffer + start_offset, bytes);
+
+ start += bytes;
+ len -= bytes;
+ dest += bytes; /* arithmetic on void * (GNU C extension): advances by bytes */
+ }
+ /* start is now page aligned */
+
+ /* read whole pages */
+ while (len >= EP_PAGE_SIZE) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, EP_PAGE_SIZE);
+
+ start += EP_PAGE_SIZE;
+ len -= EP_PAGE_SIZE;
+ dest += EP_PAGE_SIZE;
+ }
+
+ /* read the last partial page */
+ if (len) {
+ read_page(dd, start, buffer);
+ memcpy(dest, buffer, len); /* only the leading len bytes of the page are wanted */
+ }
+
+ return 0;
+}
+
/*
* Initialize the EPROM handler.
*/
@@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd)
done_asic:
return ret;
}
+
+/* magic character sequence that trails an image */
+#define IMAGE_TRAIL_MAGIC "egamiAPO"
+
+/*
+ * Read all of partition 1. The actual file is at the front. Adjust
+ * the returned size if a trailing image magic is found.
+ *
+ * @dd: device, passed through to read_length()
+ * @data: on success, set to a kmalloc()ed buffer of P1_SIZE bytes holding
+ * the whole partition; ownership passes to the caller
+ * @size: on success, set to the offset of the first IMAGE_TRAIL_MAGIC
+ * found in the buffer, or to P1_SIZE when no magic is found
+ *
+ * Return value:
+ * 0 - success
+ * -ENOMEM - the buffer could not be allocated
+ * other - error returned by read_length(); the buffer is freed and
+ * *data / *size are left untouched
+ */
+static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
+ u32 *size)
+{
+ void *buffer;
+ void *p;
+ u32 length;
+ int ret;
+
+ buffer = kmalloc(P1_SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ ret = read_length(dd, P1_START, P1_SIZE, buffer);
+ if (ret) {
+ kfree(buffer);
+ return ret;
+ }
+
+ /* scan for image magic that may trail the actual data */
+ p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); /* search limited to the P1_SIZE bytes just read */
+ if (p)
+ length = p - buffer; /* data ends where the magic begins */
+ else
+ length = P1_SIZE;
+
+ *data = buffer;
+ *size = length;
+ return 0;
+}
+
+/*
+ * Read the platform configuration file from the EPROM.
+ *
+ * On success, an allocated buffer containing the data and its size are
+ * returned. It is up to the caller to free this buffer.
+ *
+ * @dd: device to read from; dd->eprom_available must be set
+ * @data: receives the allocated buffer (written only on success)
+ * @size: receives the data size in bytes (written only on success)
+ *
+ * The CR_EPROM chip resource is acquired for the duration of the read
+ * and released on every path that follows a successful acquire.
+ *
+ * Return value:
+ * 0 - success
+ * -ENXIO - no EPROM is available
+ * -EBUSY - not able to acquire access to the EPROM
+ * -ENOENT - no recognizable file written
+ * -ENOMEM - buffer could not be allocated
+ *
+ * Any other error produced by read_length() or
+ * read_partition_platform_config() is passed back unchanged.
+ */
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
+{
+ u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */
+ int ret;
+
+ if (!dd->eprom_available)
+ return -ENXIO;
+
+ ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+ if (ret) /* any acquire failure is reported as -EBUSY */
+ return -EBUSY;
+
+ /* read the last page of P0 for the EPROM format magic */
+ ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ if (ret)
+ goto done;
+
+ /* last dword of P0 contains a magic indicator */
+ if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* partition format */
+ ret = read_partition_platform_config(dd, data, size);
+ goto done;
+ }
+
+ /* nothing recognized */
+ ret = -ENOENT;
+
+done:
+ release_chip_resource(dd, CR_EPROM);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..e774184f1643 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -45,8 +45,8 @@
*
*/
-struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret,
+ u32 *size_ret);
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1ecbec192358..677efa0e8cd6 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -58,7 +58,6 @@
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
-#include "eprom.h"
#include "aspm.h"
#include "mmu_rb.h"
@@ -183,6 +182,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
if (fd) {
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
+ atomic_inc(&fd->mm->mm_count);
}
fp->private_data = fd;
@@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
ret = assign_ctxt(fp, &uinfo);
if (ret < 0)
return ret;
- setup_ctxt(fp);
+ ret = setup_ctxt(fp);
if (ret)
return ret;
ret = user_init(fp);
@@ -439,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd;
- unsigned long flags, pfn;
+ unsigned long flags;
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
+ void *memvirt = NULL;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -492,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memaddr = dd->cr_base[uctxt->numa_id].pa +
+ memvirt = dd->cr_base[uctxt->numa_id].va;
+ memaddr = virt_to_phys(memvirt) +
(((u64)uctxt->sc->hw_free -
(u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
memlen = PAGE_SIZE;
@@ -507,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
mapio = 1;
break;
case RCV_HDRQ:
- memaddr = uctxt->rcvhdrq_phys;
memlen = uctxt->rcvhdrq_size;
+ memvirt = uctxt->rcvhdrq;
break;
case RCV_EGRBUF: {
unsigned long addr;
@@ -532,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_flags &= ~VM_MAYWRITE;
addr = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
+ memlen = uctxt->egrbufs.buffers[i].len;
+ memvirt = uctxt->egrbufs.buffers[i].addr;
ret = remap_pfn_range(
vma, addr,
- uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
- uctxt->egrbufs.buffers[i].len,
+ /*
+ * virt_to_pfn() does the same, but
+ * it's not available on x86_64
+ * when CONFIG_MMU is enabled.
+ */
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
vma->vm_page_prot);
if (ret < 0)
goto done;
- addr += uctxt->egrbufs.buffers[i].len;
+ addr += memlen;
}
ret = 0;
goto done;
@@ -595,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- memaddr = uctxt->rcvhdrqtailaddr_phys;
memlen = PAGE_SIZE;
+ memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -649,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
"%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
vma->vm_end - vma->vm_start, vma->vm_flags);
- pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
if (vmf) {
- vma->vm_pgoff = pfn;
+ vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
} else if (mapio) {
- ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
+ } else if (memvirt) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
+ vma->vm_page_prot);
} else {
- ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
}
done:
@@ -779,6 +796,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
mutex_unlock(&hfi1_mutex);
hfi1_free_ctxtdata(dd, uctxt);
done:
+ mmdrop(fdata->mm);
kobject_put(&dd->kobj);
kfree(fdata);
return 0;
@@ -959,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
uctxt->dd->node);
- if (!uctxt->sc)
- return -ENOMEM;
-
+ if (!uctxt->sc) {
+ ret = -ENOMEM;
+ goto ctxdata_free;
+ }
hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
- return ret;
+ goto ctxdata_free;
+
/*
* Setup shared context resources if the user-level has requested
* shared contexts and this is the 'master' process.
@@ -980,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
* send context because it will be done during file close
*/
if (ret)
- return ret;
+ goto ctxdata_free;
}
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
@@ -1000,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
fd->uctxt = uctxt;
return 0;
+
+ctxdata_free:
+ dd->rcd[ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ return ret;
}
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
@@ -1258,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
uctxt->rcvhdrq);
binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
fd->subctxt,
- uctxt->egrbufs.rcvtids[0].phys);
+ uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
fd->subctxt, 0);
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 1000e0fd96d9..7eef11b316ff 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -64,6 +64,8 @@
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
+#include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -171,12 +173,12 @@ struct ctxt_eager_bufs {
u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
ssize_t len;
} *buffers;
struct {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
} *rcvtids;
};
@@ -207,8 +209,8 @@ struct hfi1_ctxtdata {
/* size of each of the rcvhdrq entries */
u16 rcvhdrqentsize;
/* mmap of hdrq, must fit in 44 bits */
- dma_addr_t rcvhdrq_phys;
- dma_addr_t rcvhdrqtailaddr_phys;
+ dma_addr_t rcvhdrq_dma;
+ dma_addr_t rcvhdrqtailaddr_dma;
struct ctxt_eager_bufs egrbufs;
/* this receive context's assigned PIO ACK send context */
struct send_context *sc;
@@ -350,7 +352,7 @@ struct hfi1_packet {
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
struct rvt_qp *qp;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -529,6 +531,7 @@ struct hfi1_msix_entry {
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
+ struct irq_affinity_notify notify;
};
/* per-SL CCA information */
@@ -605,6 +608,7 @@ struct hfi1_pportdata {
struct work_struct freeze_work;
struct work_struct link_downgrade_work;
struct work_struct link_bounce_work;
+ struct delayed_work start_link_work;
/* host link state variables */
struct mutex hls_lock;
u32 host_link_state;
@@ -659,6 +663,7 @@ struct hfi1_pportdata {
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
u8 last_pstate; /* info only */
+ u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
@@ -1058,8 +1063,6 @@ struct hfi1_devdata {
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
- /* high volume overflow errors deferred to tasklet */
- struct tasklet_struct error_tasklet;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1162,7 +1165,7 @@ struct hfi1_devdata {
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
- dma_addr_t rcvhdrtail_dummy_physaddr;
+ dma_addr_t rcvhdrtail_dummy_dma;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
@@ -1173,6 +1176,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
+ struct rhashtable sdma_rht;
struct kobject kobj;
};
@@ -1266,15 +1270,32 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
((!!(rhf_dc_info(rhf))) << 4);
}
+#define HFI1_JKEY_WIDTH 16
+#define HFI1_JKEY_MASK (BIT(16) - 1)
+#define HFI1_ADMIN_JKEY_RANGE 32
+
+/*
+ * J_KEYs are split and allocated in the following groups:
+ * 0 - 31 - users with administrator privileges
+ * 32 - 63 - kernel protocols using KDETH packets
+ * 64 - 65535 - all other users using KDETH packets
+ */
static inline u16 generate_jkey(kuid_t uid)
{
- return from_kuid(current_user_ns(), uid) & 0xffff;
+ u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
+
+ if (capable(CAP_SYS_ADMIN))
+ jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
+ else if (jkey < 64)
+ jkey |= BIT(HFI1_JKEY_WIDTH - 1);
+
+ return jkey;
}
/*
@@ -1584,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
@@ -1656,7 +1677,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
const struct pci_device_id *);
void hfi1_free_devdata(struct hfi1_devdata *);
-void cc_state_reclaim(struct rcu_head *rcu);
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
@@ -1788,7 +1808,7 @@ extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
extern unsigned int user_credit_return_threshold;
extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
extern uint kdeth_qp;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index a358d23ecd54..60db61536fed 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
/* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
return rcd;
bail:
+ dd->rcd[ctxt] = NULL;
kfree(rcd->egrbufs.rcvtids);
kfree(rcd->egrbufs.buffers);
kfree(rcd);
@@ -500,6 +501,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
INIT_WORK(&ppd->sma_message_work, handle_sma_message);
INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+ INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
@@ -708,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_physaddr,
+ &dd->rcvhdrtail_dummy_dma,
GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
@@ -941,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (rcd->rcvhdrq) {
dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
- rcd->rcvhdrq, rcd->rcvhdrq_phys);
+ rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
if (rcd->rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)rcd->rcvhdrtail_kvaddr,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
}
@@ -955,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.rcvtids);
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].phys)
+ if (rcd->egrbufs.buffers[e].dma)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
- rcd->egrbufs.buffers[e].phys);
+ rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
@@ -1333,7 +1335,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
spin_unlock(&ppd->cc_state_lock);
if (cc_state)
- call_rcu(&cc_state->rcu, cc_state_reclaim);
+ kfree_rcu(cc_state, rcu);
}
free_credit_return(dd);
@@ -1353,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
if (dd->rcvhdrtail_dummy_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
}
@@ -1576,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t phys_hdrqtail;
+ dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1589,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+ &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
@@ -1601,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+ rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
@@ -1633,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
* before enabling any receive context
*/
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
return 0;
@@ -1644,7 +1646,7 @@ bail_free:
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
bail:
return -ENOMEM;
@@ -1705,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[idx].addr =
dma_zalloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].phys,
+ &rcd->egrbufs.buffers[idx].dma,
gfp_flags);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
rcd->egrbufs.buffers[idx].addr;
- rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys =
- rcd->egrbufs.buffers[idx].phys;
+ rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
+ rcd->egrbufs.buffers[idx].dma;
rcd->egrbufs.alloced++;
alloced_bytes += rcd->egrbufs.rcvtid_size;
idx++;
@@ -1754,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (i = 0, j = 0, offset = 0; j < idx; i++) {
if (i >= rcd->egrbufs.count)
break;
- rcd->egrbufs.rcvtids[i].phys =
- rcd->egrbufs.buffers[j].phys + offset;
+ rcd->egrbufs.rcvtids[i].dma =
+ rcd->egrbufs.buffers[j].dma + offset;
rcd->egrbufs.rcvtids[i].addr =
rcd->egrbufs.buffers[j].addr + offset;
rcd->egrbufs.alloced++;
- if ((rcd->egrbufs.buffers[j].phys + offset +
+ if ((rcd->egrbufs.buffers[j].dma + offset +
new_size) ==
- (rcd->egrbufs.buffers[j].phys +
+ (rcd->egrbufs.buffers[j].dma +
rcd->egrbufs.buffers[j].len)) {
j++;
offset = 0;
@@ -1813,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
- rcd->egrbufs.rcvtids[idx].phys, order);
+ rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
goto bail;
@@ -1825,9 +1827,9 @@ bail_rcvegrbuf_phys:
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[idx].len,
rcd->egrbufs.buffers[idx].addr,
- rcd->egrbufs.buffers[idx].phys);
+ rcd->egrbufs.buffers[idx].dma);
rcd->egrbufs.buffers[idx].addr = NULL;
- rcd->egrbufs.buffers[idx].phys = 0;
+ rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
bail:
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 1263abe01999..9487c9bb8920 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* offline.
*/
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
set_link_state(ppd, link_state);
@@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
if (key == okey)
continue;
/*
- * Don't update pkeys[2], if an HFI port without MgmtAllowed
- * by neighbor is a switch.
- */
- if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
- continue;
- /*
* The SM gives us the complete PKey table. We have
* to ensure that we put the PKeys in the matching
* slots.
@@ -1819,6 +1812,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
+ if (dd->pport->port_type != PORT_TYPE_QSFP) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
#define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
#define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
@@ -2599,7 +2597,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- unsigned long port_num;
+ u8 port_num;
unsigned long vl;
u32 vl_select_mask;
int vfi;
@@ -2633,9 +2631,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
- if ((u8)port_num != port) {
+ if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
@@ -2837,7 +2835,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3010,7 +3008,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3247,7 +3245,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
*/
port_mask = be64_to_cpu(req->port_select_mask[3]);
port_num = find_first_bit((unsigned long *)&port_mask,
- sizeof(port_mask));
+ sizeof(port_mask) * 8);
if (port_num != port) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3398,7 +3396,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
spin_unlock(&ppd->cc_state_lock);
- call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+ kfree_rcu(old_cc_state, rcu);
}
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
@@ -3553,13 +3551,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
-void cc_state_reclaim(struct rcu_head *rcu)
-{
- struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
-
- kfree(cc_state);
-}
-
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
u32 *resp_len)
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ac1bf4a73571..50a3a36d9363 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -551,11 +551,11 @@ static inline u32 group_size(u32 group)
}
/*
- * Obtain the credit return addresses, kernel virtual and physical, for the
+ * Obtain the credit return addresses, kernel virtual and bus, for the
* given sc.
*
* To understand this routine:
- * o va and pa are arrays of struct credit_return. One for each physical
+ * o va and dma are arrays of struct credit_return. One for each physical
* send context, per NUMA.
* o Each send context always looks in its relative location in a struct
* credit_return for its credit return.
@@ -563,14 +563,14 @@ static inline u32 group_size(u32 group)
* with the same value. Use the address of the first send context in the
* group.
*/
-static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
+static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
{
u32 gc = group_context(sc->hw_context, sc->group);
u32 index = sc->hw_context & 0x7;
sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
- *pa = (unsigned long)
- &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
+ *dma = (unsigned long)
+ &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
}
/*
@@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
- dma_addr_t pa;
+ dma_addr_t dma;
unsigned long flags;
u64 reg;
u32 thresh;
@@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->sw_index = sw_index;
sc->hw_context = hw_context;
- cr_group_addresses(sc, &pa);
+ cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
/* PIO Send Memory Address details */
@@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
/* set up credit return */
- reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
+ reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
/*
@@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd)
dd->cr_base[i].va = dma_zalloc_coherent(
&dd->pcidev->dev,
bytes,
- &dd->cr_base[i].pa,
+ &dd->cr_base[i].dma,
GFP_KERNEL);
if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd)
TXE_NUM_CONTEXTS *
sizeof(struct credit_return),
dd->cr_base[i].va,
- dd->cr_base[i].pa);
+ dd->cr_base[i].dma);
}
}
kfree(dd->cr_base);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 464cbd27b975..e709eaf743b5 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -154,7 +154,7 @@ struct credit_return {
/* NUMA indexed credit return array */
struct credit_return_base {
struct credit_return *va;
- dma_addr_t pa;
+ dma_addr_t dma;
};
/* send context configuration sizes (one per type) */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b58849..aa7773643107 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
preempt_enable();
}
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
@@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
#define mshift(x) (8 * (x))
/*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry. Other bytes are zeroed. Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
-{
- unsigned long off;
-
- if (nbytes == 0) {
- pbuf->carry.val64 = 0;
- } else {
- /* align our pointer */
- off = (unsigned long)from & 0x7;
- from = (void *)((unsigned long)from & ~0x7l);
- pbuf->carry.val64 = ((*(u64 *)from)
- << zshift(nbytes + off))/* zero upper bytes */
- >> zshift(nbytes); /* place at bottom */
- }
- pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
-{
- unsigned long off = (unsigned long)from & 0x7;
- unsigned int room, xbytes;
-
- /* align our pointer */
- from = (void *)((unsigned long)from & ~0x7l);
-
- /* check count first - don't read anything if count is zero */
- while (nbytes) {
- /* find the number of bytes in this u64 */
- room = 8 - off; /* this u64 has room for this many bytes */
- xbytes = min(room, nbytes);
-
- /*
- * shift down to zero lower bytes, shift up to zero upper
- * bytes, shift back down to move into place
- */
- pbuf->carry.val64 |= (((*(u64 *)from)
- >> mshift(off))
- << zshift(xbytes))
- >> zshift(xbytes + pbuf->carry_bytes);
- off = 0;
- pbuf->carry_bytes += xbytes;
- nbytes -= xbytes;
- from += sizeof(u64);
- }
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- unsigned int remaining;
-
- if (zbytes == 0) /* nothing to do */
- return;
-
- remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
-
- /* NOTE: zshift only guaranteed to work if remaining != 0 */
- if (remaining)
- pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
- >> zshift(remaining);
- else
- pbuf->carry.val64 = 0;
- pbuf->carry_bytes = remaining;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
- struct pio_buf *pbuf,
- void __iomem *dest,
- const void *src)
-{
- u64 new, temp;
-
- new = *(u64 *)src;
- temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
- writeq(temp, dest);
- pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
- writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry. If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
- if (pbuf->carry_bytes) {
- /* unused bytes are always kept zeroed, so just write */
- writeq(pbuf->carry.val64, dest);
- return 1;
- }
-
- return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
+ /* fall through */
case 6:
*dest++ = *src++;
+ /* fall through */
case 5:
*dest++ = *src++;
+ /* fall through */
case 4:
*dest++ = *src++;
+ /* fall through */
case 3:
*dest++ = *src++;
+ /* fall through */
case 2:
*dest++ = *src++;
+ /* fall through */
case 1:
*dest++ = *src++;
+ /* fall through */
}
}
@@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
+ pbuf->carry.val64 = 0;
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
@@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
}
/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
+ * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed.
*
* NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
- void *dest,
+ void __iomem *dest,
const void *src)
{
- u32 remainder = 8 - pbuf->carry_bytes;
+ u64 new, temp;
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
- writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+ new = *(u64 *)src;
+ temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+ writeq(temp, dest);
+ pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
@@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
- u64 zero = 0;
-
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ /* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
-#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
@@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
- unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
- unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+ unsigned long qw_to_write = nbytes >> 3;
+ unsigned long bytes_left = nbytes & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- /* adjust carry */
- if (pbuf->carry_bytes < bytes_left) {
- /* need to read more */
- read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
+ pbuf->qw_written += qw_to_write;
+
+ /* handle carry and left-over bytes */
+ if (pbuf->carry_bytes + bytes_left >= 8) {
+ unsigned long nread;
+
+ /* there is enough to fill another qw - fill carry */
+ nread = 8 - pbuf->carry_bytes;
+ read_extra_bytes(pbuf, from, nread);
+
+ /*
+ * One more write - but need to make sure dest is correct.
+ * Check for wrap and the possibility the write
+ * should be in SOP space.
+ *
+ * The two checks immediately below cannot both be true, hence
+ * the else. If we have wrapped, we cannot still be within the
+ * first block. Conversely, if we are still in the first block,
+ * we cannot have wrapped. We do the wrap check first as that
+ * is more likely.
+ */
+ /* adjust if we have wrapped */
+ if (dest >= pbuf->end)
+ dest -= pbuf->size;
+ /* jump to the SOP range if within the first block */
+ else if (pbuf->qw_written < PIO_BLOCK_QWS)
+ dest += SOP_DISTANCE;
+
+ /* flush out full carry */
+ carry8_write8(pbuf->carry, dest);
+ pbuf->qw_written++;
+
+ /* now adjust and read the rest of the bytes into carry */
+ bytes_left -= nread;
+ from += nread; /* from is now not aligned */
+ read_low_bytes(pbuf, from, bytes_left);
} else {
- /* remove invalid bytes */
- zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
+ /* not enough to fill another qw, append the rest to carry */
+ read_extra_bytes(pbuf, from, bytes_left);
}
-
- pbuf->qw_written += qw_to_write;
}
/*
@@ -771,6 +649,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_extra_bytes(pbuf, from, to_fill);
from += to_fill;
nbytes -= to_fill;
+ /* may not be enough valid bytes left to align */
+ if (extra > nbytes)
+ extra = nbytes;
/* ...now write carry */
dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +679,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
read_low_bytes(pbuf, from, extra);
from += extra;
nbytes -= extra;
+ /*
+ * If no bytes are left, return early - we are done.
+ * NOTE: This short-circuit is *required* because
+ * "extra" may have been reduced in size and "from"
+ * is not aligned, as required when leaving this
+ * if block.
+ */
+ if (nbytes == 0)
+ return;
}
/* at this point, from is QW aligned */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 965c8aef0c60..202433178864 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -47,29 +47,39 @@
#include "hfi.h"
#include "efivar.h"
+#include "eprom.h"
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
unsigned long size = 0;
u8 *temp_platform_config = NULL;
+ u32 esize;
+
+ ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ size = esize;
+ goto success;
+ }
+ /* fail, try EFI variable */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
- if (ret) {
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
- /* fall back to request firmware */
- platform_config_load = 1;
- goto bail;
- }
+ if (!ret)
+ goto success;
+
+ dd_dev_info(dd,
+ "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+ __func__);
+ /* fall back to request firmware */
+ platform_config_load = 1;
+ return;
+success:
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
-
-bail:
- /* exit */;
}
void free_platform_config(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index a5aa3517e7d5..9fc75e7e8781 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp)
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}
@@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp)
*/
void hfi1_schedule_send(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
+ * hfi1_get_credit - handle credit in aeth
+ * @qp: the qp
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
@@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
+ lockdep_assert_held(&qp->s_lock);
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
@@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
static int iowait_sleep(
@@ -544,7 +544,7 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
iter->dev = dev;
iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
- if (qp_iter_next(iter)) {
- kfree(iter);
- return NULL;
- }
return iter;
}
@@ -812,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
kfree(priv);
return ERR_PTR(-ENOMEM);
}
+ iowait_init(
+ &priv->s_iowait,
+ 1,
+ _hfi1_do_send,
+ iowait_sleep,
+ iowait_wakeup,
+ iowait_sdma_drained);
setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
qp->s_timer.function = hfi1_rc_timeout;
return priv;
@@ -852,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi)
void flush_qp_waiters(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_stop_rc_timers(qp);
}
@@ -877,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- iowait_init(
- &priv->s_iowait,
- 1,
- _hfi1_do_send,
- iowait_sleep,
- iowait_wakeup,
- iowait_sdma_drained);
priv->r_adefered = 0;
clear_ahg(qp);
}
@@ -967,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index a207717ade2a..1869f639c3ae 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
bus->algo.getsda = hfi1_getsda;
bus->algo.getscl = hfi1_getscl;
bus->algo.udelay = 5;
- bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.timeout = usecs_to_jiffies(100000);
bus->algo.data = bus;
bus->adapter.owner = THIS_MODULE;
@@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
u8 *data)
{
struct hfi1_pportdata *ppd;
- u32 excess_len = 0;
- int ret = 0;
+ u32 excess_len = len;
+ int ret = 0, offset = 0;
if (port_num > dd->num_pports || port_num < 1) {
dd_dev_info(dd, "%s: Invalid port number %d\n",
@@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
}
memcpy(data, &ppd->qsfp_info.cache[addr], len);
+
+ if (addr <= QSFP_MONITOR_VAL_END &&
+ (addr + len) >= QSFP_MONITOR_VAL_START) {
+ /* Overlap with the dynamic channel monitor range */
+ if (addr < QSFP_MONITOR_VAL_START) {
+ if (addr + len <= QSFP_MONITOR_VAL_END)
+ len = addr + len - QSFP_MONITOR_VAL_START;
+ else
+ len = QSFP_MONITOR_RANGE;
+ offset = QSFP_MONITOR_VAL_START - addr;
+ addr = QSFP_MONITOR_VAL_START;
+ } else if (addr == QSFP_MONITOR_VAL_START) {
+ offset = 0;
+ if (addr + len > QSFP_MONITOR_VAL_END)
+ len = QSFP_MONITOR_RANGE;
+ } else {
+ offset = 0;
+ if (addr + len > QSFP_MONITOR_VAL_END)
+ len = QSFP_MONITOR_VAL_END - addr + 1;
+ }
+ /* Refresh the values of the dynamic monitors from the cable */
+ ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len);
+ if (ret != len) {
+ ret = -EAGAIN;
+ goto set_zeroes;
+ }
+ }
+
return 0;
set_zeroes:
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 69275ebd9597..36cf52359848 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -74,6 +74,9 @@
/* Defined fields that Intel requires of qualified cables */
/* Byte 0 is Identifier, not checked */
/* Byte 1 is reserved "status MSB" */
+#define QSFP_MONITOR_VAL_START 22
+#define QSFP_MONITOR_VAL_END 81
+#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1)
#define QSFP_TX_CTRL_BYTE_OFFS 86
#define QSFP_PWR_CTRL_BYTE_OFFS 93
#define QSFP_CDR_CTRL_BYTE_OFFS 98
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5da190e6011b..8bc5013f39a1 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -55,7 +55,7 @@
#include "trace.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
+#define OP(x) RC_OP(x)
/**
* hfi1_add_retry_timer - add/start a retry timer
@@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
@@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
add_timer(&priv->s_rnr_timer);
@@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
@@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
{
int rval = 0;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry */
if (qp->s_flags & RVT_S_TIMER) {
qp->s_flags &= ~RVT_S_TIMER;
@@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
@@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
int rval = 0;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from rnr timer */
if (qp->s_flags & RVT_S_WAIT_RNR) {
qp->s_flags &= ~RVT_S_WAIT_RNR;
@@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp)
del_timer_sync(&priv->s_rnr_timer);
}
-/* only opcode mask for adaptive pio */
-const u32 rc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
- BIT(OP(ACKNOWLEDGE & 0x1f)) |
- BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
- BIT(OP(COMPARE_SWAP & 0x1f)) |
- BIT(OP(FETCH_ADD & 0x1f));
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
* Note the QP s_lock must be held.
*/
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
@@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
@@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
@@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
int middle = 0;
int delta;
+ lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
goto bail_no_tx;
@@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr + len);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr + len,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
@@ -841,7 +835,7 @@ bail_no_tx:
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
+ * send side QP state and send engine.
*/
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
@@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
u32 vl, plen;
struct send_context *sc;
struct pio_buf *pbuf;
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
@@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (!pbuf) {
/*
* We have no room to send at the moment. Pass
- * responsibility for sending the ACK to the send tasklet
+ * responsibility for sending the ACK to the send engine
* so that when enough buffer space becomes available,
* the ACK is sent ahead of other outgoing packets.
*/
@@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
return;
queue_ack:
- this_cpu_inc(*ibp->rvp.rc_qacks);
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto unlock;
+ this_cpu_inc(*ibp->rvp.rc_qacks);
qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
if (is_fecn)
qp->s_flags |= RVT_S_ECN;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
hfi1_schedule_send(qp);
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
+ lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
/*
@@ -1027,7 +1025,7 @@ done:
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
- * asynchronously before the send tasklet can get scheduled.
+ * asynchronously before the send engine can get scheduled.
* Doing it in hfi1_make_rc_req() is too late.
*/
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
@@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
@@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe;
u32 n = qp->s_last;
+ lockdep_assert_held(&qp->s_lock);
/* Find the work request corresponding to the given PSN. */
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
@@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
+ lockdep_assert_held(&qp->s_lock);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct ib_wc wc;
unsigned i;
+ lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int diff;
unsigned long to;
+ lockdep_assert_held(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
{
struct rvt_swqe *wqe;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
hfi1_stop_rc_timers(qp);
@@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void rc_rcv_resp(struct hfi1_ibport *ibp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen, struct rvt_qp *qp,
u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
struct hfi1_ctxtdata *rcd)
@@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64)be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else {
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
- }
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
@@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
return;
list_del_init(&qp->rspwait);
qp->r_flags &= ~RVT_R_RSP_NAK;
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
/**
@@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
+static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
@@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
}
if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
@@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
case OP(FETCH_ADD): {
/*
* If we didn't find the atomic request in the ack queue
- * or the send tasklet is already backed up to send an
+ * or the send engine is already backed up to send an
* earlier entry, we can ignore this request.
*/
if (!e || e->opcode != (u8)opcode || old_req)
@@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
@@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int copy_last = 0;
u32 rkey;
+ lockdep_assert_held(&qp->r_lock);
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
@@ -2342,7 +2343,7 @@ send_last:
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2397,7 +2398,7 @@ send_last:
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2432,7 +2433,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2469,8 +2470,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2481,11 +2481,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2499,7 +2499,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2575,12 +2575,12 @@ send_ack:
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
int diff;
u32 opcode;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 48d5094f98e2..a1576aea4756 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -352,7 +352,7 @@ err:
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the tasklet, we still
+ * Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
@@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
@@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work)
* @work: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
- * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
void hfi1_do_send(struct rvt_qp *qp)
@@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
- * the send tasklet will be woken up later.
+ * the send engine will be woken up later.
*/
if (hfi1_verbs_send(qp, &ps))
return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc05b349..fd39bcaa062d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
}
/**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+	struct sdma_vl_map *vl_map;
+	int ret = -EINVAL;
+
+	/* an engine index outside the table cannot be looked up */
+	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+		return ret;
+
+	rcu_read_lock();
+	vl_map = rcu_dereference(sde->dd->sdma_map);
+	/* with no map published the -EINVAL default is returned */
+	if (likely(vl_map))
+		ret = (u8)vl_map->engine_to_vl[sde->this_idx];
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
* @selector: a spreading factor
@@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}
+/*
+ * Engine selection table for one cpu and one vl.
+ *
+ * @mask: applied to the selector to index @sde
+ * @ctr: number of distinct engines stored at the front of @sde
+ * @sde: trailing table of engine pointers, sized by the allocator
+ */
+struct sdma_rht_map_elem {
+	u32 mask;
+	u8 ctr;
+	struct sdma_engine *sde[];
+};
+
+/*
+ * Hash table entry for one cpu: cpu_id is the lookup key, map[] holds one
+ * engine selection table pointer per vl (NULL when that vl has none), and
+ * node links the entry into the rhashtable.
+ */
+struct sdma_rht_node {
+ unsigned long cpu_id;
+ struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+ struct rhash_head node;
+};
+
+/* initial element-count hint handed to the hash table */
+#define NR_CPUS_HINT 192
+
+/* entries are keyed by sdma_rht_node.cpu_id and linked via sdma_rht_node.node */
+static const struct rhashtable_params sdma_rht_params = {
+	/* key and linkage */
+	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
+	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+	.head_offset = offsetof(struct sdma_rht_node, node),
+	/* sizing */
+	.nelem_hint = NR_CPUS_HINT,
+	.min_size = 8,
+	.max_size = NR_CPUS,
+	.automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - pick the sdma engine for a user sdma request
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * A task that is allowed to run on exactly one cpu is looked up in the
+ * cpu -> sde hash table; when an engine is found for @vl it is returned.
+ * In every other case the choice is left to sdma_select_engine_vl().
+ * To ensure correct ordering, the mapping from <selector, vl> to sde must
+ * remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl)
+{
+	struct sdma_engine *chosen = NULL;
+	struct sdma_rht_map_elem *tbl;
+	struct sdma_rht_node *entry;
+	unsigned long this_cpu;
+
+	/* the user mapping is only consulted for single-cpu tasks */
+	if (cpumask_weight(tsk_cpus_allowed(current)) == 1) {
+		this_cpu = smp_processor_id();
+
+		rcu_read_lock();
+		entry = rhashtable_lookup_fast(&dd->sdma_rht, &this_cpu,
+					       sdma_rht_params);
+		tbl = entry ? entry->map[vl] : NULL;
+		if (tbl)
+			chosen = tbl->sde[selector & tbl->mask];
+		rcu_read_unlock();
+	}
+
+	/* no usable user mapping: use the default engine selection */
+	return chosen ? chosen : sdma_select_engine_vl(dd, selector, vl);
+}
+
+/*
+ * Fill the slots between ctr and the next power of two by repeating the
+ * entries from the front of the table.
+ */
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+	unsigned long table_sz = roundup_pow_of_two(map->ctr ? : 1);
+	unsigned long dst;
+
+	for (dst = map->ctr; dst < table_sz; dst++)
+		map->sde[dst] = map->sde[dst - map->ctr];
+}
+
+/*
+ * Remove @sde from @map, if present, and rebuild the selection table.
+ *
+ * The remaining engines are shifted down over the removed slot, ctr and
+ * mask are recomputed, and the padding slots are repopulated.
+ */
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+				 struct sdma_engine *sde)
+{
+	unsigned int i, pow;
+
+	/* only need to check the first ctr entries for a match */
+	for (i = 0; i < map->ctr; i++) {
+		if (map->sde[i] != sde)
+			continue;
+
+		memmove(&map->sde[i], &map->sde[i + 1],
+			(map->ctr - i - 1) * sizeof(map->sde[0]));
+		map->ctr--;
+		/*
+		 * Removing the last engine moves nothing, so slot 0 would
+		 * still point at @sde while mask becomes 0. Clear it so a
+		 * lookup through this table finds no engine instead of the
+		 * one that was just removed.
+		 */
+		if (!map->ctr)
+			map->sde[0] = NULL;
+		pow = roundup_pow_of_two(map->ctr ? : 1);
+		map->mask = pow - 1;
+		sdma_populate_sde_map(map);
+		break;
+	}
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask.
+ * Held by sdma_set_cpu_to_sde_map() while it updates the mappings and by
+ * sdma_get_cpu_to_sde_map() while it formats the mask.
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+/*
+ * sdma_set_cpu_to_sde_map() - set the list of cpus mapped to an sdma engine
+ * @sde: sdma engine whose cpu list is being set
+ * @buf: cpu list string, parsed with cpulist_parse()
+ * @count: not used by this function
+ *
+ * Every cpu in the parsed list must be online. For each listed cpu that is
+ * not already in sde->cpu_mask, @sde is added to that cpu's selection table
+ * for the vl reported by sdma_engine_get_vl(). @sde is then removed from
+ * the tables of all online cpus that are not in the list, and hash table
+ * entries left without any engine are freed.
+ *
+ * Return: strnlen(@buf, PAGE_SIZE) on success, otherwise the non-zero
+ * error value of the step that failed.
+ */
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ cpumask_var_t mask, new_mask;
+ unsigned long cpu;
+ int ret, vl, sz;
+
+ vl = sdma_engine_get_vl(sde);
+ if (unlikely(vl < 0))
+ return -EINVAL;
+
+ /* mask: cpus requested in buf; new_mask: cpus mapped by this call */
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ ret = cpulist_parse(buf, mask);
+ if (ret)
+ goto out_free;
+
+ if (!cpumask_subset(mask, cpu_online_mask)) {
+ dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ /* one selection table with TXE_NUM_SDMA_ENGINES engine slots */
+ sz = sizeof(struct sdma_rht_map_elem) +
+ (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+ mutex_lock(&process_to_sde_mutex);
+
+ for_each_cpu(cpu, mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Check if we have this already mapped */
+ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+ cpumask_set_cpu(cpu, new_mask);
+ continue;
+ }
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (!rht_node) {
+ /* no entry for this cpu yet: create one holding only sde */
+ rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+ if (!rht_node) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+ if (!rht_node->map[vl]) {
+ kfree(rht_node);
+ ret = -ENOMEM;
+ goto out;
+ }
+ rht_node->cpu_id = cpu;
+ rht_node->map[vl]->mask = 0;
+ rht_node->map[vl]->ctr = 1;
+ rht_node->map[vl]->sde[0] = sde;
+
+ ret = rhashtable_insert_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ if (ret) {
+ kfree(rht_node->map[vl]);
+ kfree(rht_node);
+ dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+ cpu);
+ goto out;
+ }
+
+ } else {
+ int ctr, pow;
+
+ /* Add new user mappings */
+ if (!rht_node->map[vl])
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+ if (!rht_node->map[vl]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* append sde and widen the mask to the next power of two */
+ rht_node->map[vl]->ctr++;
+ ctr = rht_node->map[vl]->ctr;
+ rht_node->map[vl]->sde[ctr - 1] = sde;
+ pow = roundup_pow_of_two(ctr);
+ rht_node->map[vl]->mask = pow - 1;
+
+ /* Populate the sde map table */
+ sdma_populate_sde_map(rht_node->map[vl]);
+ }
+ cpumask_set_cpu(cpu, new_mask);
+ }
+
+ /* Clean up old mappings */
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Don't cleanup sdes that are set in the new mask */
+ if (cpumask_test_cpu(cpu, mask))
+ continue;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (rht_node) {
+ bool empty = true;
+ int i;
+
+ /* Remove mappings for old sde */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ if (rht_node->map[i])
+ sdma_cleanup_sde_map(rht_node->map[i],
+ sde);
+
+ /* Free empty hash table entries */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i])
+ continue;
+
+ if (rht_node->map[i]->ctr) {
+ empty = false;
+ break;
+ }
+ }
+
+ if (empty) {
+ ret = rhashtable_remove_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ WARN_ON(ret);
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+ }
+ }
+ }
+
+ cpumask_copy(&sde->cpu_mask, new_mask);
+ /*
+ * NOTE: the error exits above jump here without updating sde->cpu_mask;
+ * mappings added earlier in the first loop are not rolled back.
+ */
+out:
+ mutex_unlock(&process_to_sde_mutex);
+out_free:
+ free_cpumask_var(mask);
+ free_cpumask_var(new_mask);
+ return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+/*
+ * Format the engine's cpu_mask into @buf as a cpu list, or the word
+ * "empty" when no cpu is set, and return the resulting string length.
+ */
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	const struct cpumask *cpus = &sde->cpu_mask;
+
+	mutex_lock(&process_to_sde_mutex);
+	if (!cpumask_empty(cpus))
+		cpumap_print_to_pagebuf(true, buf, cpus);
+	else
+		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+	mutex_unlock(&process_to_sde_mutex);
+
+	return strnlen(buf, PAGE_SIZE);
+}
+
+/* Free one hash table entry: every per-vl table, then the entry itself. */
+static void sdma_rht_free(void *ptr, void *arg)
+{
+	struct sdma_rht_node *entry = ptr;
+	struct sdma_rht_map_elem **slot = entry->map;
+	struct sdma_rht_map_elem **end = slot + HFI1_MAX_VLS_SUPPORTED;
+
+	/* kfree() accepts NULL, so unused vl slots need no check */
+	while (slot < end)
+		kfree(*slot++);
+
+	kfree(entry);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump of one cpu's sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * Prints one line for @cpuid listing, per vl, the engines stored in that
+ * cpu's selection table. Nothing is printed when the cpu has no entry.
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+				struct hfi1_devdata *dd,
+				unsigned long cpuid)
+{
+	struct sdma_rht_node *entry;
+	int vl, slot;
+
+	entry = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+				       sdma_rht_params);
+	if (!entry)
+		return;
+
+	seq_printf(s, "cpu%3lu: ", cpuid);
+	for (vl = 0; vl < HFI1_MAX_VLS_SUPPORTED; vl++) {
+		struct sdma_rht_map_elem *tbl = entry->map[vl];
+
+		/* only vls that currently hold at least one engine */
+		if (tbl && tbl->ctr) {
+			seq_printf(s, " vl%d: [", vl);
+
+			for (slot = 0; slot < tbl->ctr; slot++) {
+				struct sdma_engine *eng = tbl->sde[slot];
+
+				if (!eng)
+					continue;
+
+				if (slot)
+					seq_puts(s, ",");
+
+				seq_printf(s, " sdma%2d", eng->this_idx);
+			}
+			seq_puts(s, " ]");
+		}
+	}
+
+	seq_puts(s, "\n");
+}
+
/*
* Free the indicated map struct
*/
@@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;
+
+ if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ goto bail;
+
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
@@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
+ rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -2086,6 +2439,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2100,18 +2458,18 @@ nodesc:
* side locking.
*
* Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list)
+ struct list_head *tx_list, u32 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
unsigned long flags;
u16 tail = INVALID_TAIL;
- int count = 0;
+ u32 submit_count = 0, flush_count = 0, total_count;
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
@@ -2127,33 +2485,34 @@ retry:
}
list_del_init(&tx->list);
tail = submit_tx(sde, tx);
- count++;
+ submit_count++;
if (tail != INVALID_TAIL &&
- (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+ (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
sdma_update_tail(sde, tail);
tail = INVALID_TAIL;
}
}
update_tail:
+ total_count = submit_count + flush_count;
if (wait)
- iowait_sdma_add(wait, count);
+ iowait_sdma_add(wait, total_count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret == 0 ? count : ret;
+ *count_out = total_count;
+ return ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
- if (wait)
- iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
list_add_tail(&tx->list, &sde->flushlist);
+ flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..56257ea3598f 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@ struct sdma_engine {
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
+ struct cpumask cpu_mask;
+ struct kobject kobj;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
struct sdma_txreq *tx);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
- struct list_head *tx_list);
+ struct list_head *tx_list,
+ u32 *count);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl(
u32 selector,
u8 vl);
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
+void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
+ unsigned long cpuid);
#ifdef CONFIG_SDMA_VERBOSITY
void sdma_dumpstate(struct sdma_engine *);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c655f7e..edba22461a9c 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,95 @@ bail:
return ret;
}
+/*
+ * sysfs attribute of an sdma engine: the generic attribute plus optional
+ * show/store handlers that take the engine rather than the kobject.
+ */
+struct sde_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct sdma_engine *sde, char *buf);
+ ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+/*
+ * sysfs read entry point: recover the engine and its attribute from the
+ * embedded members and call the attribute's show handler.
+ */
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct sdma_engine *engine =
+		container_of(kobj, struct sdma_engine, kobj);
+	struct sde_attribute *sattr =
+		container_of(attr, struct sde_attribute, attr);
+
+	/* -EINVAL when the attribute has no show handler */
+	return sattr->show ? sattr->show(engine, buf) : -EINVAL;
+}
+
+/*
+ * sysfs write entry point: requires CAP_SYS_ADMIN, then calls the
+ * attribute's store handler with the owning engine.
+ */
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct sde_attribute *sattr;
+	struct sdma_engine *engine;
+
+	/* the privilege check comes before the handler check */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	sattr = container_of(attr, struct sde_attribute, attr);
+	if (!sattr->store)
+		return -EINVAL;
+
+	engine = container_of(kobj, struct sdma_engine, kobj);
+	return sattr->store(engine, buf, count);
+}
+
+/* route sysfs reads and writes on an engine kobject to sde_show/sde_store */
+static const struct sysfs_ops sde_sysfs_ops = {
+ .show = sde_show,
+ .store = sde_store,
+};
+
+/* kobject type for the per-engine kobjects; only sysfs_ops is set here */
+static struct kobj_type sde_ktype = {
+ .sysfs_ops = &sde_sysfs_ops,
+};
+
+/* define a struct sde_attribute named sde_attr_<_name> via __ATTR() */
+#define SDE_ATTR(_name, _mode, _show, _store) \
+ struct sde_attribute sde_attr_##_name = \
+ __ATTR(_name, _mode, _show, _store)
+
+/* show handler of the cpu_list attribute; forwards to the sdma layer */
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+/* store handler of the cpu_list attribute; forwards to the sdma layer */
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+ const char *buf, size_t count)
+{
+ return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+/*
+ * show handler of the vl attribute: prints the value returned by
+ * sdma_engine_get_vl(), or returns its negative result unchanged.
+ */
+static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
+{
+	int vl = sdma_engine_get_vl(sde);
+
+	return vl < 0 ? vl : snprintf(buf, PAGE_SIZE, "%d\n", vl);
+}
+
+/* cpu_list: readable by all, writable by owner; vl: read-only */
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+ sde_show_cpu_to_sde_map,
+ sde_store_cpu_to_sde_map);
+static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL);
+
+/* attributes created under every per-engine kobject */
+static struct sde_attribute *sde_attribs[] = {
+ &sde_attr_cpu_list,
+ &sde_attr_vl
+};
+
/*
* Register and create our files in /sys/class/infiniband.
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
+ struct device *class_dev = &dev->dev;
+ int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
goto bail;
}
+ for (i = 0; i < dd->num_sdma; i++) {
+ ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+ &sde_ktype, &class_dev->kobj,
+ "sdma%d", i);
+ if (ret)
+ goto bail;
+
+ for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+ ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+ &sde_attribs[j]->attr);
+ if (ret)
+ goto bail;
+ }
+ }
+
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_del(&dd->per_sdma[i].kobj);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 4cfb13771897..01f525cd985a 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,9 +47,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
+u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u8 opcode;
u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %lld"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
+#define ATOMICACKETH_PRN "origdata %llx"
+#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
-static u64 ib_u64_get(__be32 *p)
-{
- return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
-}
-
static const char *parse_syndrome(u8 syndrome)
{
switch (syndrome >> 5) {
@@ -113,8 +108,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length),
be32_to_cpu(eh->rc.imm_data));
@@ -126,8 +120,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length));
break;
@@ -145,20 +138,16 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
- (unsigned long long)
- ib_u64_get(eh->at.atomic_ack_eth));
+ ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
- (unsigned long long)ib_u64_get(
- eh->atomic_eth.vaddr),
+ get_ib_ateth_vaddr(&eh->atomic_eth),
eh->atomic_eth.rkey,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.swap_data),
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.compare_data));
+ get_ib_ateth_swap(&eh->atomic_eth),
+ get_ib_ateth_compare(&eh->atomic_eth));
break;
/* deth */
case OP(UD, SEND_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 31654bbac1cf..26ae789e47cf 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, hw_free)
__field(void __iomem *, piobase)
__field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
+ __field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
+ __field(u64, rcvegr_dma)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
@@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
__entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
+ __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
),
TP_printk("[%s] ctxt %u " UCTXT_FMT,
__get_str(dev),
@@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
+ __entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_phys
+ __entry->rcvegr_dma
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index c3e41aed0034..382fcda3a5f6 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,7 +55,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+u8 ibhdr_exhdr_len(struct ib_header *hdr);
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -74,7 +74,7 @@ __print_symbolic(lrh, \
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
+ struct ib_header *hdr),
TP_ARGS(dd, hdr),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
/* LRH */
@@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 9ba1f615ec95..11e02b228922 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TRACE_EVENT(snoop_capture,
TP_PROTO(struct hfi1_devdata *dd,
int hdr_len,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
int data_len,
void *data),
TP_ARGS(dd, hdr_len, hdr, data_len, data),
@@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture,
__dynamic_array(u8, raw_pkt, data_len)
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
if (__entry->lnh == HFI1_LRH_BTH)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index a726d96d185f..5e6d1bac4914 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -50,14 +50,7 @@
#include "qp.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/* only opcode mask for adaptive pio */
-const u32 uc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+#define OP(x) UC_OP(x)
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
@@ -70,7 +63,7 @@ const u32 uc_only_opcode =
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords = 5;
u32 bth0 = 0;
@@ -304,12 +297,12 @@ bail_no_tx:
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f01e8e1d62d3..97ae24b6314c 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -271,7 +271,7 @@ drop:
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
@@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
/*
* opa_smp_check() - Do the regular pkey checking, and the additional
- * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
- * ("SMA Packet Checks").
+ * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
+ * 9.10.25 ("SMA Packet Checks").
*
* Note that:
* - Checks are done using the pkey directly from the packet's BTH,
@@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/*
* SMPs fall into one of four (disjoint) categories:
- * SMA request, SMA response, trap, or trap repress.
- * Our response depends, in part, on which type of
- * SMP we're processing.
+ * SMA request, SMA response, SMA trap, or SMA trap repress.
+ * Our response depends, in part, on which type of SMP we're
+ * processing.
*
- * If this is not an SMA request, or trap repress:
- * - accept MAD if the port is running an SM
- * - pkey == FULL_MGMT_P_KEY =>
- * reply with unsupported method (i.e., just mark
- * the smp's status field here, and let it be
- * processed normally)
- * - pkey != LIM_MGMT_P_KEY =>
- * increment port recv constraint errors, drop MAD
- * If this is an SMA request or trap repress:
+ * If this is an SMA response, skip the check here.
+ *
+ * If this is an SMA request or SMA trap repress:
* - pkey != FULL_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
+ *
+ * Otherwise:
+ * - accept if the port is running an SM
+ * - drop MAD if it's an SMA trap
+ * - pkey == FULL_MGMT_P_KEY =>
+ * reply with unsupported method
+ * - pkey != FULL_MGMT_P_KEY =>
+ * increment port recv constraint errors, drop MAD
*/
switch (smp->method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ break;
case IB_MGMT_METHOD_GET:
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_REPORT:
@@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
return 1;
}
break;
- case IB_MGMT_METHOD_SEND:
- case IB_MGMT_METHOD_TRAP:
- case IB_MGMT_METHOD_GET_RESP:
- case IB_MGMT_METHOD_REPORT_RESP:
+ default:
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return 0;
+ if (smp->method == IB_MGMT_METHOD_TRAP)
+ return 1;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
return 0;
}
- if (pkey != LIM_MGMT_P_KEY) {
- ingress_pkey_table_fail(ppd, pkey, slid);
- return 1;
- }
- break;
- default:
- break;
+ ingress_pkey_table_fail(ppd, pkey, slid);
+ return 1;
}
return 0;
}
@@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
@@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ u8 sc5 = hdr2sc(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0ecf27903dc2..a761f804111e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff
+#define AHG_KDETH_INTR_SHIFT 12
+
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -546,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
u8 opcode, sc, vl;
int req_queued = 0;
u16 dlid;
- u8 selector;
+ u32 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -751,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
dlid = be16_to_cpu(req->hdr.lrh[1]);
selector = dlid_to_selector(dlid);
+ selector += uctxt->ctxt + fd->subctxt;
+ req->sde = sdma_select_user_engine(dd, selector, vl);
- /* Have to select the engine */
- req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt +
- selector),
- vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
goto free_req;
@@ -892,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
- int ret = 0;
+ int ret = 0, count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -1088,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
- if (list_empty(&req->txps)) {
- req->seqsubmitted = req->seqnum;
- if (req->seqnum == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
- /*
- * The txreq has already been submitted to the HW queue
- * so we can free the AHG entry now. Corruption will not
- * happen due to the sequential manner in which
- * descriptors are processed.
- */
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
- sdma_ahg_free(req->sde, req->ahg_idx);
- }
- } else if (ret > 0) {
- req->seqsubmitted += ret;
- ret = 0;
+ ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ req->seqsubmitted += count;
+ if (req->seqsubmitted == req->info.npkts) {
+ set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ /*
+ * The txreq has already been submitted to the HW queue
+ * so we can free the AHG entry now. Corruption will not
+ * happen due to the sequential manner in which
+ * descriptors are processed.
+ */
+ if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1480,7 +1474,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* Clear KDETH.SH on last packet */
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) >> 16);
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT);
val &= cpu_to_le16(~(1U << 13));
AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
} else {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index f803f7b5ef5d..4b7a16ceb362 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-unsigned int hfi1_max_cqes = 0x2FFFF;
+unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
"Maximum number of completion queue entries to support");
@@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-unsigned int hfi1_max_qps = 16384;
+unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
@@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
- [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
+ [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
@@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};
+#define OPMASK 0x1f
+
+static const u32 pio_opmask[BIT(3)] = {
+ /* RC */
+ [IB_OPCODE_RC >> 5] =
+ BIT(RC_OP(SEND_ONLY) & OPMASK) |
+ BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
+ BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
+ BIT(RC_OP(FETCH_ADD) & OPMASK),
+ /* UC */
+ [IB_OPCODE_UC >> 5] =
+ BIT(UC_OP(SEND_ONLY) & OPMASK) |
+ BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
+};
+
/*
* System image GUID.
*/
@@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -719,7 +741,7 @@ static void verbs_sdma_complete(
if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct hfi1_ib_header *hdr;
+ struct ib_header *hdr;
hdr = &tx->phdr.hdr;
hfi1_rc_send_complete(qp, hdr);
@@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
@@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ib_header *h = &tx->phdr.hdr;
+ struct ib_header *h = &tx->phdr.hdr;
if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
return dd->process_pio_send;
@@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_GSI:
case IB_QPT_UD:
break;
- case IB_QPT_RC:
- if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
- iowait_sdma_pending(&priv->s_iowait) == 0 &&
- !sdma_txreq_built(&tx->txreq))
- return dd->process_pio_send;
- break;
case IB_QPT_UC:
+ case IB_QPT_RC: {
+ u8 op = get_opcode(h);
+
if (piothreshold &&
qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+ (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
+ }
default:
break;
}
@@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
- struct hfi1_ib_header *hdr;
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr;
send_routine sr;
int ret;
u8 lnh;
@@ -1755,7 +1773,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
@@ -1782,7 +1800,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ sc5 = hdr2sc(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index d1b101c54828..1c3815d89eb7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -60,6 +60,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>
@@ -80,16 +81,6 @@ struct hfi1_packet;
*/
#define HFI1_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -104,80 +95,16 @@ struct hfi1_packet;
#define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK BIT(31)
-#define IB_BTH_SOLICITED BIT(23)
-#define IB_BTH_MIG_REQ BIT(22)
-
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+#define RC_OP(x) IB_OPCODE_RC_##x
+#define UC_OP(x) IB_OPCODE_UC_##x
+
/* flags passed by hfi1_ib_rcv() */
enum {
HFI1_HAS_GRH = (1 << 0),
};
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-union ib_ehdrs {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
-} __packed;
-
-struct hfi1_other_headers {
- __be32 bth[3];
- union ib_ehdrs u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct hfi1_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct hfi1_other_headers oth;
- } l;
- struct hfi1_other_headers oth;
- } u;
-} __packed;
-
struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
@@ -187,7 +114,7 @@ struct hfi1_ahg_info {
struct hfi1_sdma_header {
__le64 pbc;
- struct hfi1_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp);
@@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
@@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
-static inline u8 get_opcode(struct hfi1_ib_header *h)
+static inline u8 get_opcode(struct ib_header *h)
{
u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
@@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index d8fb056526f8..094ab829ec42 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index b738acdb9b02..8ec09e470f84 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -232,7 +232,7 @@ struct i40iw_device {
struct i40e_client *client;
struct i40iw_hw hw;
struct i40iw_cm_core cm_core;
- unsigned long *mem_resources;
+ u8 *mem_resources;
unsigned long *allocated_qps;
unsigned long *allocated_cqs;
unsigned long *allocated_mrs;
@@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
*next = resource_num + 1;
if (*next == max_resources)
*next = 0;
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
*req_resource_num = resource_num;
+ spin_unlock_irqrestore(&iwdev->resource_lock, flags);
return 0;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index c490f8d49864..85637696f6e9 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
buf += hdr_len;
}
- if (pd_len)
- memcpy(buf, pdata->addr, pd_len);
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
atomic_set(&sqbuf->refcount, 1);
@@ -3350,26 +3350,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp)
}
/**
- * i40iw_loopback_nop - Send a nop
- * @qp: associated hw qp
- */
-static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
-{
- u64 *wqe;
- u64 header;
-
- wqe = qp->qp_uk.sq_base->elem;
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(0, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 24, header);
-}
-
-/**
* i40iw_qp_disconnect - free qp and close cm
* @iwqp: associate qp for the connection
*/
@@ -3641,7 +3621,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
} else {
if (iwqp->page)
iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- i40iw_loopback_nop(&iwqp->sc_qp);
+ dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
}
if (iwqp->page)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 3ee0cad96bc6..0c92a40b3e86 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp,
info.dont_send_fin = false;
if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
info.reset_tcp_conn = true;
+ iwqp->hw_iwarp_state = state;
i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 798335fa3105..ac2f3cd9478c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = {
.notifier_call = i40iw_net_event
};
-static int i40iw_notifiers_registered;
+static atomic_t i40iw_notifiers_registered;
/**
* i40iw_find_i40e_handler - find a handler given a client info
@@ -1342,12 +1342,11 @@ exit:
*/
static void i40iw_register_notifiers(void)
{
- if (!i40iw_notifiers_registered) {
+ if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
register_inetaddr_notifier(&i40iw_inetaddr_notifier);
register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
register_netevent_notifier(&i40iw_net_notifier);
}
- i40iw_notifiers_registered++;
}
/**
@@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
/* fallthrough */
case INET_NOTIFIER:
- if (i40iw_notifiers_registered > 0) {
- i40iw_notifiers_registered--;
+ if (!atomic_dec_return(&i40iw_notifiers_registered)) {
unregister_netevent_notifier(&i40iw_net_notifier);
unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
@@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
enum i40iw_status_code status;
struct i40iw_handler *hdl;
+ hdl = i40iw_find_netdev(ldev->netdev);
+ if (hdl)
+ return 0;
+
hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
if (!hdl)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 0e8db0a35141..6fd043b1d714 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
{
if (!mem)
return I40IW_ERR_PARAM;
+ /*
+ * mem->va points to the parent of mem, so both mem and mem->va
+ * can not be touched once mem->va is freed
+ */
kfree(mem->va);
- mem->va = NULL;
return 0;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 2360338877bf..6329c971c22f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
return &iwqp->ibqp;
error:
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- kfree(mem);
return ERR_PTR(err_code);
}
@@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
}
if (iwpbl->pbl_allocated)
i40iw_free_pble(iwdev->pble_rsrc, palloc);
- kfree(iwpbl->iwmr);
- iwpbl->iwmr = NULL;
+ kfree(iwmr);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 2f0b4eed7eae..1ea686b9e0f9 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
checksum == cpu_to_be16(0xffff);
}
-static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
- unsigned tail, struct mlx4_cqe *cqe, int is_eth)
+static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+ unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
}
-
- return 0;
}
static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
@@ -689,12 +687,6 @@ repoll:
is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR;
- if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
- is_send)) {
- pr_warn("Completion for NOP opcode detected!\n");
- return -EINVAL;
- }
-
/* Resize CQ in progress */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
if (cq->resize_buf) {
@@ -720,12 +712,6 @@ repoll:
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
- if (unlikely(!mqp)) {
- pr_warn("CQ %06x with entry for unknown QPN %06x\n",
- cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
- return -EINVAL;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -738,11 +724,6 @@ repoll:
/* SRQ is also in the radix tree */
msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
srq_num);
- if (unlikely(!msrq)) {
- pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
- cq->mcq.cqn, srq_num);
- return -EINVAL;
- }
}
if (is_send) {
@@ -852,9 +833,11 @@ repoll:
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
- MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
- return use_tunnel_data(*cur_qp, cq, wc, tail,
- cqe, is_eth);
+ MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+ use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
+ is_eth);
+ return 0;
+ }
}
wc->slid = be16_to_cpu(cqe->rlid);
@@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct mlx4_ib_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
- int err = 0;
struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
spin_lock_irqsave(&cq->lock, flags);
@@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
}
for (npolled = 0; npolled < num_entries; ++npolled) {
- err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
- if (err)
+ if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
break;
}
@@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return npolled;
- else
- return err;
+ return npolled;
}
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1301a1db958c..1672907ff219 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1171,6 +1171,27 @@ void handle_port_mgmt_change_event(struct work_struct *work)
/* Generate GUID changed event */
if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+ if (mlx4_is_master(dev->dev)) {
+ union ib_gid gid;
+ int err = 0;
+
+ if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+ err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+ else
+ gid.global.subnet_prefix =
+ eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+ if (err) {
+ pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+ port, err);
+ } else {
+ pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+ port,
+ (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+ be64_to_cpu(gid.global.subnet_prefix));
+ atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
+ }
+ }
mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
/*if master, notify all slaves*/
if (mlx4_is_master(dev->dev))
@@ -2263,6 +2284,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
if (err)
goto demux_err;
dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+ atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+ be64_to_cpu(gid.global.subnet_prefix));
err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
&dev->sriov.sqps[i]);
if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1811eb5b6aab..b597e8227591 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2287,6 +2287,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
bool per_port = !!(ibdev->dev->caps.flags2 &
MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
+ if (mlx4_is_slave(ibdev->dev))
+ return 0;
+
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
/* i == 1 means we are building port counters */
if (i && !per_port)
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 7d30be0f287b..a21d37f02f35 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
if (!group->members[i])
leave_state |= (1 << i);
- return leave_state & (group->rec.scope_join_state & 7);
+ return leave_state & (group->rec.scope_join_state & 0xf);
}
static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
} else
mcg_warn_group(group, "DRIVER BUG\n");
} else if (group->state == MCAST_LEAVE_SENT) {
- if (group->rec.scope_join_state & 7)
- group->rec.scope_join_state &= 0xf8;
+ if (group->rec.scope_join_state & 0xf)
+ group->rec.scope_join_state &= 0xf0;
group->state = MCAST_IDLE;
mutex_unlock(&group->lock);
if (release_group(group, 1))
@@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
static int handle_join_req(struct mcast_group *group, u8 join_mask,
struct mcast_req *req)
{
- u8 group_join_state = group->rec.scope_join_state & 7;
+ u8 group_join_state = group->rec.scope_join_state & 0xf;
int ref = 0;
u16 status;
struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
u8 cur_join_state;
resp_join_state = ((struct ib_sa_mcmember_data *)
- group->response_sa_mad.data)->scope_join_state & 7;
- cur_join_state = group->rec.scope_join_state & 7;
+ group->response_sa_mad.data)->scope_join_state & 0xf;
+ cur_join_state = group->rec.scope_join_state & 0xf;
if (method == IB_MGMT_METHOD_GET_RESP) {
/* successfull join */
@@ -710,7 +710,7 @@ process_requests:
req = list_first_entry(&group->pending_list, struct mcast_req,
group_list);
sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
- req_join_state = sa_data->scope_join_state & 0x7;
+ req_join_state = sa_data->scope_join_state & 0xf;
/* For a leave request, we will immediately answer the VF, and
* update our internal counters. The actual leave will be sent
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8db7cb1a3716..35141f451e5c 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx {
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
- __be64 subnet_prefix;
+ atomic64_t subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 16f654dc8a46..570bc866b1d6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2509,24 +2509,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
- if (is_eth)
+ if (is_eth) {
memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
- else {
- if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- /* When multi-function is enabled, the ib_core gid
- * indexes don't necessarily match the hw ones, so
- * we must use our own cache */
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- subnet_prefix;
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
- } else
- ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid, NULL);
+ } else {
+ if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+ /* When multi-function is enabled, the ib_core gid
+ * indexes don't necessarily match the hw ones, so
+ * we must use our own cache
+ */
+ sqp->ud_header.grh.source_gid.global.subnet_prefix =
+ cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+ demux[sqp->qp.port - 1].
+ subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global.interface_id =
+ to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+ guid_cache[ah->av.ib.gid_index];
+ } else {
+ ib_get_cached_gid(ib_dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index,
+ &sqp->ud_header.grh.source_gid, NULL);
+ }
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 1188fef08450..79d017baf6f4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -552,12 +552,6 @@ repoll:
* from the table.
*/
mqp = __mlx5_qp_lookup(dev->mdev, qpn);
- if (unlikely(!mqp)) {
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
- cq->mcq.cqn, qpn);
- return -EINVAL;
- }
-
*cur_qp = to_mibqp(mqp);
}
@@ -618,13 +612,6 @@ repoll:
read_lock(&dev->mdev->priv.mkey_table.lock);
mmkey = __mlx5_mr_lookup(dev->mdev,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- if (unlikely(!mmkey)) {
- read_unlock(&dev->mdev->priv.mkey_table.lock);
- mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
- cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
- return -EINVAL;
- }
-
mr = to_mibmr(mmkey);
get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
mr->sig->sig_err_exists = true;
@@ -675,7 +662,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
unsigned long flags;
int soft_polled = 0;
int npolled;
- int err = 0;
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -687,8 +673,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
soft_polled = poll_soft_wc(cq, num_entries, wc);
for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
- err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
- if (err)
+ if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
break;
}
@@ -697,10 +682,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
out:
spin_unlock_irqrestore(&cq->lock, flags);
- if (err == 0 || err == -EAGAIN)
- return soft_polled + npolled;
- else
- return err;
+ return soft_polled + npolled;
}
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f4160d56dc4f..22174774dbb8 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -37,7 +37,6 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
-#include <linux/io-mapping.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
@@ -328,7 +327,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
- return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+ return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+ return 0;
}
enum {
@@ -1547,6 +1548,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
vlan_tag, 1);
@@ -2050,6 +2058,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
@@ -2085,7 +2094,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+ if (mqp->flags & MLX5_IB_QP_RSS)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 40df2cca0609..996b54e366b0 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
addr = addr >> page_shift;
tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
i = 0;
@@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
for (k = 0; k < len; k++) {
if (!(i & mask)) {
tmp = (unsigned long)pfn;
- m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp)));
+ m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
skip = 1 << m;
mask = skip - 1;
base = pfn;
@@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
} else {
if (base + p != pfn) {
tmp = (unsigned long)p;
- m = find_first_bit(&tmp, sizeof(tmp));
+ m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
base = pfn;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1df8a67d4f02..dcdcd195fe53 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -406,6 +406,7 @@ enum mlx5_ib_qp_flags {
/* QP uses 1 as its source QP number */
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
+ MLX5_IB_QP_RSS = 1 << 8,
};
struct mlx5_umr_wr {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 9d97a71a1335..41f4c2afbcdd 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1467,6 +1467,7 @@ create_tir:
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
+ qp->flags |= MLX5_IB_QP_RSS;
return 0;
err:
@@ -3744,12 +3745,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct ib_send_wr *wr, unsigned *idx,
int *size, int nreq)
{
- int err = 0;
-
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
- err = -ENOMEM;
- return err;
- }
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_get_send_wqe(qp, *idx);
@@ -3765,7 +3762,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
- return err;
+ return 0;
}
static void finish_wqe(struct mlx5_ib_qp *qp,
@@ -3844,7 +3841,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = wr->num_sge;
if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
+ err = -EINVAL;
*bad_wr = wr;
goto out;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 16740dcb876b..67fc0b6857e1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_srq =
(rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
- attr->max_send_sge = ((rsp->max_write_send_sge &
+ attr->max_send_sge = ((rsp->max_recv_send_sge &
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
- attr->max_recv_sge = (rsp->max_write_send_sge &
- OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
- OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+ attr->max_recv_sge = (rsp->max_recv_send_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT;
attr->max_srq_sge = (rsp->max_srq_rqe_sge &
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
- attr->max_rdma_sge = (rsp->max_write_send_sge &
- OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
- OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
+ attr->max_rdma_sge = (rsp->max_wr_rd_sge &
+ OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT;
attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 0efc9662c6d8..37df4481bb8f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -554,9 +554,9 @@ enum {
OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18,
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0,
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF,
- OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16,
- OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF <<
- OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
+ OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16,
+ OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF <<
+ OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT,
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0,
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF,
@@ -612,6 +612,8 @@ enum {
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0,
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF <<
OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+ OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0,
+ OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF,
};
struct ocrdma_mbx_query_config {
@@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config {
struct ocrdma_mbx_rsp rsp;
u32 qp_srq_cq_ird_ord;
u32 max_pd_ca_ack_delay;
- u32 max_write_send_sge;
+ u32 max_recv_send_sge;
u32 max_ird_ord_per_qp;
u32 max_shared_ird_ord;
u32 max_mr;
@@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config {
u32 max_wqes_rqes_per_q;
u32 max_cq_cqes_per_cq;
u32 max_srq_rqe_sge;
+ u32 max_wr_rd_sge;
+ u32 ird_pgsz_num_pages;
};
struct ocrdma_fw_ver_rsp {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 71d0534960d6..6af44f8db3d5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
- attr->max_sge = dev->attr.max_send_sge;
- attr->max_sge_rd = attr->max_sge;
+ attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+ attr->max_sge_rd = dev->attr.max_rdma_sge;
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
attr->max_mr = dev->attr.max_mr;
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index bbf0a163aeab..a3e21a25cea5 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/kthread.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include "qib_common.h"
@@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock;
extern struct qib_devdata *qib_lookup(int unit);
extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
-extern u16 qpt_mask;
extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
index 5e75b43c596b..5bad8e3b40bb 100644
--- a/drivers/infiniband/hw/qib/qib_debugfs.c
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
DEBUGFS_FILE(ctx_stats)
static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+ __acquires(RCU)
{
struct qib_qp_iter *iter;
loff_t n = *pos;
- rcu_read_lock();
iter = qib_qp_iter_init(s->private);
+
+ /* stop calls rcu_read_unlock */
+ rcu_read_lock();
+
if (!iter)
return NULL;
- while (n--) {
+ do {
if (qib_qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
- }
+ } while (n--);
return iter;
}
static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
loff_t *pos)
+ __must_hold(RCU)
{
struct qib_qp_iter *iter = iter_ptr;
@@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
}
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+ __releases(RCU)
{
rcu_read_unlock();
}
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 67ee6438cf59..728e0a030d2e 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
ret = 1;
else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
/* For TIDERR and RC QPs premptively schedule a NAK */
- struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
- struct qib_other_headers *ohdr = NULL;
+ struct ib_header *hdr = (struct ib_header *)rhdr;
+ struct ib_other_headers *ohdr = NULL;
struct qib_ibport *ibp = &ppd->ibport_data;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -588,8 +588,7 @@ move_along:
qib_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
bail:
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index fcdf37913a26..c3edc033f7c4 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
pos = *ppos;
- if (pos != 0) {
- ret = -EINVAL;
- goto bail;
- }
-
- if (count != sizeof(struct qib_flash)) {
- ret = -EINVAL;
- goto bail;
- }
-
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto bail;
- }
+ if (pos != 0 || count != sizeof(struct qib_flash))
+ return -EINVAL;
- if (copy_from_user(tmp, buf, count)) {
- ret = -EFAULT;
- goto bail_tmp;
- }
+ tmp = memdup_user(buf, count);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
dd = private2dd(file);
if (qib_eeprom_write(dd, pos, tmp, count)) {
@@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf,
bail_tmp:
kfree(tmp);
-
-bail:
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ce4034071f9c..ded27172320e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd)
u32 *hdr;
u64 pbc;
const unsigned hdrwords = 7;
- static struct qib_ib_header ibhdr = {
+ static struct ib_header ibhdr = {
.lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH),
.lrh[1] = IB_LID_PERMISSIVE,
.lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC),
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 9cc0aae1d781..99d31efe4c2f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -41,14 +41,6 @@
#include "qib.h"
-/*
- * mask field which was present in now deleted qib_qpn_table
- * is not present in rvt_qpn_table. Defining the same field
- * as qpt_mask here instead of adding the mask field to
- * rvt_qpn_table.
- */
-u16 qpt_mask;
-
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off)
{
@@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off,
- unsigned n)
+ unsigned n, u16 qpt_mask)
{
if (qpt_mask) {
off++;
@@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
verbs_dev);
+ u16 qpt_mask = dd->qpn_mask;
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
goto bail;
}
offset = find_next_offset(qpt, map, offset,
- dd->n_krcv_queues);
+ dd->n_krcv_queues, qpt_mask);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
@@ -573,10 +566,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
return NULL;
iter->dev = dev;
- if (qib_qp_iter_next(iter)) {
- kfree(iter);
- return NULL;
- }
return iter;
}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 444028a3582a..2097512e75aa 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp)
* Note the QP s_lock must be held.
*/
static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
- struct qib_other_headers *ohdr, u32 pmtu)
+ struct ib_other_headers *ohdr, u32 pmtu)
{
struct rvt_ack_entry *e;
u32 hwords;
@@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = qib_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn & QIB_PSN_MASK;
e->sent = 1;
@@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
u32 hwords;
@@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
}
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_swap(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp)
u32 hwords;
u32 pbufn;
u32 __iomem *piobuf;
- struct qib_ib_header hdr;
- struct qib_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
u32 control;
unsigned long flags;
@@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
@@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void qib_rc_rcv_resp(struct qib_ibport *ibp,
- struct qib_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen,
struct rvt_qp *qp,
u32 opcode,
@@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64) be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
@@ -1608,7 +1600,7 @@ bail:
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
void *data,
struct rvt_qp *qp,
u32 opcode,
@@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
*/
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
* for the given QP.
* Called at interrupt level.
*/
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
@@ -2177,8 +2169,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2189,11 +2180,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2233,7 +2224,7 @@ rnr_nak:
/* Queue RNR NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2245,7 +2236,7 @@ nack_op_err:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2259,7 +2250,7 @@ nack_inv:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b67779256297..de1bde5950f5 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the qib_migrate_qp() call.
*/
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2)
{
struct qib_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1d61bd04f449..5b2d483451ad 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -48,7 +48,7 @@
int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords;
u32 bth0;
@@ -236,10 +236,10 @@ bail:
* for the given QP.
* Called at interrupt level.
*/
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 10d062561bd9..f45cad1198b0 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -245,7 +245,7 @@ drop:
int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
@@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
* for the given QP.
* Called at interrupt level.
*/
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 pad;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 2d7e52619b55..954f15064514 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
* for the given QP.
* Called at interrupt level.
*/
-static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
@@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
{
struct qib_pportdata *ppd = rcd->ppd;
struct qib_ibport *ibp = &ppd->ibport_data;
- struct qib_ib_header *hdr = rhdr;
+ struct ib_header *hdr = rhdr;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_qp *qp;
u32 qp_num;
int lnh;
@@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
if (tx->wqe)
qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct qib_ib_header *hdr;
+ struct ib_header *hdr;
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
hdr = &tx->align_buf->hdr;
@@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1133,7 +1133,7 @@ done:
* Return zero if packet is sent or queued OK.
* Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
*/
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1607,8 +1607,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
/* Only need to initialize non-zero fields. */
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
- qpt_mask = dd->qpn_mask;
-
INIT_LIST_HEAD(&dev->piowait);
INIT_LIST_HEAD(&dev->dmawait);
INIT_LIST_HEAD(&dev->txwait);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 736ced684842..94fd30fdedac 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_cq.h>
@@ -63,16 +64,6 @@ struct qib_verbs_txreq;
*/
#define QIB_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -87,22 +78,9 @@ struct qib_verbs_txreq;
#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK (1 << 31)
-#define IB_BTH_SOLICITED (1 << 23)
-#define IB_BTH_MIG_REQ (1 << 22)
-
/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
/* Values for set/get portinfo VLCap OperationalVLs */
@@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls)
}
}
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-struct qib_other_headers {
- __be32 bth[3];
- union {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
- } u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct qib_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct qib_other_headers oth;
- } l;
- struct qib_other_headers oth;
- } u;
-} __packed;
-
struct qib_pio_header {
__le32 pbc[2];
- struct qib_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -191,7 +117,7 @@ struct qib_pio_header {
* is made common.
*/
struct qib_qp_priv {
- struct qib_ib_header *s_hdr; /* next packet header to send */
+ struct ib_header *s_hdr; /* next packet header to send */
struct list_head iowait; /* link for wait PIO buf */
atomic_t s_dma_busy;
struct qib_verbs_txreq *s_tx;
@@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
void qib_put_txreq(struct qib_verbs_txreq *tx);
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len);
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
@@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
@@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg);
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void mr_rcu_callback(struct rcu_head *list);
@@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
void qib_migrate_qp(struct rvt_qp *qp);
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2);
void _qib_do_send(struct work_struct *work);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index c229b9f4a52d..0a89a955550b 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -664,7 +664,8 @@ static int __init usnic_ib_init(void)
return err;
}
- if (pci_register_driver(&usnic_ib_pci_driver)) {
+ err = pci_register_driver(&usnic_ib_pci_driver);
+ if (err) {
usnic_err("Unable to register with PCI\n");
goto out_umem_fini;
}
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
index 33076a5eee2f..01f71caa3ac4 100644
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -138,6 +138,21 @@ static void rvt_unmap_sg(struct ib_device *dev,
/* This is a stub, nothing to be done here */
}
+/*
+ * rvt_map_sg_attrs - map_sg_attrs hook that does not consult @attrs
+ *
+ * Forwards @dev, @sgl, @nents and @direction unchanged to rvt_map_sg()
+ * and returns whatever rvt_map_sg() returns.
+ */
+static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ return rvt_map_sg(dev, sgl, nents, direction);
+}
+
+/*
+ * rvt_unmap_sg_attrs - unmap_sg_attrs hook that does not consult @attrs
+ *
+ * Forwards @dev, @sg, @nents and @direction unchanged to rvt_unmap_sg().
+ * The function is void, so the call is made as a plain statement rather
+ * than as the operand of a return.
+ */
+static void rvt_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ rvt_unmap_sg(dev, sg, nents, direction);
+}
+
static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
@@ -177,6 +192,8 @@ struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
.unmap_page = rvt_dma_unmap_page,
.map_sg = rvt_map_sg,
.unmap_sg = rvt_unmap_sg,
+ .map_sg_attrs = rvt_map_sg_attrs,
+ .unmap_sg_attrs = rvt_unmap_sg_attrs,
.sync_single_for_cpu = rvt_sync_single_for_cpu,
.sync_single_for_device = rvt_sync_single_for_device,
.alloc_coherent = rvt_dma_alloc_coherent,
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 80c4b6b401b8..46b64970058e 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr)
{
rvt_deinit_mregion(&mr->mr);
rvt_free_lkey(&mr->mr);
- vfree(mr);
+ kfree(mr);
}
/**
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index bdb540f25a88..6500c3b5a89c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
if (removed) {
synchronize_rcu();
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
+ * rvt_init_qp - initialize the QP state to the reset state
+ * @qp: the QP to init or reinit
* @type: the QP type
- * r and s lock are required to be held by the caller
+ *
+ * This function is called from both rvt_create_qp() and
+ * rvt_reset_qp(). The difference is that the reset
+ * path holds the necessary locks to protect against concurrent
+ * access.
*/
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- enum ib_qp_type type)
- __releases(&qp->s_lock)
- __releases(&qp->s_hlock)
- __releases(&qp->r_lock)
- __acquires(&qp->r_lock)
- __acquires(&qp->s_hlock)
- __acquires(&qp->s_lock)
+static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
{
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
-
- /* Let drivers flush their waitlist */
- rdi->driver_f.flush_qp_waiters(qp);
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
-
- /* Stop the send queue and the retry timer */
- rdi->driver_f.stop_send_queue(qp);
-
- /* Wait for things to stop */
- rdi->driver_f.quiesce_qp(qp);
-
- /* take qp out the hash and wait for it to be unused */
- rvt_remove_qp(rdi, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
-
- /* grab the lock b/c it was locked at call time */
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
-
- rvt_clear_mr_refs(qp, 1);
- }
-
- /*
- * Let the driver do any tear down it needs to for a qp
- * that has been reset
- */
- rdi->driver_f.notify_qp_reset(qp);
-
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
@@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
}
/**
+ * rvt_reset_qp - initialize the QP state to the reset state
+ * @rdi: the rvt device info whose driver_f callbacks are invoked
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * r_lock, s_hlock, and s_lock are required to be held by the caller.
+ *
+ * If the QP is not already in IB_QPS_RESET, all three locks are dropped
+ * while the send queue is stopped, the QP is quiesced, taken out of the
+ * hash and its refcount is waited on to reach zero; the locks are then
+ * re-acquired (r_lock, s_hlock, s_lock) before the function continues.
+ * In every case the QP fields are re-initialized via rvt_init_qp().
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+ __must_hold(&qp->s_lock)
+ __must_hold(&qp->s_hlock)
+ __must_hold(&qp->r_lock)
+{
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_hlock);
+ lockdep_assert_held(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+
+ /* Let drivers flush their waitlist */
+ rdi->driver_f.flush_qp_waiters(qp);
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+
+ /* Stop the send queue and the retry timer */
+ rdi->driver_f.stop_send_queue(qp);
+
+ /* Wait for things to stop */
+ rdi->driver_f.quiesce_qp(qp);
+
+ /* take qp out the hash and wait for it to be unused */
+ rvt_remove_qp(rdi, qp);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ /* grab the lock b/c it was locked at call time */
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+
+ rvt_clear_mr_refs(qp, 1);
+ /*
+ * Let the driver do any tear down or re-init it needs to for
+ * a qp that has been reset
+ */
+ rdi->driver_f.notify_qp_reset(qp);
+ }
+ rvt_init_qp(rdi, qp, type);
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_hlock);
+ lockdep_assert_held(&qp->s_lock);
+}
+
+/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
}
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
- rvt_reset_qp(rdi, qp, init_attr->qp_type);
+ rvt_init_qp(rdi, qp, init_attr->qp_type);
break;
default:
@@ -873,7 +890,8 @@ bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
- vfree(qp->r_rq.wq);
+ if (!qp->ip)
+ vfree(qp->r_rq.wq);
bail_driver_priv:
rdi->driver_f.qp_priv_free(rdi, qp);
@@ -905,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
int ret = 0;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
goto bail;
@@ -979,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
unsigned long flags;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
if (qp->ibqp.qp_num <= 1) {
@@ -996,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
}
/**
- * qib_modify_qp - modify the attributes of a queue pair
+ * rvt_modify_qp - modify the attributes of a queue pair
* @ibqp: the queue pair who's attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 55f0e8f0ca79..ab6c3c25d7ff 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -358,18 +358,15 @@ static int __init rxe_module_init(void)
/* initialize slab caches for managed objects */
err = rxe_cache_init();
if (err) {
- pr_err("rxe: unable to init object pools\n");
+ pr_err("unable to init object pools\n");
return err;
}
err = rxe_net_init();
- if (err) {
- pr_err("rxe: unable to init\n");
- rxe_cache_exit();
+ if (err)
return err;
- }
- pr_info("rxe: loaded\n");
+ pr_info("loaded\n");
return 0;
}
@@ -379,8 +376,8 @@ static void __exit rxe_module_exit(void)
rxe_net_exit();
rxe_cache_exit();
- pr_info("rxe: unloaded\n");
+ pr_info("unloaded\n");
}
-module_init(rxe_module_init);
+late_initcall(rxe_module_init);
module_exit(rxe_module_exit);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 12c71c549f97..a696af81e4a5 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -34,6 +34,11 @@
#ifndef RXE_H
#define RXE_H
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/crc32.h>
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 5c9474212d4e..604f6fee96bd 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -39,7 +39,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
struct rxe_port *port;
if (attr->port_num != 1) {
- pr_info("rxe: invalid port_num = %d\n", attr->port_num);
+ pr_info("invalid port_num = %d\n", attr->port_num);
return -EINVAL;
}
@@ -47,7 +47,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr)
if (attr->ah_flags & IB_AH_GRH) {
if (attr->grh.sgid_index > port->attr.gid_tbl_len) {
- pr_info("rxe: invalid sgid index = %d\n",
+ pr_info("invalid sgid index = %d\n",
attr->grh.sgid_index);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 36f67de44095..6c5e29db88e3 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -567,7 +567,8 @@ int rxe_completer(void *arg)
state = COMPST_GET_ACK;
while (1) {
- pr_debug("state = %s\n", comp_state_name[state]);
+ pr_debug("qp#%d state = %s\n", qp_num(qp),
+ comp_state_name[state]);
switch (state) {
case COMPST_GET_ACK:
skb = skb_dequeue(&qp->resp_pkts);
@@ -689,7 +690,14 @@ int rxe_completer(void *arg)
qp->req.need_retry = 1;
rxe_run_task(&qp->req.task, 1);
}
+
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ }
+
goto exit;
+
} else {
wqe->status = IB_WC_RETRY_EXC_ERR;
state = COMPST_ERROR;
@@ -702,7 +710,8 @@ int rxe_completer(void *arg)
qp->comp.rnr_retry--;
qp->req.need_retry = 1;
- pr_debug("set rnr nak timer\n");
+ pr_debug("qp#%d set rnr nak timer\n",
+ qp_num(qp));
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
@@ -716,6 +725,12 @@ int rxe_completer(void *arg)
case COMPST_ERROR:
do_complete(qp, wqe);
rxe_qp_error(qp);
+
+ if (pkt) {
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ }
+
goto exit;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c
index 7634c1a81b2b..a0f8af5851ae 100644
--- a/drivers/infiniband/sw/rxe/rxe_dma.c
+++ b/drivers/infiniband/sw/rxe/rxe_dma.c
@@ -117,6 +117,21 @@ static void rxe_unmap_sg(struct ib_device *dev,
WARN_ON(!valid_dma_direction(direction));
}
+/*
+ * rxe_map_sg_attrs - map_sg_attrs hook that does not consult @attrs
+ *
+ * Forwards @dev, @sgl, @nents and @direction unchanged to rxe_map_sg()
+ * and returns whatever rxe_map_sg() returns.
+ */
+static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ return rxe_map_sg(dev, sgl, nents, direction);
+}
+
+/*
+ * rxe_unmap_sg_attrs - unmap_sg_attrs hook that does not consult @attrs
+ *
+ * Forwards @dev, @sg, @nents and @direction unchanged to rxe_unmap_sg().
+ */
+static void rxe_unmap_sg_attrs(struct ib_device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ rxe_unmap_sg(dev, sg, nents, direction);
+}
+
static void rxe_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size, enum dma_data_direction dir)
@@ -159,6 +174,8 @@ struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
.unmap_page = rxe_dma_unmap_page,
.map_sg = rxe_map_sg,
.unmap_sg = rxe_unmap_sg,
+ .map_sg_attrs = rxe_map_sg_attrs,
+ .unmap_sg_attrs = rxe_unmap_sg_attrs,
.sync_single_for_cpu = rxe_sync_single_for_cpu,
.sync_single_for_device = rxe_sync_single_for_device,
.alloc_coherent = rxe_dma_alloc_coherent,
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 4a5484ef604f..73849a5a91b3 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -198,7 +198,7 @@ void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
{
qp->resp.res_head++;
- if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic))
+ if (unlikely(qp->resp.res_head == qp->attr.max_dest_rd_atomic))
qp->resp.res_head = 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 54b3c7c99eff..c572a4c09359 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -126,7 +126,7 @@ found_it:
ret = remap_vmalloc_range(vma, ip->obj, 0);
if (ret) {
- pr_err("rxe: err %d from remap_vmalloc_range\n", ret);
+ pr_err("err %d from remap_vmalloc_range\n", ret);
goto done;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index f3dab6574504..1869152f1d23 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -39,7 +39,7 @@
*/
static u8 rxe_get_key(void)
{
- static unsigned key = 1;
+ static u32 key = 1;
key = key << 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0b8d2ea8b41d..b8258e4f0aea 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -65,7 +65,7 @@ struct rxe_dev *net_to_rxe(struct net_device *ndev)
return found;
}
-struct rxe_dev *get_rxe_by_name(const char* name)
+struct rxe_dev *get_rxe_by_name(const char *name)
{
struct rxe_dev *rxe;
struct rxe_dev *found = NULL;
@@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
return sock;
}
-static void rxe_release_udp_tunnel(struct socket *sk)
+void rxe_release_udp_tunnel(struct socket *sk)
{
- udp_tunnel_sock_release(sk);
+ if (sk)
+ udp_tunnel_sock_release(sk);
}
static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
@@ -349,14 +350,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
}
-static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
+static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, struct rxe_av *av)
{
struct dst_entry *dst;
bool xnet = false;
__be16 df = htons(IP_DF);
struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
- struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
dst = rxe_find_route4(rxe->ndev, saddr, daddr);
if (!dst) {
@@ -375,12 +376,12 @@ static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
return 0;
}
-static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av)
+static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, struct rxe_av *av)
{
struct dst_entry *dst;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
- struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
dst = rxe_find_route6(rxe->ndev, saddr, daddr);
if (!dst) {
@@ -407,9 +408,9 @@ static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct rxe_av *av = rxe_get_av(pkt);
if (av->network_type == RDMA_NETWORK_IPV4)
- err = prepare4(rxe, skb, av);
+ err = prepare4(rxe, pkt, skb, av);
else if (av->network_type == RDMA_NETWORK_IPV6)
- err = prepare6(rxe, skb, av);
+ err = prepare6(rxe, pkt, skb, av);
*crc = rxe_icrc_hdr(pkt, skb);
@@ -600,8 +601,7 @@ void rxe_port_up(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
- pr_info("rxe: set %s active\n", rxe->ib_dev.name);
- return;
+ pr_info("set %s active\n", rxe->ib_dev.name);
}
/* Caller must hold net_info_lock */
@@ -614,8 +614,7 @@ void rxe_port_down(struct rxe_dev *rxe)
port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
- pr_info("rxe: set %s down\n", rxe->ib_dev.name);
- return;
+ pr_info("set %s down\n", rxe->ib_dev.name);
}
static int rxe_notify(struct notifier_block *not_blk,
@@ -640,7 +639,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_port_down(rxe);
break;
case NETDEV_CHANGEMTU:
- pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu);
+ pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_REBOOT:
@@ -650,7 +649,7 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
default:
- pr_info("rxe: ignoring netdev event = %ld for %s\n",
+ pr_info("ignoring netdev event = %ld for %s\n",
event, ndev->name);
break;
}
@@ -658,51 +657,68 @@ out:
return NOTIFY_OK;
}
-static struct notifier_block rxe_net_notifier = {
+struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
-int rxe_net_init(void)
+int rxe_net_ipv4_init(void)
{
- int err;
-
spin_lock_init(&dev_list_lock);
- recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), true);
- if (IS_ERR(recv_sockets.sk6)) {
- recv_sockets.sk6 = NULL;
- pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
- return -1;
- }
-
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
- htons(ROCE_V2_UDP_DPORT), false);
+ htons(ROCE_V2_UDP_DPORT), false);
if (IS_ERR(recv_sockets.sk4)) {
- rxe_release_udp_tunnel(recv_sockets.sk6);
recv_sockets.sk4 = NULL;
- recv_sockets.sk6 = NULL;
- pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
+ pr_err("Failed to create IPv4 UDP tunnel\n");
return -1;
}
- err = register_netdevice_notifier(&rxe_net_notifier);
- if (err) {
- rxe_release_udp_tunnel(recv_sockets.sk6);
- rxe_release_udp_tunnel(recv_sockets.sk4);
- pr_err("rxe: Failed to rigister netdev notifier\n");
- }
+ return 0;
+}
- return err;
+/*
+ * rxe_net_ipv6_init - create the IPv6 receive UDP tunnel socket
+ *
+ * When CONFIG_IPV6 is enabled, initializes dev_list_lock and sets up
+ * recv_sockets.sk6 on ROCE_V2_UDP_DPORT in init_net.  On failure
+ * recv_sockets.sk6 is reset to NULL and the error code carried by the
+ * returned error pointer is propagated to the caller (rather than a
+ * bare -1, which would read as -EPERM).  When CONFIG_IPV6 is disabled
+ * the function only returns 0.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int rxe_net_ipv6_init(void)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+
+ spin_lock_init(&dev_list_lock);
+
+ recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
+ htons(ROCE_V2_UDP_DPORT), true);
+ if (IS_ERR(recv_sockets.sk6)) {
+ /* Capture the real error before the pointer is cleared. */
+ int err = PTR_ERR(recv_sockets.sk6);
+
+ recv_sockets.sk6 = NULL;
+ pr_err("Failed to create IPv6 UDP tunnel\n");
+ return err;
+ }
+#endif
+ return 0;
+}
void rxe_net_exit(void)
{
- if (recv_sockets.sk6)
- rxe_release_udp_tunnel(recv_sockets.sk6);
+ rxe_release_udp_tunnel(recv_sockets.sk6);
+ rxe_release_udp_tunnel(recv_sockets.sk4);
+ unregister_netdevice_notifier(&rxe_net_notifier);
+}
- if (recv_sockets.sk4)
- rxe_release_udp_tunnel(recv_sockets.sk4);
+int rxe_net_init(void)
+{
+ int err;
- unregister_netdevice_notifier(&rxe_net_notifier);
+ recv_sockets.sk6 = NULL;
+
+ err = rxe_net_ipv4_init();
+ if (err)
+ return err;
+ err = rxe_net_ipv6_init();
+ if (err)
+ goto err_out;
+ err = register_netdevice_notifier(&rxe_net_notifier);
+ if (err) {
+ pr_err("Failed to register netdev notifier\n");
+ goto err_out;
+ }
+ return 0;
+err_out:
+ rxe_net_exit();
+ return err;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 7b06f76d16cc..1c06b3bfe1b6 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -44,6 +44,8 @@ struct rxe_recv_sockets {
};
extern struct rxe_recv_sockets recv_sockets;
+extern struct notifier_block rxe_net_notifier;
+void rxe_release_udp_tunnel(struct socket *sk);
struct rxe_dev *rxe_net_add(struct net_device *ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 22ba24f2a2c1..b8036cfbce04 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -146,7 +146,7 @@ static void free_rd_atomic_resources(struct rxe_qp *qp)
if (qp->resp.resources) {
int i;
- for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
struct resp_res *res = &qp->resp.resources[i];
free_rd_atomic_resource(qp, res);
@@ -174,7 +174,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp)
struct resp_res *res;
if (qp->resp.resources) {
- for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
res = &qp->resp.resources[i];
free_rd_atomic_resource(qp, res);
}
@@ -298,8 +298,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
wqe_size = rcv_wqe_size(qp->rq.max_sge);
- pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n",
- qp->rq.max_wr, qp->rq.max_sge, wqe_size);
+ pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
+ qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
qp->rq.queue = rxe_queue_init(rxe,
&qp->rq.max_wr,
@@ -596,14 +596,21 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
+ qp->attr.max_rd_atomic = max_rd_atomic;
+ atomic_set(&qp->req.rd_atomic, max_rd_atomic);
+ }
+
+ if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ int max_dest_rd_atomic =
+ __roundup_pow_of_two(attr->max_dest_rd_atomic);
+
+ qp->attr.max_dest_rd_atomic = max_dest_rd_atomic;
+
free_rd_atomic_resources(qp);
- err = alloc_rd_atomic_resources(qp, max_rd_atomic);
+ err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic);
if (err)
return err;
-
- qp->attr.max_rd_atomic = max_rd_atomic;
- atomic_set(&qp->req.rd_atomic, max_rd_atomic);
}
if (mask & IB_QP_CUR_STATE)
@@ -673,24 +680,27 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
if (mask & IB_QP_RETRY_CNT) {
qp->attr.retry_cnt = attr->retry_cnt;
qp->comp.retry_cnt = attr->retry_cnt;
- pr_debug("set retry count = %d\n", attr->retry_cnt);
+ pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
+ attr->retry_cnt);
}
if (mask & IB_QP_RNR_RETRY) {
qp->attr.rnr_retry = attr->rnr_retry;
qp->comp.rnr_retry = attr->rnr_retry;
- pr_debug("set rnr retry count = %d\n", attr->rnr_retry);
+ pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
+ attr->rnr_retry);
}
if (mask & IB_QP_RQ_PSN) {
qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
qp->resp.psn = qp->attr.rq_psn;
- pr_debug("set resp psn = 0x%x\n", qp->resp.psn);
+ pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
+ qp->resp.psn);
}
if (mask & IB_QP_MIN_RNR_TIMER) {
qp->attr.min_rnr_timer = attr->min_rnr_timer;
- pr_debug("set min rnr timer = 0x%x\n",
+ pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
attr->min_rnr_timer);
}
@@ -698,12 +708,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
qp->req.psn = qp->attr.sq_psn;
qp->comp.psn = qp->attr.sq_psn;
- pr_debug("set req psn = 0x%x\n", qp->req.psn);
- }
-
- if (mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- qp->attr.max_dest_rd_atomic =
- __roundup_pow_of_two(attr->max_dest_rd_atomic);
+ pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
}
if (mask & IB_QP_PATH_MIG_STATE)
@@ -717,38 +722,38 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
switch (attr->qp_state) {
case IB_QPS_RESET:
- pr_debug("qp state -> RESET\n");
+ pr_debug("qp#%d state -> RESET\n", qp_num(qp));
rxe_qp_reset(qp);
break;
case IB_QPS_INIT:
- pr_debug("qp state -> INIT\n");
+ pr_debug("qp#%d state -> INIT\n", qp_num(qp));
qp->req.state = QP_STATE_INIT;
qp->resp.state = QP_STATE_INIT;
break;
case IB_QPS_RTR:
- pr_debug("qp state -> RTR\n");
+ pr_debug("qp#%d state -> RTR\n", qp_num(qp));
qp->resp.state = QP_STATE_READY;
break;
case IB_QPS_RTS:
- pr_debug("qp state -> RTS\n");
+ pr_debug("qp#%d state -> RTS\n", qp_num(qp));
qp->req.state = QP_STATE_READY;
break;
case IB_QPS_SQD:
- pr_debug("qp state -> SQD\n");
+ pr_debug("qp#%d state -> SQD\n", qp_num(qp));
rxe_qp_drain(qp);
break;
case IB_QPS_SQE:
- pr_warn("qp state -> SQE !!?\n");
+ pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
/* Not possible from modify_qp. */
break;
case IB_QPS_ERR:
- pr_debug("qp state -> ERR\n");
+ pr_debug("qp#%d state -> ERR\n", qp_num(qp));
rxe_qp_error(qp);
break;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 3d464c23e08b..46f062842a9a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
* make a copy of the skb to post to the next qp
*/
skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
- skb_clone(skb, GFP_KERNEL) : NULL;
+ skb_clone(skb, GFP_ATOMIC) : NULL;
pkt->qp = qp;
rxe_add_ref(qp);
@@ -387,7 +387,8 @@ int rxe_rcv(struct sk_buff *skb)
pack_icrc = be32_to_cpu(*icrcp);
calc_icrc = rxe_icrc_hdr(pkt, skb);
- calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt));
+ calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt),
+ payload_size(pkt));
calc_icrc = cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
char saddr[sizeof(struct in6_addr)];
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 33b2d9d77021..832846b73ea0 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -38,7 +38,7 @@
#include "rxe_queue.h"
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- unsigned opcode);
+ u32 opcode);
static inline void retry_first_write_send(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
@@ -121,7 +121,7 @@ void rnr_nak_timer(unsigned long data)
{
struct rxe_qp *qp = (struct rxe_qp *)data;
- pr_debug("rnr nak timer fired\n");
+ pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp));
rxe_run_task(&qp->req.task, 1);
}
@@ -187,7 +187,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
return wqe;
}
-static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
+static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
{
switch (opcode) {
case IB_WR_RDMA_WRITE:
@@ -259,7 +259,7 @@ static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits)
return -EINVAL;
}
-static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
+static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits)
{
switch (opcode) {
case IB_WR_RDMA_WRITE:
@@ -311,7 +311,7 @@ static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits)
}
static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- unsigned opcode)
+ u32 opcode)
{
int fits = (wqe->dma.resid <= qp->mtu);
@@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
static void update_wqe_state(struct rxe_qp *qp,
- struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt,
- enum wqe_state *prev_state)
+ struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt)
{
- enum wqe_state prev_state_ = wqe->state;
-
if (pkt->mask & RXE_END_MASK) {
if (qp_type(qp) == IB_QPT_RC)
wqe->state = wqe_state_pending;
} else {
wqe->state = wqe_state_processing;
}
-
- *prev_state = prev_state_;
}
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
- struct rxe_pkt_info *pkt, int payload)
+static void update_wqe_psn(struct rxe_qp *qp,
+ struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt,
+ int payload)
{
/* number of packets left to send including current one */
int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
else
qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+}
- qp->req.opcode = pkt->opcode;
+/*
+ * save_state - snapshot the fields that a failed transmit must undo
+ *
+ * Copies wqe->state, wqe->first_psn and wqe->last_psn into @rollback_wqe
+ * and qp->req.psn into @rollback_qp.  Only these four fields of the
+ * rollback objects are written here.
+ */
+static void save_state(struct rxe_send_wqe *wqe,
+ struct rxe_qp *qp,
+ struct rxe_send_wqe *rollback_wqe,
+ struct rxe_qp *rollback_qp)
+{
+ rollback_wqe->state = wqe->state;
+ rollback_wqe->first_psn = wqe->first_psn;
+ rollback_wqe->last_psn = wqe->last_psn;
+ rollback_qp->req.psn = qp->req.psn;
+}
+/*
+ * rollback_state - restore the fields previously captured by save_state()
+ *
+ * Writes the state, first_psn and last_psn held in @rollback_wqe back
+ * into @wqe, and the req.psn held in @rollback_qp back into @qp.
+ */
+static void rollback_state(struct rxe_send_wqe *wqe,
+ struct rxe_qp *qp,
+ struct rxe_send_wqe *rollback_wqe,
+ struct rxe_qp *rollback_qp)
+{
+ wqe->state = rollback_wqe->state;
+ wqe->first_psn = rollback_wqe->first_psn;
+ wqe->last_psn = rollback_wqe->last_psn;
+ qp->req.psn = rollback_qp->req.psn;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+ struct rxe_pkt_info *pkt, int payload)
+{
+ qp->req.opcode = pkt->opcode;
if (pkt->mask & RXE_END_MASK)
qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
@@ -566,12 +588,13 @@ int rxe_requester(void *arg)
struct rxe_pkt_info pkt;
struct sk_buff *skb;
struct rxe_send_wqe *wqe;
- unsigned mask;
+ enum rxe_hdr_mask mask;
int payload;
int mtu;
int opcode;
int ret;
- enum wqe_state prev_state;
+ struct rxe_qp rollback_qp;
+ struct rxe_send_wqe rollback_wqe;
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -603,7 +626,8 @@ next_wqe:
rmr = rxe_pool_get_index(&rxe->mr_pool,
wqe->wr.ex.invalidate_rkey >> 8);
if (!rmr) {
- pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey);
+ pr_err("No mr for key %#x\n",
+ wqe->wr.ex.invalidate_rkey);
wqe->state = wqe_state_error;
wqe->status = IB_WC_MW_BIND_ERR;
goto exit;
@@ -679,22 +703,30 @@ next_wqe:
skb = init_req_packet(qp, wqe, opcode, payload, &pkt);
if (unlikely(!skb)) {
- pr_err("Failed allocating skb\n");
+ pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
goto err;
}
if (fill_packet(qp, wqe, &pkt, skb, payload)) {
- pr_debug("Error during fill packet\n");
+ pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
goto err;
}
- update_wqe_state(qp, wqe, &pkt, &prev_state);
+ /*
+ * To prevent a race on wqe access between requester and completer,
+ * wqe members state and psn need to be set before calling
+ * rxe_xmit_packet().
+ * Otherwise, completer might initiate an unjustified retry flow.
+ */
+ save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ update_wqe_state(qp, wqe, &pkt);
+ update_wqe_psn(qp, wqe, &pkt, payload);
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
kfree_skb(skb);
- wqe->state = prev_state;
+ rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
if (ret == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ebb03b46e2ad..dd3d88adc003 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -383,7 +383,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
* too many read/atomic ops, we just
* recycle the responder resource queue
*/
- if (likely(qp->attr.max_rd_atomic > 0))
+ if (likely(qp->attr.max_dest_rd_atomic > 0))
return RESPST_CHK_LENGTH;
else
return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
@@ -749,6 +749,18 @@ static enum resp_states read_reply(struct rxe_qp *qp,
return state;
}
+/*
+ * build_rdma_network_hdr - fill @hdr from the IP header of @pkt's skb
+ *
+ * @hdr is zeroed first.  If the skb protocol is ETH_P_IP,
+ * sizeof(hdr->roce4grh) bytes starting at ip_hdr(skb) are copied into
+ * hdr->roce4grh; if it is ETH_P_IPV6, sizeof(hdr->ibgrh) bytes starting
+ * at ipv6_hdr(skb) are copied into hdr->ibgrh.  For any other protocol
+ * @hdr is left all-zero.
+ */
+static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
+ struct rxe_pkt_info *pkt)
+{
+ struct sk_buff *skb = PKT_TO_SKB(pkt);
+
+ memset(hdr, 0, sizeof(*hdr));
+ if (skb->protocol == htons(ETH_P_IP))
+ memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh));
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
+}
+
/* Executes a new request. A retried request never reach that function (send
* and writes are discarded, and reads and atomics are retried elsewhere.
*/
@@ -761,13 +773,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
union rdma_network_hdr hdr;
- struct sk_buff *skb = PKT_TO_SKB(pkt);
- memset(&hdr, 0, sizeof(hdr));
- if (skb->protocol == htons(ETH_P_IP))
- memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh));
- else if (skb->protocol == htons(ETH_P_IPV6))
- memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh));
+ build_rdma_network_hdr(&hdr, pkt);
err = send_data_in(qp, &hdr, sizeof(hdr));
if (err)
@@ -881,7 +888,8 @@ static enum resp_states do_complete(struct rxe_qp *qp,
rmr = rxe_pool_get_index(&rxe->mr_pool,
wc->ex.invalidate_rkey >> 8);
if (unlikely(!rmr)) {
- pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey);
+ pr_err("Bad rkey %#x invalidation\n",
+ wc->ex.invalidate_rkey);
return RESPST_ERROR;
}
rmr->state = RXE_MEM_STATE_FREE;
@@ -972,11 +980,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
free_rd_atomic_resource(qp, res);
rxe_advance_resp_resource(qp);
+ memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
- res->first_psn = qp->resp.psn;
- res->last_psn = qp->resp.psn;
- res->cur_psn = qp->resp.psn;
+ res->first_psn = ack_pkt.psn;
+ res->last_psn = ack_pkt.psn;
+ res->cur_psn = ack_pkt.psn;
rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
if (rc) {
@@ -1116,8 +1126,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
rc = RESPST_CLEANUP;
goto out;
}
- bth_set_psn(SKB_TO_PKT(skb_copy),
- qp->resp.psn - 1);
+
/* Resend the result. */
rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
pkt, skb_copy);
@@ -1207,7 +1216,8 @@ int rxe_responder(void *arg)
}
while (1) {
- pr_debug("state = %s\n", resp_state_name[state]);
+ pr_debug("qp#%d state = %s\n", qp_num(qp),
+ resp_state_name[state]);
switch (state) {
case RESPST_GET_REQ:
state = get_req(qp, &pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index cf8e77800046..d5ed7571128f 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -79,7 +79,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
- pr_err("rxe: add: invalid interface name\n");
+ pr_err("add: invalid interface name\n");
err = -EINVAL;
goto err;
}
@@ -92,20 +92,20 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
}
if (net_to_rxe(ndev)) {
- pr_err("rxe: already configured on %s\n", intf);
+ pr_err("already configured on %s\n", intf);
err = -EINVAL;
goto err;
}
rxe = rxe_net_add(ndev);
if (!rxe) {
- pr_err("rxe: failed to add %s\n", intf);
+ pr_err("failed to add %s\n", intf);
err = -EINVAL;
goto err;
}
rxe_set_port_state(ndev);
- pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf);
+ pr_info("added %s to %s\n", rxe->ib_dev.name, intf);
err:
if (ndev)
dev_put(ndev);
@@ -120,7 +120,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
len = sanitize_arg(val, intf, sizeof(intf));
if (!len) {
- pr_err("rxe: add: invalid interface name\n");
+ pr_err("add: invalid interface name\n");
return -EINVAL;
}
@@ -133,7 +133,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp)
rxe = get_rxe_by_name(intf);
if (!rxe) {
- pr_err("rxe: not configured on %s\n", intf);
+ pr_err("not configured on %s\n", intf);
return -EINVAL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 4552be960c6a..19841c863daf 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -100,10 +100,12 @@ static int rxe_query_port(struct ib_device *dev,
rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
speed = cmd.speed;
} else {
- pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name);
+ pr_warn("%s speed is unknown, defaulting to 1000\n",
+ rxe->ndev->name);
speed = 1000;
}
- rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width);
+ rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
+ &attr->active_width);
mutex_unlock(&rxe->usdev_lock);
return 0;
@@ -761,7 +763,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
}
static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
- unsigned mask, u32 length)
+ unsigned int mask, u32 length)
{
int err;
struct rxe_sq *sq = &qp->sq;
@@ -801,26 +803,15 @@ err1:
return err;
}
-static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
{
int err = 0;
- struct rxe_qp *qp = to_rqp(ibqp);
unsigned int mask;
unsigned int length = 0;
int i;
int must_sched;
- if (unlikely(!qp->valid)) {
- *bad_wr = wr;
- return -EINVAL;
- }
-
- if (unlikely(qp->req.state < QP_STATE_READY)) {
- *bad_wr = wr;
- return -EINVAL;
- }
-
while (wr) {
mask = wr_opcode_mask(wr->opcode, qp);
if (unlikely(!mask)) {
@@ -861,6 +852,29 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return err;
}
+static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct rxe_qp *qp = to_rqp(ibqp);
+
+ if (unlikely(!qp->valid)) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ if (unlikely(qp->req.state < QP_STATE_READY)) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ if (qp->is_user) {
+ /* Utilize process context to do protocol processing */
+ rxe_run_task(&qp->req.task, 0);
+ return 0;
+ } else
+ return rxe_post_send_kernel(qp, wr, bad_wr);
+}
+
static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr)
{
@@ -1133,8 +1147,8 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
return 0;
}
-static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
- unsigned int *sg_offset)
+static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
{
struct rxe_mem *mr = to_rmr(ibmr);
int n;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 7899167536e3..7b8d2d9e2263 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn);
void ipoib_reap_ah(struct work_struct *work);
+struct ipoib_path *__path_find(struct net_device *dev, void *gid);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 951d9abcca8b..4ad297d3de89 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
}
}
+#define QPN_AND_OPTIONS_OFFSET 4
+
static void ipoib_cm_tx_start(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ipoib_neigh *neigh;
struct ipoib_cm_tx *p;
unsigned long flags;
+ struct ipoib_path *path;
int ret;
struct ib_sa_path_rec pathrec;
@@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work)
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
+
qpn = IPOIB_QPN(neigh->daddr);
+ /*
+ * As long as the search is with these 2 locks,
+ * path existence indicates its validity.
+ */
+ path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+ if (!path) {
+ pr_info("%s ignore not valid path %pI6\n",
+ __func__,
+ neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+ goto free_neigh;
+ }
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
spin_lock_irqsave(&priv->lock, flags);
if (ret) {
+free_neigh:
neigh = p->neigh;
if (neigh) {
neigh->cm = NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index dc6d241b9406..be11d5d5b8c1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
}
if (level == IPOIB_FLUSH_LIGHT) {
+ int oper_up;
ipoib_mark_paths_invalid(dev);
+ /* Set IPoIB operation as down to prevent races between:
+ * the flush flow which leaves MCG and on the fly joins
+ * which can happen during that time. mcast restart task
+ * should deal with join requests we missed.
+ */
+ oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
ipoib_mcast_dev_flush(dev);
+ if (oper_up)
+ set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
ipoib_flush_ah(dev);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e95c02ee05c0..5636fc3da6b8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
return -EINVAL;
}
-static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
+struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->path_tree.rb_node;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 8df608ede366..6dd43f63238e 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn)
INIT_LIST_HEAD(&isert_conn->node);
init_completion(&isert_conn->login_comp);
init_completion(&isert_conn->login_req_comp);
+ init_waitqueue_head(&isert_conn->rem_wait);
kref_init(&isert_conn->kref);
mutex_init(&isert_conn->mutex);
INIT_WORK(&isert_conn->release_work, isert_release_work);
@@ -448,7 +449,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn,
isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
if (!isert_conn->login_rsp_buf) {
- isert_err("Unable to allocate isert_conn->login_rspbuf\n");
+ ret = -ENOMEM;
goto out_unmap_login_req_buf;
}
@@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn)
BUG_ON(!device);
isert_free_rx_descriptors(isert_conn);
- if (isert_conn->cm_id)
+ if (isert_conn->cm_id &&
+ !isert_conn->dev_removed)
rdma_destroy_id(isert_conn->cm_id);
if (isert_conn->qp) {
@@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn)
isert_device_put(device);
- kfree(isert_conn);
+ if (isert_conn->dev_removed)
+ wake_up_interruptible(&isert_conn->rem_wait);
+ else
+ kfree(isert_conn);
}
static void
@@ -753,6 +758,7 @@ static int
isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
struct isert_np *isert_np = cma_id->context;
+ struct isert_conn *isert_conn;
int ret = 0;
isert_info("%s (%d): status %d id %p np %p\n",
@@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
break;
case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */
case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */
- case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */
ret = isert_disconnected_handler(cma_id, event->event);
break;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ isert_conn = cma_id->qp->qp_context;
+ isert_conn->dev_removed = true;
+ isert_disconnected_handler(cma_id, event->event);
+ wait_event_interruptible(isert_conn->rem_wait,
+ isert_conn->state == ISER_CONN_DOWN);
+ kfree(isert_conn);
+ /*
+ * return non-zero from the callback to destroy
+ * the rdma cm id
+ */
+ return 1;
case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */
case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */
case RDMA_CM_EVENT_CONNECT_ERROR:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index fc791efe3a10..c02ada57d7f5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -158,6 +158,8 @@ struct isert_conn {
struct work_struct release_work;
bool logout_posted;
bool snd_w_inv;
+ wait_queue_head_t rem_wait;
+ bool dev_removed;
};
#define ISERT_MAX_CQ 64
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 48a44af740a6..0b1f69ed2e92 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport)
if (ret)
goto err_query_port;
+ snprintf(sport->port_guid, sizeof(sport->port_guid),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
+
if (!sport->mad_agent) {
memset(&reg_req, 0, sizeof(reg_req));
reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device)
sdev->device->name, i);
goto err_ring;
}
- snprintf(sport->port_guid, sizeof(sport->port_guid),
- "0x%016llx%016llx",
- be64_to_cpu(sport->gid.global.subnet_prefix),
- be64_to_cpu(sport->gid.global.interface_id));
}
spin_lock(&srpt_dev_lock);