summaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtrdma/rpc_rdma.c
diff options
context:
space:
mode:
author Trond Myklebust <trond.myklebust@primarydata.com> 2015-02-03 11:53:18 -0500
committer Trond Myklebust <trond.myklebust@primarydata.com> 2015-02-03 11:54:58 -0500
commit cc3ea893cbb386e657d775f299f61bb9ba3bc435 (patch)
tree fd17f7af86fbe30d9c4c9d02102b3cbd02e90ca6 /net/sunrpc/xprtrdma/rpc_rdma.c
parent c7c545d4a34872f4a3d710e22f21fb61f7258706 (diff)
parent a0a1d50cd1e80652142af5cddcde500d06c71bdd (diff)
Merge tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma
NFS: Client side changes for RDMA

These patches improve the scalability of the NFSoRDMA client and take large
variables off of the stack. Additionally, the GFP_* flags are updated to
match what TCP uses.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

* tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma: (21 commits)
  xprtrdma: Update the GFP flags used in xprt_rdma_allocate()
  xprtrdma: Clean up after adding regbuf management
  xprtrdma: Allocate zero pad separately from rpcrdma_buffer
  xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep
  xprtrdma: Allocate RPC/RDMA send buffer separately from struct rpcrdma_req
  xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req
  xprtrdma: Add struct rpcrdma_regbuf and helpers
  xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy()
  xprtrdma: Simplify synopsis of rpcrdma_buffer_create()
  xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr off the stack
  xprtrdma: Take struct ib_device_attr off the stack
  xprtrdma: Free the pd if ib_query_qp() fails
  xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt
  xprtrdma: Move credit update to RPC reply handler
  xprtrdma: Remove rl_mr field, and the mr_chunk union
  xprtrdma: Remove rpcrdma_ep::rep_ia
  xprtrdma: Rename "xprt" and "rdma_connect" fields in struct rpcrdma_xprt
  xprtrdma: Clean up hdrlen
  xprtrdma: Display XIDs in host byte order
  xprtrdma: Modernize htonl and ntohl
  ...
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- net/sunrpc/xprtrdma/rpc_rdma.c 108
1 files changed, 62 insertions, 46 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index df01d124936c..7e9acd9361c5 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -209,9 +209,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
if (cur_rchunk) { /* read */
cur_rchunk->rc_discrim = xdr_one;
/* all read chunks have the same "position" */
- cur_rchunk->rc_position = htonl(pos);
- cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
- cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
+ cur_rchunk->rc_position = cpu_to_be32(pos);
+ cur_rchunk->rc_target.rs_handle =
+ cpu_to_be32(seg->mr_rkey);
+ cur_rchunk->rc_target.rs_length =
+ cpu_to_be32(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_rchunk->rc_target.rs_offset,
seg->mr_base);
@@ -222,8 +224,10 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
cur_rchunk++;
r_xprt->rx_stats.read_chunk_count++;
} else { /* write/reply */
- cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
- cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
+ cur_wchunk->wc_target.rs_handle =
+ cpu_to_be32(seg->mr_rkey);
+ cur_wchunk->wc_target.rs_length =
+ cpu_to_be32(seg->mr_len);
xdr_encode_hyper(
(__be32 *)&cur_wchunk->wc_target.rs_offset,
seg->mr_base);
@@ -257,7 +261,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
*iptr++ = xdr_zero; /* encode a NULL reply chunk */
} else {
warray->wc_discrim = xdr_one;
- warray->wc_nchunks = htonl(nchunks);
+ warray->wc_nchunks = cpu_to_be32(nchunks);
iptr = (__be32 *) cur_wchunk;
if (type == rpcrdma_writech) {
*iptr++ = xdr_zero; /* finish the write chunk list */
@@ -290,7 +294,7 @@ ssize_t
rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
- struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;
+ struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
if (req->rl_rtype != rpcrdma_noch)
result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
@@ -402,13 +406,12 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
base = rqst->rq_svec[0].iov_base;
rpclen = rqst->rq_svec[0].iov_len;
- /* build RDMA header in private area at front */
- headerp = (struct rpcrdma_msg *) req->rl_base;
- /* don't htonl XID, it's already done in request */
+ headerp = rdmab_to_msg(req->rl_rdmabuf);
+ /* don't byte-swap XID, it's already done in request */
headerp->rm_xid = rqst->rq_xid;
- headerp->rm_vers = xdr_one;
- headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
- headerp->rm_type = htonl(RDMA_MSG);
+ headerp->rm_vers = rpcrdma_version;
+ headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests);
+ headerp->rm_type = rdma_msg;
/*
* Chunks needed for results?
@@ -468,7 +471,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
return -EIO;
}
- hdrlen = 28; /*sizeof *headerp;*/
+ hdrlen = RPCRDMA_HDRLEN_MIN;
padlen = 0;
/*
@@ -482,11 +485,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
RPCRDMA_INLINE_PAD_VALUE(rqst));
if (padlen) {
- headerp->rm_type = htonl(RDMA_MSGP);
+ headerp->rm_type = rdma_msgp;
headerp->rm_body.rm_padded.rm_align =
- htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
+ cpu_to_be32(RPCRDMA_INLINE_PAD_VALUE(rqst));
headerp->rm_body.rm_padded.rm_thresh =
- htonl(RPCRDMA_INLINE_PAD_THRESH);
+ cpu_to_be32(RPCRDMA_INLINE_PAD_THRESH);
headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
@@ -524,7 +527,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
- headerp, base, req->rl_iov.lkey);
+ headerp, base, rdmab_lkey(req->rl_rdmabuf));
/*
* initialize send_iov's - normally only two: rdma chunk header and
@@ -533,26 +536,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* header and any write data. In all non-rdma cases, any following
* data has been copied into the RPC header buffer.
*/
- req->rl_send_iov[0].addr = req->rl_iov.addr;
+ req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
req->rl_send_iov[0].length = hdrlen;
- req->rl_send_iov[0].lkey = req->rl_iov.lkey;
+ req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
- req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
+ req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
req->rl_send_iov[1].length = rpclen;
- req->rl_send_iov[1].lkey = req->rl_iov.lkey;
+ req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
req->rl_niovs = 2;
if (padlen) {
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
- req->rl_send_iov[2].addr = ep->rep_pad.addr;
+ req->rl_send_iov[2].addr = rdmab_addr(ep->rep_padbuf);
req->rl_send_iov[2].length = padlen;
- req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
+ req->rl_send_iov[2].lkey = rdmab_lkey(ep->rep_padbuf);
req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
- req->rl_send_iov[3].lkey = req->rl_iov.lkey;
+ req->rl_send_iov[3].lkey = rdmab_lkey(req->rl_sendbuf);
req->rl_niovs = 4;
}
@@ -569,8 +572,9 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
{
unsigned int i, total_len;
struct rpcrdma_write_chunk *cur_wchunk;
+ char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf);
- i = ntohl(**iptrp); /* get array count */
+ i = be32_to_cpu(**iptrp);
if (i > max)
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
@@ -582,11 +586,11 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
xdr_decode_hyper((__be32 *)&seg->rs_offset, &off);
dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
__func__,
- ntohl(seg->rs_length),
+ be32_to_cpu(seg->rs_length),
(unsigned long long)off,
- ntohl(seg->rs_handle));
+ be32_to_cpu(seg->rs_handle));
}
- total_len += ntohl(seg->rs_length);
+ total_len += be32_to_cpu(seg->rs_length);
++cur_wchunk;
}
/* check and adjust for properly terminated write chunk */
@@ -596,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
return -1;
cur_wchunk = (struct rpcrdma_write_chunk *) w;
}
- if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
+ if ((char *)cur_wchunk > base + rep->rr_len)
return -1;
*iptrp = (__be32 *) cur_wchunk;
@@ -691,7 +695,9 @@ rpcrdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_ep *ep =
container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
- struct rpc_xprt *xprt = ep->rep_xprt;
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ep, struct rpcrdma_xprt, rx_ep);
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
spin_lock_bh(&xprt->transport_lock);
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
@@ -732,7 +738,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
- int rdmalen, status;
+ int credits, rdmalen, status;
unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
@@ -744,14 +750,14 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
}
return;
}
- if (rep->rr_len < 28) {
+ if (rep->rr_len < RPCRDMA_HDRLEN_MIN) {
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
}
- headerp = (struct rpcrdma_msg *) rep->rr_base;
- if (headerp->rm_vers != xdr_one) {
+ headerp = rdmab_to_msg(rep->rr_rdmabuf);
+ if (headerp->rm_vers != rpcrdma_version) {
dprintk("RPC: %s: invalid version %d\n",
- __func__, ntohl(headerp->rm_vers));
+ __func__, be32_to_cpu(headerp->rm_vers));
goto repost;
}
@@ -762,7 +768,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p failed "
"to match any request xid 0x%08x len %d\n",
- __func__, rep, headerp->rm_xid, rep->rr_len);
+ __func__, rep, be32_to_cpu(headerp->rm_xid),
+ rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
rep->rr_func = rpcrdma_reply_handler;
@@ -778,13 +785,14 @@ repost:
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: duplicate reply 0x%p to RPC "
"request 0x%p: xid 0x%08x\n", __func__, rep, req,
- headerp->rm_xid);
+ be32_to_cpu(headerp->rm_xid));
goto repost;
}
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
- __func__, rep, req, rqst, headerp->rm_xid);
+ __func__, rep, req, rqst,
+ be32_to_cpu(headerp->rm_xid));
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
@@ -793,7 +801,7 @@ repost:
/* check for expected message types */
/* The order of some of these tests is important. */
switch (headerp->rm_type) {
- case htonl(RDMA_MSG):
+ case rdma_msg:
/* never expect read chunks */
/* never expect reply chunks (two ways to check) */
/* never expect write chunks without having offered RDMA */
@@ -824,22 +832,24 @@ repost:
} else {
/* else ordinary inline */
rdmalen = 0;
- iptr = (__be32 *)((unsigned char *)headerp + 28);
- rep->rr_len -= 28; /*sizeof *headerp;*/
+ iptr = (__be32 *)((unsigned char *)headerp +
+ RPCRDMA_HDRLEN_MIN);
+ rep->rr_len -= RPCRDMA_HDRLEN_MIN;
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
- case htonl(RDMA_NOMSG):
+ case rdma_nomsg:
/* never expect read or write chunks, always reply chunks */
if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
headerp->rm_body.rm_chunks[1] != xdr_zero ||
headerp->rm_body.rm_chunks[2] != xdr_one ||
req->rl_nchunks == 0)
goto badheader;
- iptr = (__be32 *)((unsigned char *)headerp + 28);
+ iptr = (__be32 *)((unsigned char *)headerp +
+ RPCRDMA_HDRLEN_MIN);
rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
if (rdmalen < 0)
goto badheader;
@@ -853,7 +863,7 @@ badheader:
dprintk("%s: invalid rpcrdma reply header (type %d):"
" chunks[012] == %d %d %d"
" expected chunks <= %d\n",
- __func__, ntohl(headerp->rm_type),
+ __func__, be32_to_cpu(headerp->rm_type),
headerp->rm_body.rm_chunks[0],
headerp->rm_body.rm_chunks[1],
headerp->rm_body.rm_chunks[2],
@@ -863,8 +873,14 @@ badheader:
break;
}
+ credits = be32_to_cpu(headerp->rm_credit);
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > r_xprt->rx_buf.rb_max_requests)
+ credits = r_xprt->rx_buf.rb_max_requests;
+
cwnd = xprt->cwnd;
- xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);