From 7f260e8575bf53b93b77978c1e39f8e67612759c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 24 Sep 2013 11:25:22 -0400 Subject: SUNRPC: Enable the keepalive option for TCP sockets For NFSv4 we want to avoid retransmitting RPC calls unless the TCP connection breaks. However we still want to detect TCP connection breakage as soon as possible. Do this by setting the keepalive option with the idle timeout and count set to the 'timeo' and 'retrans' mount options. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee03d35677d9..208a7634b916 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2112,6 +2112,19 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; + unsigned int keepidle = xprt->timeout->to_initval / HZ; + unsigned int keepcnt = xprt->timeout->to_retries + 1; + unsigned int opt_on = 1; + + /* TCP Keepalive options */ + kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&opt_on, sizeof(opt_on)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); write_lock_bh(&sk->sk_callback_lock); -- cgit v1.2.3 From 8b71798c0d389d4cadc884fc7d68c61ee8cd4f45 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Sep 2013 10:18:04 -0400 Subject: SUNRPC: Only update the TCP connect cookie on a successful connect Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 208a7634b916..9928ba164d62 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1511,6 +1511,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_copied = 0; transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; + xprt->connect_cookie++; xprt_wake_pending_tasks(xprt, -EAGAIN); } @@ -2164,7 +2165,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) case 0: case -EINPROGRESS: /* SYN_SENT! */ - xprt->connect_cookie++; if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; } -- cgit v1.2.3 From 0a6605213040dd2fb479f0d1a9a87a1d7fa70904 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Sep 2013 11:31:54 -0400 Subject: SUNRPC: Don't set the request connect_cookie until a successful transmit We're using the request connect_cookie to track whether or not a request was successfully transmitted on the current transport connection or not. For that reason we should ensure that it is only set after we've successfully transmitted the request. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 095363eee764..e9ee7bf3a638 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -912,7 +912,6 @@ void xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; - req->rq_connect_cookie = xprt->connect_cookie; req->rq_xtime = ktime_get(); status = xprt->ops->send_request(task); if (status != 0) { @@ -938,12 +937,14 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { + else { /* * Sleep on the pending queue since * we're expecting a reply. */ - rpc_sleep_on(&xprt->pending, task, xprt_timer); + if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) + rpc_sleep_on(&xprt->pending, task, xprt_timer); + req->rq_connect_cookie = xprt->connect_cookie; } spin_unlock_bh(&xprt->transport_lock); } @@ -1186,6 +1187,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_xprt = xprt; req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); + req->rq_connect_cookie = xprt->connect_cookie - 1; req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, -- cgit v1.2.3 From ee071eff0f1afafa9917254a6e4ee19d28085f1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Sep 2013 11:09:53 -0400 Subject: SUNRPC: Clear the request rq_bytes_sent field in xprt_release_write Otherwise the tests of req->rq_bytes_sent in xprt_prepare_transmit will fail if we're dealing with a resend. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e9ee7bf3a638..8cc5c8bcad7f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -358,6 +358,11 @@ out_unlock: void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { + if (task != NULL) { + struct rpc_rqst *req = task->tk_rqstp; + if (req != NULL) + req->rq_bytes_sent = 0; + } xprt_clear_locked(xprt); __xprt_lock_write_next(xprt); } @@ -375,6 +380,11 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { if (xprt->snd_task == task) { + if (task != NULL) { + struct rpc_rqst *req = task->tk_rqstp; + if (req != NULL) + req->rq_bytes_sent = 0; + } xprt_clear_locked(xprt); __xprt_lock_write_next_cong(xprt); } -- cgit v1.2.3 From 90051ea774613ffc6b8aad3dc665c8505d6205a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Sep 2013 12:17:18 -0400 Subject: SUNRPC: Clean up - convert xprt_prepare_transmit to return a bool Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 2 +- net/sunrpc/clnt.c | 6 ++---- net/sunrpc/xprt.c | 15 +++++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index cec7b9b5e1bf..8097b9df6773 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -288,7 +288,7 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); -int xprt_prepare_transmit(struct rpc_task *task); +bool xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 77479606a971..fa50e42aabd3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1722,8 +1722,7 @@ call_transmit(struct rpc_task *task) task->tk_action = call_status; if (task->tk_status < 0) return; - task->tk_status = xprt_prepare_transmit(task); - if (task->tk_status != 0) + if (!xprt_prepare_transmit(task)) return; task->tk_action = call_transmit_status; /* Encode here so that rpcsec_gss can use correct sequence number. */ @@ -1811,8 +1810,7 @@ call_bc_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - task->tk_status = xprt_prepare_transmit(task); - if (task->tk_status == -EAGAIN) { + if (!xprt_prepare_transmit(task)) { /* * Could not reserve the transport. Try again after the * transport is released. diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8cc5c8bcad7f..2326af57b9b9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -864,24 +864,27 @@ static inline int xprt_has_timer(struct rpc_xprt *xprt) * @task: RPC task about to send a request * */ -int xprt_prepare_transmit(struct rpc_task *task) +bool xprt_prepare_transmit(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int err = 0; + bool ret = false; dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { - err = req->rq_reply_bytes_recvd; + task->tk_status = req->rq_reply_bytes_recvd; goto out_unlock; } - if (!xprt->ops->reserve_xprt(xprt, task)) - err = -EAGAIN; + if (!xprt->ops->reserve_xprt(xprt, task)) { + task->tk_status = -EAGAIN; + goto out_unlock; + } + ret = true; out_unlock: spin_unlock_bh(&xprt->transport_lock); - return err; + return ret; } void xprt_end_transmit(struct rpc_task *task) -- cgit v1.2.3 From 8a19a0b6cb2e2216afd68ef2047f30260cc8a220 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 24 Sep 2013 12:00:27 -0400 Subject: SUNRPC: Add RPC task and client level options to disable the resend timeout Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 5 ++++- net/sunrpc/xprt.c | 15 ++++++++++++--- 4 files changed, 19 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6740801aa71a..943ee895f2d1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -49,6 +49,7 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_discrtry : 1,/* disconnect before retry */ + cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1;/* be verbose */ @@ -126,6 +127,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_QUIET (1UL << 6) #define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) +#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 096ee58be11a..3a847de83fab 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -122,6 +122,7 @@ struct rpc_task_setup { #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ +#define RPC_TASK_NO_RETRANS_TIMEOUT 0x4000 /* wait forever for a reply */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fa50e42aabd3..b58525009e40 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -772,6 +772,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; + if (clnt->cl_noretranstimeo) + task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; if (sk_memalloc_socks()) { struct rpc_xprt *xprt; @@ -1898,7 +1900,8 @@ call_status(struct rpc_task *task) rpc_delay(task, 3*HZ); case -ETIMEDOUT: task->tk_action = call_timeout; - if (task->tk_client->cl_discrtry) + if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) + && task->tk_client->cl_discrtry) xprt_conditional_disconnect(req->rq_xprt, req->rq_connect_cookie); break; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2326af57b9b9..d166d9947e36 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -873,9 +873,18 @@ bool xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { - task->tk_status = req->rq_reply_bytes_recvd; - goto out_unlock; + if (!req->rq_bytes_sent) { + if (req->rq_reply_bytes_recvd) { + task->tk_status = req->rq_reply_bytes_recvd; + goto out_unlock; + } + if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) + && xprt_connected(xprt) + && req->rq_connect_cookie == xprt->connect_cookie) { + xprt->ops->set_retrans_timeout(task); + rpc_sleep_on(&xprt->pending, task, xprt_timer); + goto out_unlock; + } } if (!xprt->ops->reserve_xprt(xprt, task)) { task->tk_status = -EAGAIN; -- cgit v1.2.3 From ca7f33aa5b8051f17eec81766b8f39c83caf4196 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Sep 2013 13:39:45 -0400 Subject: SUNRPC: Fix RPC call retransmission statistics A retransmit should be when you successfully transmit an RPC call to the server a second time. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b58525009e40..bde31159f632 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1719,6 +1719,8 @@ call_connect_status(struct rpc_task *task) static void call_transmit(struct rpc_task *task) { + int is_retrans = RPC_WAS_SENT(task); + dprint_status(task); task->tk_action = call_status; @@ -1743,6 +1745,8 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); if (task->tk_status < 0) return; + if (is_retrans) + task->tk_client->cl_stats->rpcretrans++; /* * On success, ensure that we call xprt_end_transmit() before sleeping * in order to allow access to the socket to other RPC requests. @@ -1983,7 +1987,6 @@ call_timeout(struct rpc_task *task) rpcauth_invalcred(task); retry: - clnt->cl_stats->rpcretrans++; task->tk_action = call_bind; task->tk_status = 0; } @@ -2026,7 +2029,6 @@ call_decode(struct rpc_task *task) if (req->rq_rcv_buf.len < 12) { if (!RPC_IS_SOFT(task)) { task->tk_action = call_bind; - clnt->cl_stats->rpcretrans++; goto out_retry; } dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", -- cgit v1.2.3 From 92551948174d079b12541437f51cbe3e17d9dd24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Sep 2013 11:28:40 -0400 Subject: SUNRPC: Remove redundant initialisations of request rq_bytes_sent Now that we clear the rq_bytes_sent field on unlock, we don't need to set it on lock, so we just set it once when initialising the request. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d166d9947e36..4953550537e0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -205,10 +205,8 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - if (req != NULL) { - req->rq_bytes_sent = 0; + if (req != NULL) req->rq_ntrans++; - } return 1; @@ -263,7 +261,6 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - req->rq_bytes_sent = 0; req->rq_ntrans++; return 1; } @@ -300,10 +297,8 @@ static bool __xprt_lock_write_func(struct rpc_task *task, void *data) req = task->tk_rqstp; xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; + if (req) req->rq_ntrans++; - } return true; } @@ -329,7 +324,6 @@ static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data) } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - req->rq_bytes_sent = 0; req->rq_ntrans++; return true; } @@ -1210,6 +1204,11 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) req->rq_buffer = NULL; req->rq_xid = xprt_alloc_xid(xprt); req->rq_connect_cookie = xprt->connect_cookie - 1; + req->rq_bytes_sent = 0; + req->rq_snd_buf.len = 0; + req->rq_snd_buf.buflen = 0; + req->rq_rcv_buf.len = 0; + req->rq_rcv_buf.buflen = 0; req->rq_release_snd_buf = NULL; xprt_reset_majortimeo(req); dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid, -- cgit v1.2.3 From 561ec1603171cd9b38dcf6cac53e8710f437a48d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Sep 2013 15:22:45 -0400 Subject: SUNRPC: call_connect_status should recheck bind and connect status on error Currently, we go directly to call_transmit which sends us to call_status on error. If we know that the connect attempt failed, we should rather just jump straight back to call_bind and call_connect. Ditto for EAGAIN, except do not delay. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index bde31159f632..7352aef8a254 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1692,6 +1692,7 @@ call_connect_status(struct rpc_task *task) dprint_status(task); trace_rpc_connect_status(task, status); + task->tk_status = 0; switch (status) { /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: @@ -1700,12 +1701,14 @@ call_connect_status(struct rpc_task *task) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + /* retry with existing socket, after a delay */ + rpc_delay(task, 3*HZ); if (RPC_IS_SOFTCONN(task)) break; - /* retry with existing socket, after a delay */ - case 0: case -EAGAIN: - task->tk_status = 0; + task->tk_action = call_bind; + return; + case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; -- cgit v1.2.3 From d746e54522e52275865ca23917c16b3fdc226e51 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2013 14:12:17 -0400 Subject: SUNRPC: Modify synopsis of rpc_client_register() The rpc_client_register() helper was added in commit e73f4cc0, "SUNRPC: split client creation routine into setup and registration," Mon Jun 24 11:52:52 2013. In a subsequent patch, I'd like to invoke rpc_client_register() from a context where a struct rpc_create_args is not available. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7352aef8a254..587f31e0577e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -272,12 +272,13 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); } -static int rpc_client_register(const struct rpc_create_args *args, - struct rpc_clnt *clnt) +static int rpc_client_register(struct rpc_clnt *clnt, + rpc_authflavor_t pseudoflavor, + const char *client_name) { struct rpc_auth_create_args auth_args = { - .pseudoflavor = args->authflavor, - .target_name = args->client_name, + .pseudoflavor = pseudoflavor, + .target_name = client_name, }; struct rpc_auth *auth; struct net *net = rpc_net_ns(clnt); @@ -298,7 +299,7 @@ static int rpc_client_register(const struct rpc_create_args *args, auth = rpcauth_create(&auth_args, clnt); if (IS_ERR(auth)) { dprintk("RPC: Couldn't create auth handle (flavor %u)\n", - args->authflavor); + pseudoflavor); err = PTR_ERR(auth); goto err_auth; } @@ -398,7 +399,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); - err = rpc_client_register(args, clnt); + err = rpc_client_register(clnt, args->authflavor, args->client_name); if (err) goto out_no_path; if (parent) -- cgit v1.2.3 From 40b00b6b17c412ff9ff28631250d32ee29ff0006 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2013 14:12:23 -0400 Subject: SUNRPC: Add a helper to switch the transport of an rpc_clnt Add an RPC client API to redirect an rpc_clnt's transport from a source server to a destination server during a migration event. Signed-off-by: Trond Myklebust [ cel: forward ported to 3.12 ] Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 ++ net/sunrpc/clnt.c | 107 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 104 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 943ee895f2d1..8af2804bab16 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -136,6 +136,10 @@ void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *, rpc_authflavor_t); +int rpc_switch_client_transport(struct rpc_clnt *, + struct xprt_create *, + const struct rpc_timeout *); + void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_client(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 587f31e0577e..f167d9c8d7dd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -264,6 +265,25 @@ void rpc_clients_notifier_unregister(void) return rpc_pipefs_notifier_unregister(&rpc_clients_block); } +static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + const struct rpc_timeout *timeout) +{ + struct rpc_xprt *old; + + spin_lock(&clnt->cl_lock); + old = clnt->cl_xprt; + + if (!xprt_bound(xprt)) + clnt->cl_autobind = 1; + + clnt->cl_timeout = timeout; + rcu_assign_pointer(clnt->cl_xprt, xprt); + spin_unlock(&clnt->cl_lock); + + return old; +} + static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) { clnt->cl_nodelen = strlen(nodename); @@ -338,7 +358,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, { const struct rpc_program *program = args->program; const struct rpc_version *version; - struct rpc_clnt *clnt = NULL; + struct rpc_clnt *clnt = NULL; + const struct rpc_timeout *timeout; int err; /* sanity check the name before trying to print it */ @@ -366,7 +387,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (err) goto out_no_clid; - rcu_assign_pointer(clnt->cl_xprt, xprt); clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; @@ -381,16 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, INIT_LIST_HEAD(&clnt->cl_tasks); spin_lock_init(&clnt->cl_lock); - if (!xprt_bound(xprt)) - clnt->cl_autobind = 1; - - clnt->cl_timeout = xprt->timeout; + timeout = xprt->timeout; if (args->timeout != NULL) { memcpy(&clnt->cl_timeout_default, args->timeout, sizeof(clnt->cl_timeout_default)); - clnt->cl_timeout = &clnt->cl_timeout_default; + timeout = &clnt->cl_timeout_default; } + rpc_clnt_set_transport(clnt, xprt, timeout); + clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); @@ -601,6 +620,80 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) } EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth); +/** + * rpc_switch_client_transport: switch the RPC transport on the fly + * @clnt: pointer to a struct rpc_clnt + * @args: pointer to the new transport arguments + * @timeout: pointer to the new timeout parameters + * + * This function allows the caller to switch the RPC transport for the + * rpc_clnt structure 'clnt' to allow it to connect to a mirrored NFS + * server, for instance. It assumes that the caller has ensured that + * there are no active RPC tasks by using some form of locking. + * + * Returns zero if "clnt" is now using the new xprt. Otherwise a + * negative errno is returned, and "clnt" continues to use the old + * xprt. + */ +int rpc_switch_client_transport(struct rpc_clnt *clnt, + struct xprt_create *args, + const struct rpc_timeout *timeout) +{ + const struct rpc_timeout *old_timeo; + rpc_authflavor_t pseudoflavor; + struct rpc_xprt *xprt, *old; + struct rpc_clnt *parent; + int err; + + xprt = xprt_create_transport(args); + if (IS_ERR(xprt)) { + dprintk("RPC: failed to create new xprt for clnt %p\n", + clnt); + return PTR_ERR(xprt); + } + + pseudoflavor = clnt->cl_auth->au_flavor; + + old_timeo = clnt->cl_timeout; + old = rpc_clnt_set_transport(clnt, xprt, timeout); + + rpc_unregister_client(clnt); + __rpc_clnt_remove_pipedir(clnt); + + /* + * A new transport was created. "clnt" therefore + * becomes the root of a new cl_parent tree. clnt's + * children, if it has any, still point to the old xprt. + */ + parent = clnt->cl_parent; + clnt->cl_parent = clnt; + + /* + * The old rpc_auth cache cannot be re-used. GSS + * contexts in particular are between a single + * client and server. + */ + err = rpc_client_register(clnt, pseudoflavor, NULL); + if (err) + goto out_revert; + + synchronize_rcu(); + if (parent != clnt) + rpc_release_client(parent); + xprt_put(old); + dprintk("RPC: replaced xprt for clnt %p\n", clnt); + return 0; + +out_revert: + rpc_clnt_set_transport(clnt, old, old_timeo); + clnt->cl_parent = parent; + rpc_client_register(clnt, pseudoflavor, NULL); + xprt_put(xprt); + dprintk("RPC: failed to switch xprt for clnt %p\n", clnt); + return err; +} +EXPORT_SYMBOL_GPL(rpc_switch_client_transport); + /* * Kill all tasks for the given client. * XXX: kill their descendants as well? -- cgit v1.2.3 From 34751b9d04a221da2a74b27ba439f01c0ae30069 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Oct 2013 16:42:44 -0400 Subject: SUNRPC: Add correct rcu_dereference annotation in rpc_clnt_set_transport rpc_clnt_set_transport should use rcu_derefence_protected(), as it is only safe to be called with the rpc_clnt::cl_lock held. Cc: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f167d9c8d7dd..759b78b056a7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -272,7 +272,8 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, struct rpc_xprt *old; spin_lock(&clnt->cl_lock); - old = clnt->cl_xprt; + old = rcu_dereference_protected(clnt->cl_xprt, + lockdep_is_held(&clnt->cl_lock)); if (!xprt_bound(xprt)) clnt->cl_autobind = 1; -- cgit v1.2.3 From e3bfab18483a26649ff9cb605aa1410386b1e498 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 2 Oct 2013 09:48:15 -0400 Subject: sunrpc: comment typo fix Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9928ba164d62..9deed17fd3e4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2872,8 +2872,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (args->bc_xprt->xpt_bc_xprt) { /* * This server connection already has a backchannel - * export; we can't create a new one, as we wouldn't be - * able to match replies based on xid any more. So, + * transport; we can't create a new one, as we wouldn't + * be able to match replies based on xid any more. So, * reuse the already-existing one: */ return args->bc_xprt->xpt_bc_xprt; -- cgit v1.2.3 From 8313164c36473193c8034de643dc32f35a22bf59 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Tue, 15 Oct 2013 11:44:30 +0800 Subject: SUNRPC: remove an unnecessary if statement If req allocated failed just goto out_free, no need to check the 'i < num_prealloc'. There is just code simplification, no functional changes. Signed-off-by: Wang Weidong Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4953550537e0..04199bc8416f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1104,11 +1104,9 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size, for (i = 0; i < num_prealloc; i++) { req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); if (!req) - break; + goto out_free; list_add(&req->rq_list, &xprt->free); } - if (i < num_prealloc) - goto out_free; if (max_alloc > num_prealloc) xprt->max_reqs = max_alloc; else -- cgit v1.2.3 From 5fccc5b52ee07d07a74ce53c6f174bff81e26a16 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Oct 2013 18:41:44 -0400 Subject: SUNRPC: gss_alloc_msg - choose _either_ a v0 message or a v1 message Add the missing 'break' to ensure that we don't corrupt a legacy 'v0' type message by appending the 'v1'. Cc: Bruce Fields Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 084656671d6e..cc24323d3045 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -482,6 +482,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, switch (vers) { case 0: gss_encode_v0_msg(gss_msg); + break; default: gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); }; -- cgit v1.2.3 From 9d3a2260f0f4bd6379b0a0f131c743fff25b0029 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 28 Oct 2013 18:18:00 -0400 Subject: SUNRPC: Fix buffer overflow checking in gss_encode_v0_msg/gss_encode_v1_msg In gss_encode_v1_msg, it is pointless to BUG() after the overflow has happened. Replace the existing sprintf()-based code with scnprintf(), and warn if an overflow is ever triggered. In gss_encode_v0_msg, replace the runtime BUG_ON() with an appropriate compile-time BUILD_BUG_ON. Reported-by: Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 56 ++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index cc24323d3045..97912b40c254 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -420,41 +420,53 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg) memcpy(gss_msg->databuf, &uid, sizeof(uid)); gss_msg->msg.data = gss_msg->databuf; gss_msg->msg.len = sizeof(uid); - BUG_ON(sizeof(uid) > UPCALL_BUF_LEN); + + BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); } -static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, +static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, const char *target_name) { struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; - int len = 0; - - gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", - mech->gm_name, - from_kuid(&init_user_ns, gss_msg->uid)); - p += gss_msg->msg.len; + size_t buflen = sizeof(gss_msg->databuf); + int len; + + len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name, + from_kuid(&init_user_ns, gss_msg->uid)); + buflen -= len; + p += len; + gss_msg->msg.len = len; if (target_name) { - len = sprintf(p, "target=%s ", target_name); + len = scnprintf(p, buflen, "target=%s ", target_name); + buflen -= len; p += len; gss_msg->msg.len += len; } if (service_name != NULL) { - len = sprintf(p, "service=%s ", service_name); + len = scnprintf(p, buflen, "service=%s ", service_name); + buflen -= len; p += len; gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { - len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes); + len = scnprintf(p, buflen, "enctypes=%s ", + mech->gm_upcall_enctypes); + buflen -= len; p += len; gss_msg->msg.len += len; } - len = sprintf(p, "\n"); + len = scnprintf(p, buflen, "\n"); + if (len == 0) + goto out_overflow; gss_msg->msg.len += len; gss_msg->msg.data = gss_msg->databuf; - BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN); + return 0; +out_overflow: + WARN_ON_ONCE(1); + return -ENOMEM; } static struct gss_upcall_msg * @@ -463,15 +475,15 @@ gss_alloc_msg(struct gss_auth *gss_auth, { struct gss_upcall_msg *gss_msg; int vers; + int err = -ENOMEM; gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); if (gss_msg == NULL) - return ERR_PTR(-ENOMEM); + goto err; vers = get_pipe_version(gss_auth->net); - if (vers < 0) { - kfree(gss_msg); - return ERR_PTR(vers); - } + err = vers; + if (err < 0) + goto err_free_msg; gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); @@ -484,9 +496,15 @@ gss_alloc_msg(struct gss_auth *gss_auth, gss_encode_v0_msg(gss_msg); break; default: - gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); + err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); + if (err) + goto err_free_msg; }; return gss_msg; +err_free_msg: + kfree(gss_msg); +err: + return ERR_PTR(err); } static struct gss_upcall_msg * -- cgit v1.2.3 From 09c3e54635c85b3da44d3bc156619c1f1af3bb43 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 30 Oct 2013 13:32:15 +0800 Subject: SUNRPC: remove duplicated include from clnt.c Remove duplicated include. Signed-off-by: Wei Yongjun Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 759b78b056a7..dab09dac8fc7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 93dc41bdc5c853916610576c6b48a1704959c70d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 Oct 2013 16:14:36 +1100 Subject: SUNRPC: close a rare race in xs_tcp_setup_socket. We have one report of a crash in xs_tcp_setup_socket. The call path to the crash is: xs_tcp_setup_socket -> inet_stream_connect -> lock_sock_nested. The 'sock' passed to that last function is NULL. The only way I can see this happening is a concurrent call to xs_close: xs_close -> xs_reset_transport -> sock_release -> inet_release inet_release sets: sock->sk = NULL; inet_stream_connect calls lock_sock(sock->sk); which gets NULL. All calls to xs_close are protected by XPRT_LOCKED as are most activations of the workqueue which runs xs_tcp_setup_socket. The exception is xs_tcp_schedule_linger_timeout. So presumably the timeout queued by the later fires exactly when some other code runs xs_close(). To protect against this we can move the cancel_delayed_work_sync() call from xs_destory() to xs_close(). As xs_close is never called from the worker scheduled on ->connect_worker, this can never deadlock. Signed-off-by: NeilBrown [Trond: Make it safe to call cancel_delayed_work_sync() on AF_LOCAL sockets] Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9deed17fd3e4..a4709bbf8e5e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -835,6 +835,8 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); + cancel_delayed_work_sync(&transport->connect_worker); + xs_reset_transport(transport); xprt->reestablish_timeout = 0; @@ -869,12 +871,8 @@ static void xs_local_destroy(struct rpc_xprt *xprt) */ static void xs_destroy(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_delayed_work_sync(&transport->connect_worker); - xs_local_destroy(xprt); } @@ -1817,6 +1815,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } #endif +static void xs_dummy_setup_socket(struct work_struct *work) +{ +} + static struct socket *xs_create_sock(struct rpc_xprt *xprt, struct sock_xprt *transport, int family, int type, int protocol) { @@ -2668,6 +2670,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; + INIT_DELAYED_WORK(&transport->connect_worker, + xs_dummy_setup_socket); + switch (sun->sun_family) { case AF_LOCAL: if (sun->sun_path[0] != '/') { -- cgit v1.2.3 From a1311d87fa034e0de580e3a65f2d5c2e7a1f55f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 31 Oct 2013 09:18:49 -0400 Subject: SUNRPC: Cleanup xs_destroy() There is no longer any need for a separate xs_local_destroy() helper. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a4709bbf8e5e..17c88928b7db 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -856,14 +856,6 @@ static void xs_tcp_close(struct rpc_xprt *xprt) xs_tcp_shutdown(xprt); } -static void xs_local_destroy(struct rpc_xprt *xprt) -{ - xs_close(xprt); - xs_free_peer_addresses(xprt); - xprt_free(xprt); - module_put(THIS_MODULE); -} - /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -873,7 +865,10 @@ static void xs_destroy(struct rpc_xprt *xprt) { dprintk("RPC: xs_destroy xprt %p\n", xprt); - xs_local_destroy(xprt); + xs_close(xprt); + xs_free_peer_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -2513,7 +2508,7 @@ static struct rpc_xprt_ops xs_local_ops = { .send_request = xs_local_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_close, - .destroy = xs_local_destroy, + .destroy = xs_destroy, .print_stats = xs_local_print_stats, }; -- cgit v1.2.3