summaryrefslogtreecommitdiff
path: root/fs/dlm
diff options
context:
space:
mode:
authorGang He <ghe@suse.com>2018-05-02 10:28:35 -0500
committerDavid Teigland <teigland@redhat.com>2018-05-02 10:28:35 -0500
commitf706d830154b6c8405c6b77595a60d135182c97e (patch)
treeb09e5fa8f6c7df5b9da46888053358dba1f83c9e /fs/dlm
parentb09c603ca40fc6980188f9c7f419c0918a3f461e (diff)
dlm: make sctp_connect_to_sock() return in specified time
When the user setup a two-ring cluster, DLM kernel module will automatically selects to use SCTP protocol to communicate between each node. There will be about 5 minute hang in DLM kernel module, in case one ring is broken before switching to another ring, this will potentially affect the dependent upper applications, e.g. ocfs2, gfs2, clvm and clustered-MD, etc. Unfortunately, if the user setup a two-ring cluster, we can not specify DLM communication protocol with TCP explicitly, since DLM kernel module only supports SCTP protocol for multiple ring cluster. Base on my investigation, the time is spent in sock->ops->connect() function before returns ETIMEDOUT(-110) error, since O_NONBLOCK argument in connect() function does not work here, then we should make sock->ops->connect() function return in specified time via setting socket SO_SNDTIMEO atrribute. Signed-off-by: Gang He <ghe@suse.com> Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm')
-rw-r--r--fs/dlm/lowcomms.c12
1 files changed, 12 insertions, 0 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 815125281a86..d31e9abfb9f1 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1037,6 +1037,7 @@ static void sctp_connect_to_sock(struct connection *con)
int result;
int addr_len;
struct socket *sock;
+ struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
@@ -1083,8 +1084,19 @@ static void sctp_connect_to_sock(struct connection *con)
kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
sizeof(one));
+ /*
+ * Make sock->ops->connect() function return in specified time,
+ * since O_NONBLOCK argument in connect() function does not work here,
+ * then, we should restore the default value of this attribute.
+ */
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
O_NONBLOCK);
+ memset(&tv, 0, sizeof(tv));
+ kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+ sizeof(tv));
+
if (result == -EINPROGRESS)
result = 0;
if (result == 0)