ocfs2: special case recovery lock in dlmlock_remote()

If the previous master of the recovery lock dies, let calc_usage take it down completely and let the caller completely redo the dlmlock() call. Otherwise, there will never be an opportunity to re-master the lockres and recovery wont be able to progress. Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
author: Kurt Hackel <kurt.hackel@oracle.com> 2006-05-01 13:47:50 -0700
committer: Mark Fasheh <mark.fasheh@oracle.com> 2006-06-26 14:43:08 -0700
commit: c8df412e1c746dd21094966d04b3a79aad0f4d08 (patch)
tree: b662c0c5b9ff4e5a607d53adb92a8c38acce940e
parent: 36407488b1cbc4d84bc2bd14e03f3f9b768090d9 (diff)
2 files changed, 27 insertions, 10 deletions
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 0ff934874942..20b38dc18736 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -227,7 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
 	lock->lock_pending = 0;
 	if (status != DLM_NORMAL) {
-		if (status != DLM_NOTQUEUED) {
+		if (status == DLM_RECOVERING &&
+		    dlm_is_recovery_lock(res->lockname.name,
+					 res->lockname.len)) {
+			/* recovery lock was mastered by dead node.
+			 * we need to have calc_usage shoot down this
+			 * lockres and completely remaster it. */
+			mlog(0, "%s: recovery lock was owned by "
+			     "dead node %u, remaster it now.\n",
+			     dlm->name, res->owner);
+		} else if (status != DLM_NOTQUEUED) {
 			/*
 			 * DO NOT call calc_usage, as this would unhash
 			 * the remote lockres before we ever get to use
@@ -691,18 +700,22 @@ retry_lock:
 			msleep(100);
 			/* no waiting for dlm_reco_thread */
 			if (recovery) {
-				if (status == DLM_RECOVERING) {
-					mlog(0, "%s: got RECOVERING "
-					     "for $REOCVERY lock, master "
-					     "was %u\n", dlm->name, 
-					     res->owner);
-					dlm_wait_for_node_death(dlm, res->owner, 
-							DLM_NODE_DEATH_WAIT_MAX);
-				}
+				if (status != DLM_RECOVERING)
+					goto retry_lock;
+
+				mlog(0, "%s: got RECOVERING "
+				     "for $RECOVERY lock, master "
+				     "was %u\n", dlm->name,
+				     res->owner);
+				/* wait to see the node go down, then
+				 * drop down and allow the lockres to
+				 * get cleaned up.  need to remaster. */
+				dlm_wait_for_node_death(dlm, res->owner,
+						DLM_NODE_DEATH_WAIT_MAX);
 			} else {
 				dlm_wait_for_recovery(dlm);
+				goto retry_lock;
 			}
-			goto retry_lock;
 		}
 
 		if (status != DLM_NORMAL) {
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 86199f66eb56..00209f4a2916 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2314,6 +2314,10 @@ again:
 		mlog(0, "%s: reco master %u is ready to recover %u\n",
 		     dlm->name, dlm->reco.new_master, dlm->reco.dead_node);
 		status = -EEXIST;
+	} else if (ret == DLM_RECOVERING) {
+		mlog(0, "dlm=%s dlmlock says master node died (this=%u)\n",
+		     dlm->name, dlm->node_num);
+		goto again;
 	} else {
 		struct dlm_lock_resource *res;
author	Kurt Hackel <kurt.hackel@oracle.com>	2006-05-01 13:47:50 -0700
committer	Mark Fasheh <mark.fasheh@oracle.com>	2006-06-26 14:43:08 -0700
commit	c8df412e1c746dd21094966d04b3a79aad0f4d08 (patch)
tree	b662c0c5b9ff4e5a607d53adb92a8c38acce940e
parent	36407488b1cbc4d84bc2bd14e03f3f9b768090d9 (diff)