From 97f6cd39da227459cb46ed4088d37d5d8db51c50 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 14 Apr 2015 10:45:42 -0500 Subject: md-cluster: re-add capabilities When "re-add" is writted to /sys/block/mdXX/md/dev-YYY/state, the clustered md: 1. Sends RE_ADD message with the desc_nr. Nodes receiving the message clear the Faulty bit in their respective rdev->flags. 2. The node initiating re-add, gathers the bitmaps of all nodes and copies them into the local bitmap. It does not clear the bitmap from which it is copying. 3. Initiating node schedules a md recovery to sync the devices. Signed-off-by: Guoqing Jiang Signed-off-by: Goldwyn Rodrigues Signed-off-by: NeilBrown --- drivers/md/md-cluster.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'drivers/md/md-cluster.c') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 30b41b70db17..fcfc4b9b2672 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -73,6 +73,7 @@ enum msg_type { RESYNCING, NEWDISK, REMOVE, + RE_ADD, }; struct cluster_msg { @@ -253,7 +254,7 @@ static void recover_bitmaps(struct md_thread *thread) str, ret); goto clear_bit; } - ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi); + ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); if (ret) { pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); goto dlm_unlock; @@ -412,6 +413,16 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot); } +static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg) +{ + struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot); + + if (rdev && test_bit(Faulty, &rdev->flags)) + clear_bit(Faulty, &rdev->flags); + else + pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot); +} + static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) { switch (msg->type) { @@ -436,6 +447,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) __func__, __LINE__, msg->slot); process_remove_disk(mddev, msg); break; + case RE_ADD: + pr_info("%s: %d Received RE_ADD from %d\n", + __func__, __LINE__, msg->slot); + process_readd_disk(mddev, msg); + break; default: pr_warn("%s:%d Received unknown message from %d\n", __func__, __LINE__, msg->slot); @@ -883,6 +899,35 @@ static int remove_disk(struct mddev *mddev, struct md_rdev *rdev) return __sendmsg(cinfo, &cmsg); } +static int gather_bitmaps(struct md_rdev *rdev) +{ + int sn, err; + sector_t lo, hi; + struct cluster_msg cmsg; + struct mddev *mddev = rdev->mddev; + struct md_cluster_info *cinfo = mddev->cluster_info; + + cmsg.type = RE_ADD; + cmsg.raid_slot = rdev->desc_nr; + err = sendmsg(cinfo, &cmsg); + if (err) + goto out; + + for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) { + if (sn == (cinfo->slot_number - 1)) + continue; + err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false); + if (err) { + pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn); + goto out; + } + if ((hi > 0) && (lo < mddev->recovery_cp)) + mddev->recovery_cp = lo; + } +out: + return err; +} + static struct md_cluster_operations cluster_ops = { .join = join, .leave = leave, @@ -898,6 +943,7 @@ static struct md_cluster_operations cluster_ops = { .add_new_disk_finish = add_new_disk_finish, .new_disk_ack = new_disk_ack, .remove_disk = remove_disk, + .gather_bitmaps = gather_bitmaps, }; static int __init cluster_init(void) -- cgit v1.2.3