summary | refs | log | tree | commit | diff
path: root/block
diff options
context:
space:
mode:
author: Paolo Bonzini <pbonzini@redhat.com> 2012-10-31 10:42:51 +0100
committer: Paolo Bonzini <pbonzini@redhat.com> 2012-10-31 10:42:51 +0100
commit: f563a5d7a820424756f358e747238f03e866838a (patch)
tree: f78fa474b1933bd395af401a6d745150f4ecd15e /block
parent: a27365265cc2fed1178bf25a205e8ee02a9c0caf (diff)
parent: aee0bf7d8d7564f8f2c40e4501695c492b7dd8d1 (diff)
Merge remote-tracking branch 'origin/master' into threadpool
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'block')
-rw-r--r-- block/Makefile.objs 1
-rw-r--r-- block/commit.c 11
-rw-r--r-- block/mirror.c 322
-rw-r--r-- block/stream.c 4
4 files changed, 326 insertions, 12 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 30ef6aec03..7f015105b1 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -17,3 +17,4 @@ endif
common-obj-y += stream.o
common-obj-y += commit.o
+common-obj-y += mirror.o
diff --git a/block/commit.c b/block/commit.c
index 733c91403c..fae79582d4 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -160,7 +160,7 @@ exit_restore_reopen:
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
}
- block_job_complete(&s->common, ret);
+ block_job_completed(&s->common, ret);
}
static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -211,15 +211,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
- /* top and base may be valid, but let's make sure that base is reachable
- * from top */
- if (bdrv_find_backing_image(top, base->filename) != base) {
- error_setg(errp,
- "Base (%s) is not reachable from top (%s)",
- base->filename, top->filename);
- return;
- }
-
overlay_bs = bdrv_find_overlay(bs, top);
if (overlay_bs == NULL) {
diff --git a/block/mirror.c b/block/mirror.c
new file mode 100644
index 0000000000..d6618a4b34
--- /dev/null
+++ b/block/mirror.c
@@ -0,0 +1,322 @@
+/*
+ * Image mirroring
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+ /*
+ * Size in bytes of the bounce buffer used to copy data from the source
+ * to the target. It holds exactly one dirty-bitmap chunk
+ * (BDRV_SECTORS_PER_DIRTY_CHUNK sectors of 512 bytes each), which is the
+ * largest amount copied by a single mirror iteration.
+ */
+ BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
+};
+
+/*
+ * Time slice, in nanoseconds (100 ms). It is used both as the rate-limit
+ * slice length and as the sleep interval while the job is synced and there
+ * is nothing dirty to copy.
+ */
+#define SLICE_TIME 100000000ULL /* ns */
+
+/*
+ * State of one mirror job: copies the contents of the job's source device
+ * (common.bs) to a separate target device and keeps copying chunks that
+ * become dirty until the job is completed or cancelled.
+ */
+typedef struct MirrorBlockJob {
+ /* Generic block job state; callbacks recover the job via container_of() */
+ BlockJob common;
+ /* Rate limiter applied while the initial copy is in progress */
+ RateLimit limit;
+ /* Destination device that dirty chunks are written to */
+ BlockDriverState *target;
+ /* Selects which sectors are marked dirty before copying starts */
+ MirrorSyncMode mode;
+ /* Error policies for failed reads (source) and failed writes (target) */
+ BlockdevOnError on_source_error, on_target_error;
+ /* True once the target was flushed with no dirty data left; reset on error */
+ bool synced;
+ /* Set by mirror_complete() to request that the job finish */
+ bool should_complete;
+ /* Sector of the chunk handled last; input to the next dirty lookup */
+ int64_t sector_num;
+ /* Bounce buffer of BLOCK_SIZE bytes, allocated in mirror_run() */
+ uint8_t *buf;
+} MirrorBlockJob;
+
+/*
+ * Decide how the job reacts to an I/O error.
+ *
+ * @s:     the mirror job
+ * @read:  true if the error came from reading the source, false if it came
+ *         from the target
+ * @error: positive errno value describing the failure
+ *
+ * Any error takes the job out of the synced state. The decision itself is
+ * delegated to block_job_error_action(), using the device and the error
+ * policy that match the failing side.
+ *
+ * Returns the action chosen by block_job_error_action().
+ */
+static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
+                                            int error)
+{
+    BlockDriverState *failed_bs = read ? s->common.bs : s->target;
+    BlockdevOnError policy = read ? s->on_source_error : s->on_target_error;
+
+    s->synced = false;
+    return block_job_error_action(&s->common, failed_bs, policy, read, error);
+}
+
+/*
+ * Copy a single dirty chunk from the source to the target.
+ *
+ * @s:        the mirror job; s->sector_num is advanced to the chunk copied
+ * @p_action: on failure, receives the action chosen by mirror_error_action()
+ *
+ * The chunk is at most BDRV_SECTORS_PER_DIRTY_CHUNK sectors and is clipped
+ * at the end of the device. Its dirty bits are cleared before the copy
+ * starts and set again if either the read or the write fails, so that the
+ * chunk is retried later.
+ *
+ * Returns 0 on success, or the negative value returned by the failing
+ * read or write.
+ */
+static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
+                                         BlockErrorAction *p_action)
+{
+    BlockDriverState *source = s->common.bs;
+    BlockDriverState *target = s->target;
+    int64_t total_sectors = s->common.len >> BDRV_SECTOR_BITS;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    int chunk_sectors;
+    int ret;
+
+    /* Select the next dirty chunk and clear its dirty bits up front. */
+    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
+    chunk_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK,
+                        total_sectors - s->sector_num);
+    bdrv_reset_dirty(source, s->sector_num, chunk_sectors);
+
+    /* Describe the bounce buffer as a one-element I/O vector. */
+    iov.iov_base = s->buf;
+    iov.iov_len = chunk_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    trace_mirror_one_iteration(s, s->sector_num, chunk_sectors);
+
+    ret = bdrv_co_readv(source, s->sector_num, chunk_sectors, &qiov);
+    if (ret >= 0) {
+        ret = bdrv_co_writev(target, s->sector_num, chunk_sectors, &qiov);
+        if (ret >= 0) {
+            return 0;
+        }
+        *p_action = mirror_error_action(s, false, -ret);
+        s->synced = false;
+    } else {
+        *p_action = mirror_error_action(s, true, -ret);
+    }
+
+    /* The copy failed: mark the chunk dirty again so it is retried later. */
+    bdrv_set_dirty(source, s->sector_num, chunk_sectors);
+    return ret;
+}
+
+/*
+ * Coroutine entry point of the mirror job.
+ *
+ * @opaque: the MirrorBlockJob to run
+ *
+ * Phases:
+ *  1. Unless the sync mode is MIRROR_SYNC_MODE_NONE, walk the source and
+ *     mark every allocated chunk dirty (for MIRROR_SYNC_MODE_FULL the whole
+ *     backing chain is considered, otherwise only what lies above
+ *     bs->backing_hd).
+ *  2. Repeatedly copy one dirty chunk per loop iteration. Once no dirty
+ *     data is left and the target has been flushed, the job is reported as
+ *     ready and marked synced.
+ *  3. While synced, keep copying newly dirtied chunks until completion is
+ *     requested (mirror_complete()) or the job is cancelled, then drain
+ *     pending I/O and exit once nothing is dirty.
+ *
+ * Every exit goes through immediate_exit, which releases the bounce
+ * buffer, disables dirty tracking, closes and deletes the target and
+ * reports the result through block_job_completed(). If completion was
+ * requested and the job succeeded, the target is first reopened with the
+ * source's flags (if they differ) and bdrv_swap() is called on the pair.
+ */
+static void coroutine_fn mirror_run(void *opaque)
+{
+    MirrorBlockJob *s = opaque;
+    BlockDriverState *bs = s->common.bs;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n;
+
+    if (block_job_is_cancelled(&s->common)) {
+        goto immediate_exit;
+    }
+
+    s->common.len = bdrv_getlength(bs);
+    if (s->common.len < 0) {
+        /* Use the common exit path so that the target is closed and dirty
+         * tracking is switched off; returning directly would leak both.
+         */
+        ret = s->common.len;
+        goto immediate_exit;
+    }
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+
+    if (s->mode != MIRROR_SYNC_MODE_NONE) {
+        /* First part, loop on the sectors and initialize the dirty bitmap. */
+        BlockDriverState *base;
+        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
+        for (sector_num = 0; sector_num < end; ) {
+            /* First sector of the dirty chunk following sector_num's chunk */
+            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+            ret = bdrv_co_is_allocated_above(bs, base,
+                                             sector_num, next - sector_num, &n);
+
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+
+            assert(n > 0);
+            if (ret == 1) {
+                /* Allocated data found: the rest of the chunk is skipped */
+                bdrv_set_dirty(bs, sector_num, n);
+                sector_num = next;
+            } else {
+                sector_num += n;
+            }
+        }
+    }
+
+    s->sector_num = -1;
+    for (;;) {
+        uint64_t delay_ns;
+        int64_t cnt;
+        bool should_complete;
+
+        cnt = bdrv_get_dirty_count(bs);
+        if (cnt != 0) {
+            BlockErrorAction action = BDRV_ACTION_REPORT;
+            ret = mirror_iteration(s, &action);
+            if (ret < 0 && action == BDRV_ACTION_REPORT) {
+                goto immediate_exit;
+            }
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        should_complete = false;
+        if (cnt == 0) {
+            trace_mirror_before_flush(s);
+            ret = bdrv_flush(s->target);
+            if (ret < 0) {
+                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
+                    goto immediate_exit;
+                }
+            } else {
+                /* We're out of the streaming phase.  From now on, if the job
+                 * is cancelled we will actually complete all pending I/O and
+                 * report completion.  This way, block-job-cancel will leave
+                 * the target in a consistent state.
+                 */
+                s->common.offset = end * BDRV_SECTOR_SIZE;
+                if (!s->synced) {
+                    block_job_ready(&s->common);
+                    s->synced = true;
+                }
+
+                should_complete = s->should_complete ||
+                    block_job_is_cancelled(&s->common);
+                cnt = bdrv_get_dirty_count(bs);
+            }
+        }
+
+        if (cnt == 0 && should_complete) {
+            /* The dirty bitmap is not updated while operations are pending.
+             * If we're about to exit, wait for pending operations before
+             * calling bdrv_get_dirty_count(bs), or we may exit while the
+             * source has dirty data to copy!
+             *
+             * Note that I/O can be submitted by the guest while
+             * the mirror job runs.
+             */
+            trace_mirror_before_drain(s, cnt);
+            bdrv_drain_all();
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        ret = 0;
+        trace_mirror_before_sleep(s, cnt, s->synced);
+        if (!s->synced) {
+            /* Publish progress */
+            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+            } else {
+                delay_ns = 0;
+            }
+
+            /* Note that even when no rate limit is applied we need to yield
+             * with no pending I/O here so that qemu_aio_flush() returns.
+             */
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+            if (block_job_is_cancelled(&s->common)) {
+                break;
+            }
+        } else if (!should_complete) {
+            /* Synced and idle: poll for new dirty data once per slice */
+            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        } else if (cnt == 0) {
+            /* The two disks are in sync.  Exit and report successful
+             * completion.
+             */
+            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.cancelled = false;
+            break;
+        }
+    }
+
+immediate_exit:
+    /* s->buf comes from qemu_blockalign() and must therefore be released
+     * with qemu_vfree(), not g_free().  On the early exits above it has not
+     * been allocated yet; this assumes the job structure starts out zeroed
+     * so that s->buf is NULL there -- TODO confirm in block_job_create().
+     */
+    qemu_vfree(s->buf);
+    bdrv_set_dirty_tracking(bs, false);
+    bdrv_iostatus_disable(s->target);
+    if (s->should_complete && ret == 0) {
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
+            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        }
+        bdrv_swap(s->target, s->common.bs);
+    }
+    bdrv_close(s->target);
+    bdrv_delete(s->target);
+    block_job_completed(&s->common, ret);
+}
+
+/*
+ * Block job callback: change the job's rate limit.
+ *
+ * @job:   the mirror job
+ * @speed: new limit in bytes per second; must not be negative
+ * @errp:  set to QERR_INVALID_PARAMETER ("speed") if @speed is negative
+ *
+ * The limit is handed to the rate limiter in sectors per SLICE_TIME.
+ */
+static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    if (speed >= 0) {
+        ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+        return;
+    }
+
+    error_set(errp, QERR_INVALID_PARAMETER, "speed");
+}
+
+/*
+ * Block job callback: reset the I/O status of the mirror target.
+ *
+ * @job: the mirror job whose target device is reset
+ */
+static void mirror_iostatus_reset(BlockJob *job)
+{
+    MirrorBlockJob *mirror = container_of(job, MirrorBlockJob, common);
+    BlockDriverState *target = mirror->target;
+
+    bdrv_iostatus_reset(target);
+}
+
+/*
+ * Block job callback: ask a synced mirror job to finish.
+ *
+ * @job:  the mirror job
+ * @errp: set to QERR_OPEN_FILE_FAILED if the target's backing file cannot
+ *        be opened, or to QERR_BLOCK_JOB_NOT_READY if the job has not
+ *        reached the synced state yet
+ *
+ * The target's backing file is opened first. On success, and if the job is
+ * synced, should_complete is set and the job is resumed so that
+ * mirror_run() can finish.
+ */
+static void mirror_complete(BlockJob *job, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    if (bdrv_open_backing_file(s->target) < 0) {
+        char path[PATH_MAX];
+
+        bdrv_get_full_backing_filename(s->target, path, sizeof(path));
+        error_set(errp, QERR_OPEN_FILE_FAILED, path);
+        return;
+    }
+
+    if (s->synced) {
+        s->should_complete = true;
+        block_job_resume(job);
+    } else {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+    }
+}
+
+/*
+ * Job type descriptor for mirror jobs, passed to block_job_create() by
+ * mirror_start(). It gives the size of the per-job state, the job's type
+ * name and the callbacks implemented in this file.
+ */
+static BlockJobType mirror_job_type = {
+ .instance_size = sizeof(MirrorBlockJob),
+ .job_type = "mirror",
+ .set_speed = mirror_set_speed,
+ .iostatus_reset= mirror_iostatus_reset,
+ .complete = mirror_complete,
+};
+
+/*
+ * Create and start a mirror job that copies @bs to @target.
+ *
+ * @bs:              source device
+ * @target:          destination device
+ * @speed:           initial rate limit, passed to block_job_create()
+ * @mode:            selects which sectors are copied initially
+ * @on_source_error: policy for read errors on @bs
+ * @on_target_error: policy for write errors on @target
+ * @cb, @opaque:     completion callback and its argument, passed to
+ *                   block_job_create()
+ * @errp:            set to QERR_INVALID_PARAMETER ("on-source-error") if a
+ *                   stop/enospc source policy is requested while I/O status
+ *                   is not enabled on @bs; also receives any error from
+ *                   block_job_create()
+ *
+ * On success dirty tracking is enabled on @bs, the target gets its write
+ * cache, error policy and I/O status set up, and the job coroutine is
+ * created and entered before this function returns.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, MirrorSyncMode mode,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    MirrorBlockJob *s;
+    bool policy_needs_iostatus;
+
+    /* These two source policies only work with I/O status enabled on bs. */
+    policy_needs_iostatus = on_source_error == BLOCKDEV_ON_ERROR_STOP ||
+                            on_source_error == BLOCKDEV_ON_ERROR_ENOSPC;
+    if (policy_needs_iostatus && !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+        return;
+    }
+
+    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->target = target;
+    s->mode = mode;
+    s->on_source_error = on_source_error;
+    s->on_target_error = on_target_error;
+
+    bdrv_set_dirty_tracking(bs, true);
+    bdrv_set_enable_write_cache(target, true);
+    bdrv_set_on_error(target, on_target_error, on_target_error);
+    bdrv_iostatus_enable(target);
+
+    s->common.co = qemu_coroutine_create(mirror_run);
+    trace_mirror_start(bs, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block/stream.c b/block/stream.c
index 792665276e..0c0fc7a13b 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -86,7 +86,7 @@ static void coroutine_fn stream_run(void *opaque)
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
- block_job_complete(&s->common, s->common.len);
+ block_job_completed(&s->common, s->common.len);
return;
}
@@ -184,7 +184,7 @@ wait:
}
qemu_vfree(buf);
- block_job_complete(&s->common, ret);
+ block_job_completed(&s->common, ret);
}
static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)