From d5208c45be38ab858db6ec5a5097aa1c1a8ebbc9 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 16 Oct 2012 15:49:10 -0400 Subject: block: in commit, determine base image from the top image This simplifies some code and error checking, and also fixes a bug. bdrv_find_backing_image() should only be passed absolute filenames, or filenames relative to the chain. In the QMP message handler for block commit, when looking up the base, do so from the determined top image, so we know it is reachable from top. Some of the error messages put out by block-commit have changed slightly, which causes 2 test cases for block-commit to fail. This patch updates the test cases to look for the correct error output. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/commit.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'block') diff --git a/block/commit.c b/block/commit.c index 733c91403c..13d9e82471 100644 --- a/block/commit.c +++ b/block/commit.c @@ -211,15 +211,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } - /* top and base may be valid, but let's make sure that base is reachable - * from top */ - if (bdrv_find_backing_image(top, base->filename) != base) { - error_setg(errp, - "Base (%s) is not reachable from top (%s)", - base->filename, top->filename); - return; - } - overlay_bs = bdrv_find_overlay(bs, top); if (overlay_bs == NULL) { -- cgit v1.2.3 From 65f4632243f526958aa1f6b3911add98329c3796 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:20 +0200 Subject: block: rename block_job_complete to block_job_completed The imperative will be used for the QMP command. 
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- block/stream.c | 4 ++-- blockjob.c | 2 +- blockjob.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'block') diff --git a/block/commit.c b/block/commit.c index 13d9e82471..fae79582d4 100644 --- a/block/commit.c +++ b/block/commit.c @@ -160,7 +160,7 @@ exit_restore_reopen: bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block/stream.c b/block/stream.c index 792665276e..0c0fc7a13b 100644 --- a/block/stream.c +++ b/block/stream.c @@ -86,7 +86,7 @@ static void coroutine_fn stream_run(void *opaque) s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { - block_job_complete(&s->common, s->common.len); + block_job_completed(&s->common, s->common.len); return; } @@ -184,7 +184,7 @@ wait: } qemu_vfree(buf); - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/blockjob.c b/blockjob.c index f55f55a193..b5c16f3766 100644 --- a/blockjob.c +++ b/blockjob.c @@ -71,7 +71,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, return job; } -void block_job_complete(BlockJob *job, int ret) +void block_job_completed(BlockJob *job, int ret) { BlockDriverState *bs = job->bs; diff --git a/blockjob.h b/blockjob.h index 930cc3c46a..c2261a91f4 100644 --- a/blockjob.h +++ b/blockjob.h @@ -135,14 +135,14 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); /** - * block_job_complete: + * block_job_completed: * @job: The job being completed. * @ret: The status code. * * Call the completion function that was registered at creation time, and * free @job. 
*/ -void block_job_complete(BlockJob *job, int ret); +void block_job_completed(BlockJob *job, int ret); /** * block_job_set_speed: -- cgit v1.2.3 From 893f7ebafe4e8afc0ce4dbd9e64b3752f3036bbb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:23 +0200 Subject: mirror: introduce mirror job This patch adds the implementation of a new job that mirrors a disk to a new image while letting the guest continue using the old image. The target is treated as a "black box" and data is copied from the source to the target in the background. This can be used for several purposes, including storage migration, continuous replication, and observation of the guest I/O in an external program. It is also a first step in replacing the inefficient block migration code that is part of QEMU. The job is possibly never-ending, but it is logically structured into two phases: 1) copy all data as fast as possible until the target first gets in sync with the source; 2) keep target in sync and ensure that reopening to the target gets a correct (full) copy of the source data. The second phase is indicated by the progress in "info block-jobs" reporting the current offset to be equal to the length of the file. When the job is cancelled in the second phase, QEMU will run the job until the source is clean and quiescent, then it will report successful completion of the job. In other words, the BLOCK_JOB_CANCELLED event means that the target may _not_ be consistent with a past state of the source; the BLOCK_JOB_COMPLETED event means that the target is consistent with a past state of the source. (Note that it could already happen that management lost the race against QEMU and got a completion event instead of cancellation). It is not yet possible to complete the job and switch over to the target disk. The next patches will fix this and add many refinements to the basic idea introduced here. These include improved error management, some tunable knobs and performance optimizations. 
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/Makefile.objs | 1 + block/mirror.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 20 +++++ qapi-schema.json | 17 ++++ trace-events | 7 ++ 5 files changed, 280 insertions(+) create mode 100644 block/mirror.c (limited to 'block') diff --git a/block/Makefile.objs b/block/Makefile.objs index 554f429d05..806e526eb4 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -12,3 +12,4 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o common-obj-y += stream.o common-obj-y += commit.o +common-obj-y += mirror.o diff --git a/block/mirror.c b/block/mirror.c new file mode 100644 index 0000000000..b353798e8f --- /dev/null +++ b/block/mirror.c @@ -0,0 +1,235 @@ +/* + * Image mirroring + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "blockjob.h" +#include "block_int.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. 
+ */ + BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct MirrorBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *target; + MirrorSyncMode mode; + int64_t sector_num; + uint8_t *buf; +} MirrorBlockJob; + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->common.bs; + BlockDriverState *target = s->target; + QEMUIOVector qiov; + int ret, nb_sectors; + int64_t end; + struct iovec iov; + + end = s->common.len >> BDRV_SECTOR_BITS; + s->sector_num = bdrv_get_next_dirty(source, s->sector_num); + nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); + bdrv_reset_dirty(source, s->sector_num, nb_sectors); + + /* Copy the dirty cluster. */ + iov.iov_base = s->buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + + trace_mirror_one_iteration(s, s->sector_num, nb_sectors); + ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + return ret; + } + return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); +} + +static void coroutine_fn mirror_run(void *opaque) +{ + MirrorBlockJob *s = opaque; + BlockDriverState *bs = s->common.bs; + int64_t sector_num, end; + int ret = 0; + int n; + bool synced = false; + + if (block_job_is_cancelled(&s->common)) { + goto immediate_exit; + } + + s->common.len = bdrv_getlength(bs); + if (s->common.len < 0) { + block_job_completed(&s->common, s->common.len); + return; + } + + end = s->common.len >> BDRV_SECTOR_BITS; + s->buf = qemu_blockalign(bs, BLOCK_SIZE); + + if (s->mode != MIRROR_SYNC_MODE_NONE) { + /* First part, loop on the sectors and initialize the dirty bitmap. */ + BlockDriverState *base; + base = s->mode == MIRROR_SYNC_MODE_FULL ? 
NULL : bs->backing_hd; + for (sector_num = 0; sector_num < end; ) { + int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + ret = bdrv_co_is_allocated_above(bs, base, + sector_num, next - sector_num, &n); + + if (ret < 0) { + goto immediate_exit; + } + + assert(n > 0); + if (ret == 1) { + bdrv_set_dirty(bs, sector_num, n); + sector_num = next; + } else { + sector_num += n; + } + } + } + + s->sector_num = -1; + for (;;) { + uint64_t delay_ns; + int64_t cnt; + bool should_complete; + + cnt = bdrv_get_dirty_count(bs); + if (cnt != 0) { + ret = mirror_iteration(s); + if (ret < 0) { + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); + } + + should_complete = false; + if (cnt == 0) { + trace_mirror_before_flush(s); + ret = bdrv_flush(s->target); + if (ret < 0) { + goto immediate_exit; + } + + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + synced = true; + s->common.offset = end * BDRV_SECTOR_SIZE; + should_complete = block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); + } + + if (cnt == 0 && should_complete) { + /* The dirty bitmap is not updated while operations are pending. + * If we're about to exit, wait for pending operations before + * calling bdrv_get_dirty_count(bs), or we may exit while the + * source has dirty data to copy! + * + * Note that I/O can be submitted by the guest while + * mirror_iteration runs. 
+ */ + trace_mirror_before_drain(s, cnt); + bdrv_drain_all(); + cnt = bdrv_get_dirty_count(bs); + } + + ret = 0; + trace_mirror_before_sleep(s, cnt, synced); + if (!synced) { + /* Publish progress */ + s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + } else { + delay_ns = 0; + } + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + } else if (!should_complete) { + delay_ns = (cnt == 0 ? SLICE_TIME : 0); + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + } else if (cnt == 0) { + /* The two disks are in sync. Exit and report successful + * completion. + */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.cancelled = false; + break; + } + } + +immediate_exit: + g_free(s->buf); + bdrv_set_dirty_tracking(bs, false); + bdrv_close(s->target); + bdrv_delete(s->target); + block_job_completed(&s->common, ret); +} + +static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType mirror_job_type = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = "mirror", + .set_speed = mirror_set_speed, +}; + +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + MirrorBlockJob *s; + + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->target = target; + s->mode = mode; + bdrv_set_dirty_tracking(bs, true); + 
bdrv_set_enable_write_cache(s->target, true); + s->common.co = qemu_coroutine_create(mirror_run); + trace_mirror_start(bs, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block_int.h b/block_int.h index f4bae04401..aaa46a83b0 100644 --- a/block_int.h +++ b/block_int.h @@ -331,4 +331,24 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/* + * mirror_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @mode: Whether to collapse all images in the chain to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @target until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/qapi-schema.json b/qapi-schema.json index 37bbecab60..8c4b7c808b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1165,6 +1165,23 @@ { 'enum': 'BlockdevOnError', 'data': ['report', 'ignore', 'enospc', 'stop'] } +## +# @MirrorSyncMode: +# +# An enumeration of possible behaviors for the initial synchronization +# phase of storage mirroring. 
+# +# @top: copies data in the topmost image to the destination +# +# @full: copies data from all images to the destination +# +# @none: only copy data written from now on +# +# Since: 1.3 +## +{ 'enum': 'MirrorSyncMode', + 'data': ['top', 'full', 'none'] } + ## # @BlockJobInfo: # diff --git a/trace-events b/trace-events index 9ab8e2781a..09b5d558c6 100644 --- a/trace-events +++ b/trace-events @@ -77,6 +77,13 @@ stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" +# block/mirror.c +mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_before_flush(void *s) "s %p" +mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 +mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" +mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" + # blockdev.c qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" -- cgit v1.2.3 From d63ffd87acad618a4a64b8812b64ad88577ae9b1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:25 +0200 Subject: mirror: implement completion Switching to the target of the migration is done mostly asynchronously, and reported to management via the BLOCK_JOB_COMPLETED event; the only synchronous phase is opening the backing files. bdrv_open_backing_file can always be done, even for migration of the full image (aka sync: 'full'). In this case, qmp_drive_mirror will create the target disk with no backing file at all, and bdrv_open_backing_file will be a no-op. 
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/mirror.c b/block/mirror.c index b353798e8f..6320f6a2e5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,6 +32,8 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; MirrorSyncMode mode; + bool synced; + bool should_complete; int64_t sector_num; uint8_t *buf; } MirrorBlockJob; @@ -70,7 +72,6 @@ static void coroutine_fn mirror_run(void *opaque) int64_t sector_num, end; int ret = 0; int n; - bool synced = false; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; @@ -136,9 +137,14 @@ static void coroutine_fn mirror_run(void *opaque) * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ - synced = true; s->common.offset = end * BDRV_SECTOR_SIZE; - should_complete = block_job_is_cancelled(&s->common); + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); cnt = bdrv_get_dirty_count(bs); } @@ -157,8 +163,8 @@ static void coroutine_fn mirror_run(void *opaque) } ret = 0; - trace_mirror_before_sleep(s, cnt, synced); - if (!synced) { + trace_mirror_before_sleep(s, cnt, s->synced); + if (!s->synced) { /* Publish progress */ s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; @@ -191,6 +197,12 @@ static void coroutine_fn mirror_run(void *opaque) immediate_exit: g_free(s->buf); bdrv_set_dirty_tracking(bs, false); + if (s->should_complete && ret == 0) { + if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { + bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + } + bdrv_swap(s->target, s->common.bs); + } bdrv_close(s->target); bdrv_delete(s->target); block_job_completed(&s->common, ret); @@ -207,10 +219,33 @@ static void mirror_set_speed(BlockJob *job, 
int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } +static void mirror_complete(BlockJob *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + int ret; + + ret = bdrv_open_backing_file(s->target); + if (ret < 0) { + char backing_filename[PATH_MAX]; + bdrv_get_full_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); + return; + } + if (!s->synced) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + s->should_complete = true; + block_job_resume(job); +} + static BlockJobType mirror_job_type = { .instance_size = sizeof(MirrorBlockJob), .job_type = "mirror", .set_speed = mirror_set_speed, + .complete = mirror_complete, }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, -- cgit v1.2.3 From b952b5589a36114e06201c0d2e82c293dbad2b1f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2012 16:49:28 +0200 Subject: mirror: add support for on-source-error/on-target-error Error management is important for mirroring; otherwise, an error on the target (even something as "innocent" as ENOSPC) requires starting again with a full copy. Similar to on_read_error/on_write_error, two separate knobs are provided for on_source_error (reads) and on_target_error (writes). The default is 'report' for both. The 'ignore' policy will leave the sector dirty, so that it will be retried later. Thus, it will not cause corruption. 
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block/mirror.c | 94 +++++++++++++++++++++++++++++++++++++++++++------------- block_int.h | 4 +++ blockdev.c | 14 +++++++-- hmp.c | 3 +- qapi-schema.json | 11 ++++++- qmp-commands.hx | 8 ++++- 6 files changed, 108 insertions(+), 26 deletions(-) (limited to 'block') diff --git a/block/mirror.c b/block/mirror.c index 6320f6a2e5..d6618a4b34 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -32,13 +32,28 @@ typedef struct MirrorBlockJob { RateLimit limit; BlockDriverState *target; MirrorSyncMode mode; + BlockdevOnError on_source_error, on_target_error; bool synced; bool should_complete; int64_t sector_num; uint8_t *buf; } MirrorBlockJob; -static int coroutine_fn mirror_iteration(MirrorBlockJob *s) +static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, + int error) +{ + s->synced = false; + if (read) { + return block_job_error_action(&s->common, s->common.bs, + s->on_source_error, true, error); + } else { + return block_job_error_action(&s->common, s->target, + s->on_target_error, false, error); + } +} + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s, + BlockErrorAction *p_action) { BlockDriverState *source = s->common.bs; BlockDriverState *target = s->target; @@ -60,9 +75,21 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_one_iteration(s, s->sector_num, nb_sectors); ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); if (ret < 0) { - return ret; + *p_action = mirror_error_action(s, true, -ret); + goto fail; + } + ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, false, -ret); + s->synced = false; + goto fail; } - return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + return 0; + +fail: + /* Try again later. 
*/ + bdrv_set_dirty(source, s->sector_num, nb_sectors); + return ret; } static void coroutine_fn mirror_run(void *opaque) @@ -117,8 +144,9 @@ static void coroutine_fn mirror_run(void *opaque) cnt = bdrv_get_dirty_count(bs); if (cnt != 0) { - ret = mirror_iteration(s); - if (ret < 0) { + BlockErrorAction action = BDRV_ACTION_REPORT; + ret = mirror_iteration(s, &action); + if (ret < 0 && action == BDRV_ACTION_REPORT) { goto immediate_exit; } cnt = bdrv_get_dirty_count(bs); @@ -129,23 +157,25 @@ static void coroutine_fn mirror_run(void *opaque) trace_mirror_before_flush(s); ret = bdrv_flush(s->target); if (ret < 0) { - goto immediate_exit; - } - - /* We're out of the streaming phase. From now on, if the job - * is cancelled we will actually complete all pending I/O and - * report completion. This way, block-job-cancel will leave - * the target in a consistent state. - */ - s->common.offset = end * BDRV_SECTOR_SIZE; - if (!s->synced) { - block_job_ready(&s->common); - s->synced = true; + if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + } else { + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. 
+ */ + s->common.offset = end * BDRV_SECTOR_SIZE; + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); } - - should_complete = s->should_complete || - block_job_is_cancelled(&s->common); - cnt = bdrv_get_dirty_count(bs); } if (cnt == 0 && should_complete) { @@ -197,6 +227,7 @@ static void coroutine_fn mirror_run(void *opaque) immediate_exit: g_free(s->buf); bdrv_set_dirty_tracking(bs, false); + bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); @@ -219,6 +250,13 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } +static void mirror_iostatus_reset(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + bdrv_iostatus_reset(s->target); +} + static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -245,25 +283,39 @@ static BlockJobType mirror_job_type = { .instance_size = sizeof(MirrorBlockJob), .job_type = "mirror", .set_speed = mirror_set_speed, + .iostatus_reset= mirror_iostatus_reset, .complete = mirror_complete, }; void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { MirrorBlockJob *s; + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || + on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); + return; + } + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); if (!s) { return; } + s->on_source_error = 
on_source_error; + s->on_target_error = on_target_error; s->target = target; s->mode = mode; bdrv_set_dirty_tracking(bs, true); bdrv_set_enable_write_cache(s->target, true); + bdrv_set_on_error(s->target, on_target_error, on_target_error); + bdrv_iostatus_enable(s->target); s->common.co = qemu_coroutine_create(mirror_run); trace_mirror_start(bs, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block_int.h b/block_int.h index aaa46a83b0..00204eb82e 100644 --- a/block_int.h +++ b/block_int.h @@ -337,6 +337,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, * @target: Block device to write to. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @mode: Whether to collapse all images in the chain to the target. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. 
@@ -348,6 +350,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, */ void mirror_start(BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/blockdev.c b/blockdev.c index 431c678f8f..a068a4b669 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1185,7 +1185,10 @@ void qmp_drive_mirror(const char *device, const char *target, bool has_format, const char *format, enum MirrorSyncMode sync, bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, Error **errp) + bool has_speed, int64_t speed, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) { BlockDriverInfo bdi; BlockDriverState *bs; @@ -1200,6 +1203,12 @@ void qmp_drive_mirror(const char *device, const char *target, if (!has_speed) { speed = 0; } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } if (!has_mode) { mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } @@ -1292,7 +1301,8 @@ void qmp_drive_mirror(const char *device, const char *target, } } - mirror_start(bs, target_bs, speed, sync, block_job_cb, bs, &local_err); + mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); if (local_err != NULL) { bdrv_delete(target_bs); error_propagate(errp, local_err); diff --git a/hmp.c b/hmp.c index e53025306a..4a458ac0e8 100644 --- a/hmp.c +++ b/hmp.c @@ -795,7 +795,8 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict) qmp_drive_mirror(device, filename, !!format, format, full ? 
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, &errp); + true, mode, false, 0, + false, 0, false, 0, &errp); hmp_handle_error(mon, &errp); } diff --git a/qapi-schema.json b/qapi-schema.json index a066cd5ca2..6aa443e90e 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1629,6 +1629,14 @@ # (all the disk, only the sectors allocated in the topmost image, or # only new I/O). # +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # @@ -1637,7 +1645,8 @@ { 'command': 'drive-mirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int' } } + '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } ## # @migrate_cancel diff --git a/qmp-commands.hx b/qmp-commands.hx index 614baea784..c31312f8ef 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -937,7 +937,8 @@ EQMP { .name = "drive-mirror", - .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?", + .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," + "on-source-error:s?,on-target-error:s?", .mhandler.cmd_new = qmp_marshal_input_drive_mirror, }, @@ -965,6 +966,11 @@ Arguments: possibilities include "full" for all the disk, "top" for only the sectors allocated in the topmost image, or "none" to only replicate new I/O (MirrorSyncMode). 
+- "on-source-error": the action to take on an error on the source + (BlockdevOnError, default 'report') +- "on-target-error": the action to take on an error on the target + (BlockdevOnError, default 'report') + Example: -- cgit v1.2.3