summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--QMP/qmp-events.txt22
-rw-r--r--block-migration.c4
-rw-r--r--block.c556
-rw-r--r--block/backup.c6
-rw-r--r--block/blkverify.c4
-rw-r--r--block/commit.c6
-rw-r--r--block/cow.c93
-rw-r--r--block/iscsi.c16
-rw-r--r--block/mirror.c6
-rw-r--r--block/qapi.c50
-rw-r--r--block/qcow.c15
-rw-r--r--block/qcow2.c26
-rw-r--r--block/qed.c41
-rw-r--r--block/raw-posix.c24
-rw-r--r--block/raw-win32.c36
-rw-r--r--block/raw_bsd.c10
-rw-r--r--block/sheepdog.c20
-rw-r--r--block/snapshot.c2
-rw-r--r--block/stream.c12
-rw-r--r--block/vdi.c17
-rw-r--r--block/vmdk.c33
-rw-r--r--block/vvfat.c21
-rw-r--r--blockdev-nbd.c10
-rw-r--r--blockdev.c242
-rw-r--r--blockjob.c1
-rw-r--r--hmp.c36
-rw-r--r--hw/block/dataplane/virtio-blk.c9
-rw-r--r--hw/block/xen_disk.c13
-rw-r--r--hw/i386/pc_piix.c18
-rw-r--r--hw/i386/pc_q35.c20
-rw-r--r--hw/net/e1000.c131
-rw-r--r--hw/net/ne2000.c2
-rw-r--r--hw/net/vmxnet3.h2
-rw-r--r--include/block/block.h38
-rw-r--r--include/block/block_int.h35
-rw-r--r--include/hw/i386/pc.h8
-rw-r--r--include/hw/xen/xen.h4
-rw-r--r--include/net/net.h2
-rw-r--r--include/qemu/throttle.h110
-rw-r--r--nbd.c5
-rw-r--r--net/hub.c2
-rw-r--r--net/net.c14
-rw-r--r--net/tap-bsd.c11
-rw-r--r--qapi-schema.json69
-rw-r--r--qemu-img-cmds.hx10
-rw-r--r--qemu-img.c317
-rw-r--r--qemu-img.texi70
-rw-r--r--qemu-io-cmds.c4
-rw-r--r--qemu-io.c6
-rw-r--r--qemu-options.hx6
-rw-r--r--qmp-commands.hx32
-rw-r--r--tests/Makefile2
-rw-r--r--tests/qemu-iotests/026.out28
-rw-r--r--tests/qemu-iotests/026.out.nocache626
-rw-r--r--tests/qemu-iotests/039.out4
-rwxr-xr-xtests/qemu-iotests/06397
-rw-r--r--tests/qemu-iotests/063.out10
-rwxr-xr-xtests/qemu-iotests/check240
-rw-r--r--tests/qemu-iotests/common422
-rw-r--r--tests/qemu-iotests/common.config6
-rw-r--r--tests/qemu-iotests/common.filter42
-rw-r--r--tests/qemu-iotests/common.pattern4
-rw-r--r--tests/qemu-iotests/common.rc92
-rw-r--r--tests/qemu-iotests/group1
-rw-r--r--tests/test-aio.c11
-rw-r--r--tests/test-throttle.c481
-rw-r--r--util/Makefile.objs1
-rw-r--r--util/throttle.c396
-rw-r--r--xen-all.c7
-rw-r--r--xen-stub.c2
70 files changed, 3593 insertions, 1126 deletions
diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 39b6016460..4b24ec900d 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -18,6 +18,28 @@ Example:
"data": { "actual": 944766976 },
"timestamp": { "seconds": 1267020223, "microseconds": 435656 } }
+BLOCK_IMAGE_CORRUPTED
+---------------------
+
+Emitted when a disk image is being marked corrupt.
+
+Data:
+
+- "device": Device name (json-string)
+- "msg": Informative message (e.g., reason for the corruption) (json-string)
+- "offset": If the corruption resulted from an image access, this is the access
+ offset into the image (json-int)
+- "size": If the corruption resulted from an image access, this is the access
+ size (json-int)
+
+Example:
+
+{ "event": "BLOCK_IMAGE_CORRUPTED",
+ "data": { "device": "ide0-hd0",
+ "msg": "Prevented active L1 table overwrite", "offset": 196608,
+ "size": 65536 },
+ "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
+
BLOCK_IO_ERROR
--------------
diff --git a/block-migration.c b/block-migration.c
index f803f2006f..daf9ec1eab 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -336,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
- drive_get_ref(drive_get_by_blockdev(bs));
bdrv_set_in_use(bs, 1);
+ bdrv_ref(bs);
block_mig_state.total_sector_sum += sectors;
@@ -575,7 +575,7 @@ static void blk_mig_cleanup(void)
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
bdrv_set_in_use(bmds->bs, 0);
- drive_put_ref(drive_get_by_blockdev(bmds->bs));
+ bdrv_unref(bmds->bs);
g_free(bmds->aio_bitmap);
g_free(bmds);
}
diff --git a/block.c b/block.c
index 26639e8b70..a325efcb21 100644
--- a/block.c
+++ b/block.c
@@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
- double elapsed_time, uint64_t *wait);
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, int64_t *wait);
-
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -123,69 +116,101 @@ int is_windows_drive(const char *filename)
#endif
/* throttling disk I/O limits */
-void bdrv_io_limits_disable(BlockDriverState *bs)
+void bdrv_set_io_limits(BlockDriverState *bs,
+ ThrottleConfig *cfg)
{
- bs->io_limits_enabled = false;
+ int i;
+
+ throttle_config(&bs->throttle_state, cfg);
+
+ for (i = 0; i < 2; i++) {
+ qemu_co_enter_next(&bs->throttled_reqs[i]);
+ }
+}
+
+/* this function drains all the throttled IOs */
+static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
+{
+ bool drained = false;
+ bool enabled = bs->io_limits_enabled;
+ int i;
- do {} while (qemu_co_enter_next(&bs->throttled_reqs));
+ bs->io_limits_enabled = false;
- if (bs->block_timer) {
- timer_del(bs->block_timer);
- timer_free(bs->block_timer);
- bs->block_timer = NULL;
+ for (i = 0; i < 2; i++) {
+ while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+ drained = true;
+ }
}
- bs->slice_start = 0;
- bs->slice_end = 0;
+ bs->io_limits_enabled = enabled;
+
+ return drained;
}
-static void bdrv_block_timer(void *opaque)
+void bdrv_io_limits_disable(BlockDriverState *bs)
{
- BlockDriverState *bs = opaque;
+ bs->io_limits_enabled = false;
+
+ bdrv_start_throttled_reqs(bs);
- qemu_co_enter_next(&bs->throttled_reqs);
+ throttle_destroy(&bs->throttle_state);
}
-void bdrv_io_limits_enable(BlockDriverState *bs)
+static void bdrv_throttle_read_timer_cb(void *opaque)
{
- bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs);
- bs->io_limits_enabled = true;
+ BlockDriverState *bs = opaque;
+ qemu_co_enter_next(&bs->throttled_reqs[0]);
+}
+
+static void bdrv_throttle_write_timer_cb(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ qemu_co_enter_next(&bs->throttled_reqs[1]);
}
-bool bdrv_io_limits_enabled(BlockDriverState *bs)
+/* should be called before bdrv_set_io_limits if a limit is set */
+void bdrv_io_limits_enable(BlockDriverState *bs)
{
- BlockIOLimit *io_limits = &bs->io_limits;
- return io_limits->bps[BLOCK_IO_LIMIT_READ]
- || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
- || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
- || io_limits->iops[BLOCK_IO_LIMIT_READ]
- || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
- || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+ assert(!bs->io_limits_enabled);
+ throttle_init(&bs->throttle_state,
+ QEMU_CLOCK_VIRTUAL,
+ bdrv_throttle_read_timer_cb,
+ bdrv_throttle_write_timer_cb,
+ bs);
+ bs->io_limits_enabled = true;
}
+/* This function makes an IO wait if needed
+ *
+ * @nb_sectors: the number of sectors of the IO
+ * @is_write: is the IO a write
+ */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
- bool is_write, int nb_sectors)
+ int nb_sectors,
+ bool is_write)
{
- int64_t wait_time = -1;
+ /* must this IO wait? */
+ bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
- if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
- qemu_co_queue_wait(&bs->throttled_reqs);
+ /* queue the IO if it must wait or if requests of this type are queued */
+ if (must_wait ||
+ !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
+ qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
}
- /* In fact, we hope to keep each request's timing, in FIFO mode. The next
- * throttled requests will not be dequeued until the current request is
- * allowed to be serviced. So if the current request still exceeds the
- * limits, it will be inserted to the head. All requests followed it will
- * be still in throttled_reqs queue.
- */
+ /* the IO will be executed, do the accounting */
+ throttle_account(&bs->throttle_state,
+ is_write,
+ nb_sectors * BDRV_SECTOR_SIZE);
- while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
- timer_mod(bs->block_timer,
- wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
- qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
+ /* if the next request must wait -> do nothing */
+ if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
+ return;
}
- qemu_co_queue_next(&bs->throttled_reqs);
+ /* else queue next request for execution */
+ qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
/* check if the path starts with "<protocol>:" */
@@ -305,7 +330,9 @@ BlockDriverState *bdrv_new(const char *device_name)
bdrv_iostatus_disable(bs);
notifier_list_init(&bs->close_notifiers);
notifier_with_return_list_init(&bs->before_write_notifiers);
- qemu_co_queue_init(&bs->throttled_reqs);
+ qemu_co_queue_init(&bs->throttled_reqs[0]);
+ qemu_co_queue_init(&bs->throttled_reqs[1]);
+ bs->refcnt = 1;
return bs;
}
@@ -876,7 +903,7 @@ fail:
if (!bs->drv) {
QDECREF(bs->options);
}
- bdrv_delete(bs);
+ bdrv_unref(bs);
return ret;
}
@@ -927,7 +954,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
*backing_filename ? backing_filename : NULL, options,
back_flags, back_drv);
if (ret < 0) {
- bdrv_delete(bs->backing_hd);
+ bdrv_unref(bs->backing_hd);
bs->backing_hd = NULL;
bs->open_flags |= BDRV_O_NO_BACKING;
return ret;
@@ -1002,12 +1029,12 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
bs1 = bdrv_new("");
ret = bdrv_open(bs1, filename, NULL, 0, drv);
if (ret < 0) {
- bdrv_delete(bs1);
+ bdrv_unref(bs1);
goto fail;
}
total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
- bdrv_delete(bs1);
+ bdrv_unref(bs1);
ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
if (ret < 0) {
@@ -1081,7 +1108,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
}
if (bs->file != file) {
- bdrv_delete(file);
+ bdrv_unref(file);
file = NULL;
}
@@ -1112,16 +1139,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
bdrv_dev_change_media_cb(bs, true);
}
- /* throttling disk I/O limits */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_enable(bs);
- }
-
return 0;
unlink_and_fail:
if (file != NULL) {
- bdrv_delete(file);
+ bdrv_unref(file);
}
if (bs->is_temporary) {
unlink(filename);
@@ -1382,7 +1404,7 @@ void bdrv_close(BlockDriverState *bs)
if (bs->drv) {
if (bs->backing_hd) {
- bdrv_delete(bs->backing_hd);
+ bdrv_unref(bs->backing_hd);
bs->backing_hd = NULL;
}
bs->drv->bdrv_close(bs);
@@ -1407,7 +1429,7 @@ void bdrv_close(BlockDriverState *bs)
bs->options = NULL;
if (bs->file != NULL) {
- bdrv_delete(bs->file);
+ bdrv_unref(bs->file);
bs->file = NULL;
}
}
@@ -1435,7 +1457,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
- if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
+ return true;
+ }
+ if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
return true;
}
if (bs->file && bdrv_requests_pending(bs->file)) {
@@ -1481,7 +1506,7 @@ void bdrv_drain_all(void)
* a busy wait.
*/
QTAILQ_FOREACH(bs, &bdrv_states, list) {
- while (qemu_co_enter_next(&bs->throttled_reqs)) {
+ if (bdrv_start_throttled_reqs(bs)) {
busy = true;
}
}
@@ -1523,13 +1548,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
bs_dest->enable_write_cache = bs_src->enable_write_cache;
- /* i/o timing parameters */
- bs_dest->slice_start = bs_src->slice_start;
- bs_dest->slice_end = bs_src->slice_end;
- bs_dest->slice_submitted = bs_src->slice_submitted;
- bs_dest->io_limits = bs_src->io_limits;
- bs_dest->throttled_reqs = bs_src->throttled_reqs;
- bs_dest->block_timer = bs_src->block_timer;
+ /* i/o throttled req */
+ memcpy(&bs_dest->throttle_state,
+ &bs_src->throttle_state,
+ sizeof(ThrottleState));
+ bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
+ bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
/* r/w error */
@@ -1543,6 +1567,9 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
/* dirty bitmap */
bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
+ /* reference count */
+ bs_dest->refcnt = bs_src->refcnt;
+
/* job */
bs_dest->in_use = bs_src->in_use;
bs_dest->job = bs_src->job;
@@ -1576,7 +1603,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
assert(bs_new->dev == NULL);
assert(bs_new->in_use == 0);
assert(bs_new->io_limits_enabled == false);
- assert(bs_new->block_timer == NULL);
+ assert(!throttle_have_timer(&bs_new->throttle_state));
tmp = *bs_new;
*bs_new = *bs_old;
@@ -1595,7 +1622,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
assert(bs_new->job == NULL);
assert(bs_new->in_use == 0);
assert(bs_new->io_limits_enabled == false);
- assert(bs_new->block_timer == NULL);
+ assert(!throttle_have_timer(&bs_new->throttle_state));
bdrv_rebind(bs_new);
bdrv_rebind(bs_old);
@@ -1626,11 +1653,12 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
bs_new->drv ? bs_new->drv->format_name : "");
}
-void bdrv_delete(BlockDriverState *bs)
+static void bdrv_delete(BlockDriverState *bs)
{
assert(!bs->dev);
assert(!bs->job);
assert(!bs->in_use);
+ assert(!bs->refcnt);
bdrv_close(bs);
@@ -1829,8 +1857,11 @@ int bdrv_commit(BlockDriverState *bs)
buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
for (sector = 0; sector < total_sectors; sector += n) {
- if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
-
+ ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ if (ret) {
if (bdrv_read(bs, sector, buf, n) != 0) {
ret = -EIO;
goto ro_cleanup;
@@ -2146,7 +2177,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
/* so that bdrv_close() does not recursively close the chain */
intermediate_state->bs->backing_hd = NULL;
- bdrv_delete(intermediate_state->bs);
+ bdrv_unref(intermediate_state->bs);
}
ret = 0;
@@ -2538,11 +2569,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
return -EIO;
}
- /* throttling disk read I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, false, nb_sectors);
- }
-
if (bs->copy_on_read) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -2554,12 +2580,17 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
wait_for_overlapping_requests(bs, sector_num, nb_sectors);
}
+ /* throttling disk I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, nb_sectors, false);
+ }
+
tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
if (flags & BDRV_REQ_COPY_ON_READ) {
int pnum;
- ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
@@ -2679,15 +2710,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
return -EIO;
}
- /* throttling disk write I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, true, nb_sectors);
- }
-
if (bs->copy_on_read_in_flight) {
wait_for_overlapping_requests(bs, sector_num, nb_sectors);
}
+ /* throttling disk I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, nb_sectors, true);
+ }
+
tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
@@ -2711,6 +2742,9 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
bs->wr_highest_sector = sector_num + nb_sectors - 1;
}
+ if (bs->growable && ret >= 0) {
+ bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ }
tracked_request_end(&req);
@@ -2785,7 +2819,7 @@ int64_t bdrv_getlength(BlockDriverState *bs)
if (!drv)
return -ENOMEDIUM;
- if (bs->growable || bdrv_dev_has_removable_media(bs)) {
+ if (bdrv_dev_has_removable_media(bs)) {
if (drv->bdrv_getlength) {
return drv->bdrv_getlength(bs);
}
@@ -2805,14 +2839,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
*nb_sectors_ptr = length;
}
-/* throttling disk io limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- BlockIOLimit *io_limits)
-{
- bs->io_limits = *io_limits;
- bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
-}
-
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
BlockdevOnError on_write_error)
{
@@ -3005,6 +3031,11 @@ int bdrv_has_zero_init(BlockDriverState *bs)
{
assert(bs->drv);
+ /* If BS is a copy on write image, it is initialized to
+ the contents of the base image, which may not be zeroes. */
+ if (bs->backing_hd) {
+ return 0;
+ }
if (bs->drv->bdrv_has_zero_init) {
return bs->drv->bdrv_has_zero_init(bs);
}
@@ -3013,15 +3044,15 @@ int bdrv_has_zero_init(BlockDriverState *bs)
return 0;
}
-typedef struct BdrvCoIsAllocatedData {
+typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
int64_t sector_num;
int nb_sectors;
int *pnum;
- int ret;
+ int64_t ret;
bool done;
-} BdrvCoIsAllocatedData;
+} BdrvCoGetBlockStatusData;
/*
* Returns true iff the specified sector is present in the disk image. Drivers
@@ -3038,12 +3069,20 @@ typedef struct BdrvCoIsAllocatedData {
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
{
+ int64_t length;
int64_t n;
+ int64_t ret, ret2;
+
+ length = bdrv_getlength(bs);
+ if (length < 0) {
+ return length;
+ }
- if (sector_num >= bs->total_sectors) {
+ if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
*pnum = 0;
return 0;
}
@@ -3053,35 +3092,69 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
nb_sectors = n;
}
- if (!bs->drv->bdrv_co_is_allocated) {
+ if (!bs->drv->bdrv_co_get_block_status) {
*pnum = nb_sectors;
- return 1;
+ ret = BDRV_BLOCK_DATA;
+ if (bs->drv->protocol_name) {
+ ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+ }
+ return ret;
+ }
+
+ ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (!(ret & BDRV_BLOCK_DATA)) {
+ if (bdrv_has_zero_init(bs)) {
+ ret |= BDRV_BLOCK_ZERO;
+ } else {
+ BlockDriverState *bs2 = bs->backing_hd;
+ int64_t length2 = bdrv_getlength(bs2);
+ if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
+ ret |= BDRV_BLOCK_ZERO;
+ }
+ }
+ }
+
+ if (bs->file &&
+ (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
+ (ret & BDRV_BLOCK_OFFSET_VALID)) {
+ ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+ *pnum, pnum);
+ if (ret2 >= 0) {
+ /* Ignore errors. This is just providing extra information, it
+ * is useful but not necessary.
+ */
+ ret |= (ret2 & BDRV_BLOCK_ZERO);
+ }
}
- return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
+ return ret;
}
-/* Coroutine wrapper for bdrv_is_allocated() */
-static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
+/* Coroutine wrapper for bdrv_get_block_status() */
+static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
{
- BdrvCoIsAllocatedData *data = opaque;
+ BdrvCoGetBlockStatusData *data = opaque;
BlockDriverState *bs = data->bs;
- data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
- data->pnum);
+ data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
+ data->pnum);
data->done = true;
}
/*
- * Synchronous wrapper around bdrv_co_is_allocated().
+ * Synchronous wrapper around bdrv_co_get_block_status().
*
- * See bdrv_co_is_allocated() for details.
+ * See bdrv_co_get_block_status() for details.
*/
-int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
{
Coroutine *co;
- BdrvCoIsAllocatedData data = {
+ BdrvCoGetBlockStatusData data = {
.bs = bs,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
@@ -3089,14 +3162,31 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
.done = false,
};
- co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_get_block_status_co_entry(&data);
+ } else {
+ co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
}
return data.ret;
}
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+ if (ret < 0) {
+ return ret;
+ }
+ return
+ (ret & BDRV_BLOCK_DATA) ||
+ ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs));
+}
+
/*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
@@ -3109,10 +3199,10 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
* allocated/unallocated state.
*
*/
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum)
+int bdrv_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
{
BlockDriverState *intermediate;
int ret, n = nb_sectors;
@@ -3120,8 +3210,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
intermediate = top;
while (intermediate && intermediate != base) {
int pnum_inter;
- ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
- &pnum_inter);
+ ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
+ &pnum_inter);
if (ret < 0) {
return ret;
} else if (ret) {
@@ -3148,44 +3238,6 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
return 0;
}
-/* Coroutine wrapper for bdrv_is_allocated_above() */
-static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
-{
- BdrvCoIsAllocatedData *data = opaque;
- BlockDriverState *top = data->bs;
- BlockDriverState *base = data->base;
-
- data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
- data->nb_sectors, data->pnum);
- data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_is_allocated_above().
- *
- * See bdrv_co_is_allocated_above() for details.
- */
-int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t sector_num, int nb_sectors, int *pnum)
-{
- Coroutine *co;
- BdrvCoIsAllocatedData data = {
- .bs = top,
- .base = base,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- qemu_aio_wait();
- }
- return data.ret;
-}
-
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
if (bs->backing_hd && bs->backing_hd->encrypted)
@@ -3622,169 +3674,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
acb->aiocb_info->cancel(acb);
}
-/* block I/O throttling */
-static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, double elapsed_time, uint64_t *wait)
-{
- uint64_t bps_limit = 0;
- uint64_t extension;
- double bytes_limit, bytes_base, bytes_res;
- double slice_time, wait_time;
-
- if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
- } else if (bs->io_limits.bps[is_write]) {
- bps_limit = bs->io_limits.bps[is_write];
- } else {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- slice_time = bs->slice_end - bs->slice_start;
- slice_time /= (NANOSECONDS_PER_SECOND);
- bytes_limit = bps_limit * slice_time;
- bytes_base = bs->slice_submitted.bytes[is_write];
- if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
- bytes_base += bs->slice_submitted.bytes[!is_write];
- }
-
- /* bytes_base: the bytes of data which have been read/written; and
- * it is obtained from the history statistic info.
- * bytes_res: the remaining bytes of data which need to be read/written.
- * (bytes_base + bytes_res) / bps_limit: used to calcuate
- * the total time for completing reading/writting all data.
- */
- bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
-
- if (bytes_base + bytes_res <= bytes_limit) {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- /* Calc approx time to dispatch */
- wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
-
- /* When the I/O rate at runtime exceeds the limits,
- * bs->slice_end need to be extended in order that the current statistic
- * info can be kept until the timer fire, so it is increased and tuned
- * based on the result of experiment.
- */
- extension = wait_time * NANOSECONDS_PER_SECOND;
- extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
- BLOCK_IO_SLICE_TIME;
- bs->slice_end += extension;
- if (wait) {
- *wait = wait_time * NANOSECONDS_PER_SECOND;
- }
-
- return true;
-}
-
-static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
- double elapsed_time, uint64_t *wait)
-{
- uint64_t iops_limit = 0;
- double ios_limit, ios_base;
- double slice_time, wait_time;
-
- if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
- } else if (bs->io_limits.iops[is_write]) {
- iops_limit = bs->io_limits.iops[is_write];
- } else {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- slice_time = bs->slice_end - bs->slice_start;
- slice_time /= (NANOSECONDS_PER_SECOND);
- ios_limit = iops_limit * slice_time;
- ios_base = bs->slice_submitted.ios[is_write];
- if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
- ios_base += bs->slice_submitted.ios[!is_write];
- }
-
- if (ios_base + 1 <= ios_limit) {
- if (wait) {
- *wait = 0;
- }
-
- return false;
- }
-
- /* Calc approx time to dispatch, in seconds */
- wait_time = (ios_base + 1) / iops_limit;
- if (wait_time > elapsed_time) {
- wait_time = wait_time - elapsed_time;
- } else {
- wait_time = 0;
- }
-
- /* Exceeded current slice, extend it by another slice time */
- bs->slice_end += BLOCK_IO_SLICE_TIME;
- if (wait) {
- *wait = wait_time * NANOSECONDS_PER_SECOND;
- }
-
- return true;
-}
-
-static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
- bool is_write, int64_t *wait)
-{
- int64_t now, max_wait;
- uint64_t bps_wait = 0, iops_wait = 0;
- double elapsed_time;
- int bps_ret, iops_ret;
-
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (now > bs->slice_end) {
- bs->slice_start = now;
- bs->slice_end = now + BLOCK_IO_SLICE_TIME;
- memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
- }
-
- elapsed_time = now - bs->slice_start;
- elapsed_time /= (NANOSECONDS_PER_SECOND);
-
- bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
- is_write, elapsed_time, &bps_wait);
- iops_ret = bdrv_exceed_iops_limits(bs, is_write,
- elapsed_time, &iops_wait);
- if (bps_ret || iops_ret) {
- max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
- if (wait) {
- *wait = max_wait;
- }
-
- now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- if (bs->slice_end < now + max_wait) {
- bs->slice_end = now + max_wait;
- }
-
- return true;
- }
-
- if (wait) {
- *wait = 0;
- }
-
- bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
- BDRV_SECTOR_SIZE;
- bs->slice_submitted.ios[is_write]++;
-
- return false;
-}
-
/**************************************************************/
/* async block device emulation */
@@ -4445,6 +4334,23 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs)
}
}
+/* Get a reference to bs */
+void bdrv_ref(BlockDriverState *bs)
+{
+ bs->refcnt++;
+}
+
+/* Release a previously grabbed reference to bs.
+ * If after releasing, reference count is zero, the BlockDriverState is
+ * deleted. */
+void bdrv_unref(BlockDriverState *bs)
+{
+ assert(bs->refcnt > 0);
+ if (--bs->refcnt == 0) {
+ bdrv_delete(bs);
+ }
+}
+
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
assert(bs->in_use != in_use);
@@ -4658,7 +4564,7 @@ out:
free_option_parameters(param);
if (bs) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
}
}
diff --git a/block/backup.c b/block/backup.c
index 23c7264488..04c4b5c263 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -289,14 +289,14 @@ static void coroutine_fn backup_run(void *opaque)
* backing file. */
for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
- /* bdrv_co_is_allocated() only returns true/false based
+ /* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
* For that reason we must verify each sector in the
* backup cluster length. We end up copying more than
* needed but at some point that is always the case. */
alloced =
- bdrv_co_is_allocated(bs,
+ bdrv_is_allocated(bs,
start * BACKUP_SECTORS_PER_CLUSTER + i,
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
@@ -338,7 +338,7 @@ static void coroutine_fn backup_run(void *opaque)
hbitmap_free(job->bitmap);
bdrv_iostatus_disable(target);
- bdrv_delete(target);
+ bdrv_unref(target);
block_job_completed(&job->common, ret);
}
diff --git a/block/blkverify.c b/block/blkverify.c
index 1d58cc3932..c4e961eeb1 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -155,7 +155,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags)
s->test_file = bdrv_new("");
ret = bdrv_open(s->test_file, filename, NULL, flags, NULL);
if (ret < 0) {
- bdrv_delete(s->test_file);
+ bdrv_unref(s->test_file);
s->test_file = NULL;
goto fail;
}
@@ -169,7 +169,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_delete(s->test_file);
+ bdrv_unref(s->test_file);
s->test_file = NULL;
}
diff --git a/block/commit.c b/block/commit.c
index 51a1ab3678..ac4b7ccbc9 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -108,9 +108,9 @@ wait:
break;
}
/* Copy if allocated above the base */
- ret = bdrv_co_is_allocated_above(top, base, sector_num,
- COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
- &n);
+ ret = bdrv_is_allocated_above(top, base, sector_num,
+ COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+ &n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
diff --git a/block/cow.c b/block/cow.c
index 1cc2e89c7c..764b93fae0 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -106,7 +106,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags)
* XXX(hch): right now these functions are extremely inefficient.
* We should just read the whole bitmap we'll need in one go instead.
*/
-static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
+static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first)
{
uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
uint8_t bitmap;
@@ -117,27 +117,52 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
return ret;
}
+ if (bitmap & (1 << (bitnum % 8))) {
+ return 0;
+ }
+
+ if (*first) {
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ return ret;
+ }
+ *first = false;
+ }
+
bitmap |= (1 << (bitnum % 8));
- ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
+ ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap));
if (ret < 0) {
return ret;
}
return 0;
}
-static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
+#define BITS_PER_BITMAP_SECTOR (512 * 8)
+
+/* Cannot use bitmap.c on big-endian machines. */
+static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
{
- uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
- uint8_t bitmap;
- int ret;
+ return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
+}
- ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
- if (ret < 0) {
- return ret;
+static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors)
+{
+ int streak_value = value ? 0xFF : 0;
+ int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR);
+ int bitnum = start;
+ while (bitnum < last) {
+ if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) {
+ bitnum += 8;
+ continue;
+ }
+ if (cow_test_bit(bitnum, bitmap) == value) {
+ bitnum++;
+ continue;
+ }
+ break;
}
-
- return !!(bitmap & (1 << (bitnum % 8)));
+ return MIN(bitnum, last) - start;
}
/* Return true if first block has been changed (ie. current version is
@@ -146,24 +171,33 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *num_same)
{
+ int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8;
+ uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE;
+ uint8_t bitmap[BDRV_SECTOR_SIZE];
+ int ret;
int changed;
- if (nb_sectors == 0) {
- *num_same = nb_sectors;
- return 0;
+ ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
}
- changed = is_bit_set(bs, sector_num);
- if (changed < 0) {
- return 0; /* XXX: how to return I/O errors? */
- }
+ bitnum &= BITS_PER_BITMAP_SECTOR - 1;
+ changed = cow_test_bit(bitnum, bitmap);
+ *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors);
+ return changed;
+}
- for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
- if (is_bit_set(bs, sector_num + *num_same) != changed)
- break;
+static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *num_same)
+{
+ BDRVCowState *s = bs->opaque;
+ int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same);
+ int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS);
+ if (ret < 0) {
+ return ret;
}
-
- return changed;
+ return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID;
}
static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
@@ -171,9 +205,10 @@ static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
{
int error = 0;
int i;
+ bool first = true;
for (i = 0; i < nb_sectors; i++) {
- error = cow_set_bit(bs, sector_num + i);
+ error = cow_set_bit(bs, sector_num + i, &first);
if (error) {
break;
}
@@ -189,7 +224,11 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
int ret, n;
while (nb_sectors > 0) {
- if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) {
+ ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret) {
ret = bdrv_pread(bs->file,
s->cow_sectors_offset + sector_num * 512,
buf, n * 512);
@@ -314,7 +353,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
}
exit:
- bdrv_delete(cow_bs);
+ bdrv_unref(cow_bs);
return ret;
}
@@ -344,7 +383,7 @@ static BlockDriver bdrv_cow = {
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
- .bdrv_co_is_allocated = cow_co_is_allocated,
+ .bdrv_co_get_block_status = cow_co_get_block_status,
.create_options = cow_create_options,
};
diff --git a/block/iscsi.c b/block/iscsi.c
index 2bbee1f6e5..813abd8fef 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1241,11 +1241,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
{
int ret = 0;
int64_t total_size = 0;
- BlockDriverState bs;
+ BlockDriverState *bs;
IscsiLun *iscsilun = NULL;
QDict *bs_options;
- memset(&bs, 0, sizeof(BlockDriverState));
+ bs = bdrv_new("");
/* Read out options */
while (options && options->name) {
@@ -1255,12 +1255,12 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
options++;
}
- bs.opaque = g_malloc0(sizeof(struct IscsiLun));
- iscsilun = bs.opaque;
+ bs->opaque = g_malloc0(sizeof(struct IscsiLun));
+ iscsilun = bs->opaque;
bs_options = qdict_new();
qdict_put(bs_options, "filename", qstring_from_str(filename));
- ret = iscsi_open(&bs, bs_options, 0);
+ ret = iscsi_open(bs, bs_options, 0);
QDECREF(bs_options);
if (ret != 0) {
@@ -1274,7 +1274,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options)
ret = -ENODEV;
goto out;
}
- if (bs.total_sectors < total_size) {
+ if (bs->total_sectors < total_size) {
ret = -ENOSPC;
goto out;
}
@@ -1284,7 +1284,9 @@ out:
if (iscsilun->iscsi != NULL) {
iscsi_destroy_context(iscsilun->iscsi);
}
- g_free(bs.opaque);
+ g_free(bs->opaque);
+ bs->opaque = NULL;
+ bdrv_unref(bs);
return ret;
}
diff --git a/block/mirror.c b/block/mirror.c
index 86de4582b4..f61a7799de 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -338,8 +338,8 @@ static void coroutine_fn mirror_run(void *opaque)
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
- ret = bdrv_co_is_allocated_above(bs, base,
- sector_num, next - sector_num, &n);
+ ret = bdrv_is_allocated_above(bs, base,
+ sector_num, next - sector_num, &n);
if (ret < 0) {
goto immediate_exit;
@@ -480,7 +480,7 @@ immediate_exit:
bdrv_swap(s->target, s->common.bs);
}
bdrv_close(s->target);
- bdrv_delete(s->target);
+ bdrv_unref(s->target);
block_job_completed(&s->common, ret);
}
diff --git a/block/qapi.c b/block/qapi.c
index a4bc4113b7..782051c65d 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -223,18 +223,44 @@ void bdrv_query_info(BlockDriverState *bs,
info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
if (bs->io_limits_enabled) {
- info->inserted->bps =
- bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
- info->inserted->bps_rd =
- bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
- info->inserted->bps_wr =
- bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
- info->inserted->iops =
- bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
- info->inserted->iops_rd =
- bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
- info->inserted->iops_wr =
- bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
+ ThrottleConfig cfg;
+ throttle_get_config(&bs->throttle_state, &cfg);
+ info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
+ info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
+ info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
+
+ info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
+ info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
+ info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
+
+ info->inserted->has_bps_max =
+ cfg.buckets[THROTTLE_BPS_TOTAL].max;
+ info->inserted->bps_max =
+ cfg.buckets[THROTTLE_BPS_TOTAL].max;
+ info->inserted->has_bps_rd_max =
+ cfg.buckets[THROTTLE_BPS_READ].max;
+ info->inserted->bps_rd_max =
+ cfg.buckets[THROTTLE_BPS_READ].max;
+ info->inserted->has_bps_wr_max =
+ cfg.buckets[THROTTLE_BPS_WRITE].max;
+ info->inserted->bps_wr_max =
+ cfg.buckets[THROTTLE_BPS_WRITE].max;
+
+ info->inserted->has_iops_max =
+ cfg.buckets[THROTTLE_OPS_TOTAL].max;
+ info->inserted->iops_max =
+ cfg.buckets[THROTTLE_OPS_TOTAL].max;
+ info->inserted->has_iops_rd_max =
+ cfg.buckets[THROTTLE_OPS_READ].max;
+ info->inserted->iops_rd_max =
+ cfg.buckets[THROTTLE_OPS_READ].max;
+ info->inserted->has_iops_wr_max =
+ cfg.buckets[THROTTLE_OPS_WRITE].max;
+ info->inserted->iops_wr_max =
+ cfg.buckets[THROTTLE_OPS_WRITE].max;
+
+ info->inserted->has_iops_size = cfg.op_size;
+ info->inserted->iops_size = cfg.op_size;
}
bs0 = bs;
diff --git a/block/qcow.c b/block/qcow.c
index 5239bd68f1..93a993bb44 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -395,7 +395,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
return cluster_offset;
}
-static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
@@ -410,7 +410,14 @@ static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
if (n > nb_sectors)
n = nb_sectors;
*pnum = n;
- return (cluster_offset != 0);
+ if (!cluster_offset) {
+ return 0;
+ }
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
+ return BDRV_BLOCK_DATA;
+ }
+ cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@@ -751,7 +758,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
g_free(tmp);
ret = 0;
exit:
- bdrv_delete(qcow_bs);
+ bdrv_unref(qcow_bs);
return ret;
}
@@ -896,7 +903,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
- .bdrv_co_is_allocated = qcow_co_is_allocated,
+ .bdrv_co_get_block_status = qcow_co_get_block_status,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
diff --git a/block/qcow2.c b/block/qcow2.c
index 4bc679a155..578792f0a3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -688,24 +688,34 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
return 0;
}
-static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVQcowState *s = bs->opaque;
uint64_t cluster_offset;
- int ret;
+ int index_in_cluster, ret;
+ int64_t status = 0;
*pnum = nb_sectors;
- /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
- * can't pass them on today */
qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
- *pnum = 0;
+ return ret;
}
- return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
+ if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
+ !s->crypt_method) {
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
+ }
+ if (ret == QCOW2_CLUSTER_ZERO) {
+ status |= BDRV_BLOCK_ZERO;
+ } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
+ status |= BDRV_BLOCK_DATA;
+ }
+ return status;
}
/* handle reading after the end of the backing file */
@@ -1452,7 +1462,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
ret = 0;
out:
- bdrv_delete(bs);
+ bdrv_unref(bs);
return ret;
}
@@ -1868,7 +1878,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_reopen_prepare = qcow2_reopen_prepare,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = qcow2_co_is_allocated,
+ .bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/qed.c b/block/qed.c
index cc904c4834..49b3a37ed5 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -599,7 +599,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
- bdrv_delete(bs);
+ bdrv_unref(bs);
return ret;
}
@@ -652,45 +652,66 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
}
typedef struct {
+ BlockDriverState *bs;
Coroutine *co;
- int is_allocated;
+ uint64_t pos;
+ int64_t status;
int *pnum;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
{
QEDIsAllocatedCB *cb = opaque;
+ BDRVQEDState *s = cb->bs->opaque;
*cb->pnum = len / BDRV_SECTOR_SIZE;
- cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+ switch (ret) {
+ case QED_CLUSTER_FOUND:
+ offset |= qed_offset_into_cluster(s, cb->pos);
+ cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ break;
+ case QED_CLUSTER_ZERO:
+ cb->status = BDRV_BLOCK_ZERO;
+ break;
+ case QED_CLUSTER_L2:
+ case QED_CLUSTER_L1:
+ cb->status = 0;
+ break;
+ default:
+ assert(ret < 0);
+ cb->status = ret;
+ break;
+ }
+
if (cb->co) {
qemu_coroutine_enter(cb->co, NULL);
}
}
-static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
BDRVQEDState *s = bs->opaque;
- uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
QEDIsAllocatedCB cb = {
- .is_allocated = -1,
+ .bs = bs,
+ .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
+ .status = BDRV_BLOCK_OFFSET_MASK,
.pnum = pnum,
};
QEDRequest request = { .l2_table = NULL };
- qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+ qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
/* Now sleep if the callback wasn't invoked immediately */
- while (cb.is_allocated == -1) {
+ while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
cb.co = qemu_coroutine_self();
qemu_coroutine_yield();
}
qed_unref_l2_cache_entry(request.l2_table);
- return cb.is_allocated;
+ return cb.status;
}
static int bdrv_qed_make_empty(BlockDriverState *bs)
@@ -1575,7 +1596,7 @@ static BlockDriver bdrv_qed = {
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
.bdrv_create = bdrv_qed_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
+ .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_make_empty = bdrv_qed_make_empty,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index ba721d3f5b..1b41ea3356 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1084,12 +1084,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
*/
-static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
off_t start, data, hole;
- int ret;
+ int64_t ret;
ret = fd_open(bs);
if (ret < 0) {
@@ -1097,6 +1097,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
}
start = sector_num * BDRV_SECTOR_SIZE;
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
#ifdef CONFIG_FIEMAP
@@ -1114,7 +1115,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
/* Assume everything is allocated. */
*pnum = nb_sectors;
- return 1;
+ return ret;
}
if (f.fm.fm_mapped_extents == 0) {
@@ -1127,6 +1128,9 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
} else {
data = f.fe.fe_logical;
hole = f.fe.fe_logical + f.fe.fe_length;
+ if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
+ ret |= BDRV_BLOCK_ZERO;
+ }
}
#elif defined SEEK_HOLE && defined SEEK_DATA
@@ -1141,7 +1145,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
/* Most likely EINVAL. Assume everything is allocated. */
*pnum = nb_sectors;
- return 1;
+ return ret;
}
if (hole > start) {
@@ -1154,19 +1158,21 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
}
}
#else
- *pnum = nb_sectors;
- return 1;
+ data = 0;
+ hole = start + nb_sectors * BDRV_SECTOR_SIZE;
#endif
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */
*pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
- return 1;
} else {
/* On a hole, compute sectors to the beginning of the next extent. */
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
- return 0;
+ ret &= ~BDRV_BLOCK_DATA;
+ ret |= BDRV_BLOCK_ZERO;
}
+
+ return ret;
}
static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
@@ -1200,7 +1206,7 @@ static BlockDriver bdrv_file = {
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = raw_co_is_allocated,
+ .bdrv_co_get_block_status = raw_co_get_block_status,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 9b5b2af4e8..d2d2d9f4d4 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -535,13 +535,29 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
{
BDRVRawState *s = bs->opaque;
int access_flags, create_flags;
+ int ret = 0;
DWORD overlapped;
char device_name[64];
- const char *filename = qdict_get_str(options, "filename");
+
+ Error *local_err = NULL;
+ const char *filename;
+
+ QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ filename = qemu_opt_get(opts, "filename");
if (strstart(filename, "/dev/cdrom", NULL)) {
- if (find_cdrom(device_name, sizeof(device_name)) < 0)
- return -ENOENT;
+ if (find_cdrom(device_name, sizeof(device_name)) < 0) {
+ ret = -ENOENT;
+ goto done;
+ }
filename = device_name;
} else {
/* transform drive letters into device name */
@@ -564,11 +580,17 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags)
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
- if (err == ERROR_ACCESS_DENIED)
- return -EACCES;
- return -1;
+ if (err == ERROR_ACCESS_DENIED) {
+ ret = -EACCES;
+ } else {
+ ret = -1;
+ }
+ goto done;
}
- return 0;
+
+done:
+ qemu_opts_del(opts);
+ return ret;
}
static BlockDriver bdrv_host_device = {
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index ab2b0fd7d2..a9060caec4 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -58,11 +58,11 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
}
-static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int *pnum)
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
{
- return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
+ return bdrv_get_block_status(bs->file, sector_num, nb_sectors, pnum);
}
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
@@ -164,7 +164,7 @@ static BlockDriver bdrv_raw = {
.bdrv_co_writev = &raw_co_writev,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
- .bdrv_co_is_allocated = &raw_co_is_allocated,
+ .bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.bdrv_get_info = &raw_get_info,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 1ad4d070e7..f9988d35ba 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1430,7 +1430,7 @@ static int sd_prealloc(const char *filename)
}
out:
if (bs) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
}
g_free(buf);
@@ -1509,13 +1509,13 @@ static int sd_create(const char *filename, QEMUOptionParameter *options)
if (!is_snapshot(&s->inode)) {
error_report("cannot clone from a non snapshot vdi");
- bdrv_delete(bs);
+ bdrv_unref(bs);
ret = -EINVAL;
goto out;
}
base_vid = s->inode.vdi_id;
- bdrv_delete(bs);
+ bdrv_unref(bs);
}
ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0);
@@ -2270,9 +2270,9 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
return acb->ret;
}
-static coroutine_fn int
-sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
+static coroutine_fn int64_t
+sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
@@ -2280,7 +2280,7 @@ sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
end = DIV_ROUND_UP((sector_num + nb_sectors) *
BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
unsigned long idx;
- int ret = 1;
+ int64_t ret = BDRV_BLOCK_DATA;
for (idx = start; idx < end; idx++) {
if (inode->data_vdi_id[idx] == 0) {
@@ -2338,7 +2338,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
- .bdrv_co_is_allocated = sd_co_is_allocated,
+ .bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@@ -2366,7 +2366,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
- .bdrv_co_is_allocated = sd_co_is_allocated,
+ .bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
@@ -2394,7 +2394,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
.bdrv_co_discard = sd_co_discard,
- .bdrv_co_is_allocated = sd_co_is_allocated,
+ .bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
.bdrv_snapshot_goto = sd_snapshot_goto,
diff --git a/block/snapshot.c b/block/snapshot.c
index 6c6d9deea1..8f61cc0745 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -99,7 +99,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
ret = bdrv_snapshot_goto(bs->file, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
if (open_ret < 0) {
- bdrv_delete(bs->file);
+ bdrv_unref(bs->file);
bs->drv = NULL;
return open_ret;
}
diff --git a/block/stream.c b/block/stream.c
index 99821252b1..078ce4aa6a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -73,7 +73,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
unused = intermediate;
intermediate = intermediate->backing_hd;
unused->backing_hd = NULL;
- bdrv_delete(unused);
+ bdrv_unref(unused);
}
}
@@ -119,16 +119,16 @@ wait:
break;
}
- ret = bdrv_co_is_allocated(bs, sector_num,
- STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
+ ret = bdrv_is_allocated(bs, sector_num,
+ STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
if (ret == 1) {
/* Allocated in the top, no need to copy. */
copy = false;
- } else {
+ } else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
- ret = bdrv_co_is_allocated_above(bs->backing_hd, base,
- sector_num, n, &n);
+ ret = bdrv_is_allocated_above(bs->backing_hd, base,
+ sector_num, n, &n);
/* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) {
diff --git a/block/vdi.c b/block/vdi.c
index 8a915257e8..1bf7dc575a 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -470,7 +470,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
return 0;
}
-static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
@@ -479,12 +479,23 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
size_t sector_in_block = sector_num % s->block_sectors;
int n_sectors = s->block_sectors - sector_in_block;
uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
+ uint64_t offset;
+ int result;
+
logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
if (n_sectors > nb_sectors) {
n_sectors = nb_sectors;
}
*pnum = n_sectors;
- return VDI_IS_ALLOCATED(bmap_entry);
+ result = VDI_IS_ALLOCATED(bmap_entry);
+ if (!result) {
+ return 0;
+ }
+
+ offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ sector_in_block * SECTOR_SIZE;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
static int vdi_co_read(BlockDriverState *bs,
@@ -780,7 +791,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_reopen_prepare = vdi_reopen_prepare,
.bdrv_create = vdi_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
- .bdrv_co_is_allocated = vdi_co_is_allocated,
+ .bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_make_empty = vdi_make_empty,
.bdrv_read = vdi_co_read,
diff --git a/block/vmdk.c b/block/vmdk.c
index 63b489d29e..fb5b5297ce 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -216,7 +216,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
g_free(e->l2_cache);
g_free(e->l1_backup_table);
if (e->file != bs->file) {
- bdrv_delete(e->file);
+ bdrv_unref(e->file);
}
}
g_free(s->extents);
@@ -746,7 +746,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
/* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
ret = vmdk_open_sparse(bs, extent_file, bs->open_flags);
if (ret) {
- bdrv_delete(extent_file);
+ bdrv_unref(extent_file);
return ret;
}
} else {
@@ -1042,7 +1042,7 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
return NULL;
}
-static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
BDRVVmdkState *s = bs->opaque;
@@ -1059,7 +1059,24 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
sector_num * 512, 0, &offset);
qemu_co_mutex_unlock(&s->lock);
- ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
+ switch (ret) {
+ case VMDK_ERROR:
+ ret = -EIO;
+ break;
+ case VMDK_UNALLOC:
+ ret = 0;
+ break;
+ case VMDK_ZEROED:
+ ret = BDRV_BLOCK_ZERO;
+ break;
+ case VMDK_OK:
+ ret = BDRV_BLOCK_DATA;
+ if (extent->file == bs->file) {
+ ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+ }
+
+ break;
+ }
index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
@@ -1636,15 +1653,15 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
BlockDriverState *bs = bdrv_new("");
ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
if (ret != 0) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
return ret;
}
if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
return -EINVAL;
}
parent_cid = vmdk_read_cid(bs, 0);
- bdrv_delete(bs);
+ bdrv_unref(bs);
snprintf(parent_desc_line, sizeof(parent_desc_line),
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -1837,7 +1854,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
- .bdrv_co_is_allocated = vmdk_co_is_allocated,
+ .bdrv_co_get_block_status = vmdk_co_get_block_status,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.bdrv_has_zero_init = vmdk_has_zero_init,
diff --git a/block/vvfat.c b/block/vvfat.c
index cd3b8edd9f..0129195e29 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -2874,16 +2874,17 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
-static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs,
+static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int* n)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
- if (*n > nb_sectors)
- *n = nb_sectors;
- else if (*n < 0)
- return 0;
- return 1;
+ if (*n > nb_sectors) {
+ *n = nb_sectors;
+ } else if (*n < 0) {
+ return 0;
+ }
+ return BDRV_BLOCK_DATA;
}
static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
@@ -2894,7 +2895,7 @@ static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
- bdrv_delete(s->qcow);
+ bdrv_unref(s->qcow);
g_free(s->qcow_filename);
}
@@ -2935,7 +2936,7 @@ static int enable_write_target(BDRVVVFATState *s)
ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) {
- bdrv_delete(s->qcow);
+ bdrv_unref(s->qcow);
goto err;
}
@@ -2943,7 +2944,7 @@ static int enable_write_target(BDRVVVFATState *s)
unlink(s->qcow_filename);
#endif
- s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1);
+ s->bs->backing_hd = bdrv_new("");
s->bs->backing_hd->drv = &vvfat_write_target;
s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
*(void**)s->bs->backing_hd->opaque = s;
@@ -2984,7 +2985,7 @@ static BlockDriver bdrv_vvfat = {
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
- .bdrv_co_is_allocated = vvfat_co_is_allocated,
+ .bdrv_co_get_block_status = vvfat_co_get_block_status,
};
static void bdrv_vvfat_init(void)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 95f10c81e3..922cf5657b 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -69,12 +69,6 @@ static void nbd_close_notifier(Notifier *n, void *data)
g_free(cn);
}
-static void nbd_server_put_ref(NBDExport *exp)
-{
- BlockDriverState *bs = nbd_export_get_blockdev(exp);
- drive_put_ref(drive_get_by_blockdev(bs));
-}
-
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
@@ -105,11 +99,9 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
writable = false;
}
- exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
- nbd_server_put_ref);
+ exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
nbd_export_set_name(exp, device);
- drive_get_ref(drive_get_by_blockdev(bs));
n = g_malloc0(sizeof(NBDCloseNotifier));
n->n.notify = nbd_close_notifier;
diff --git a/blockdev.c b/blockdev.c
index e70e16e4de..07dac05a2c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -212,7 +212,7 @@ static void bdrv_format_print(void *opaque, const char *name)
static void drive_uninit(DriveInfo *dinfo)
{
qemu_opts_del(dinfo->opts);
- bdrv_delete(dinfo->bdrv);
+ bdrv_unref(dinfo->bdrv);
g_free(dinfo->id);
QTAILQ_REMOVE(&drives, dinfo, next);
g_free(dinfo->serial);
@@ -234,32 +234,32 @@ void drive_get_ref(DriveInfo *dinfo)
typedef struct {
QEMUBH *bh;
- DriveInfo *dinfo;
-} DrivePutRefBH;
+ BlockDriverState *bs;
+} BDRVPutRefBH;
-static void drive_put_ref_bh(void *opaque)
+static void bdrv_put_ref_bh(void *opaque)
{
- DrivePutRefBH *s = opaque;
+ BDRVPutRefBH *s = opaque;
- drive_put_ref(s->dinfo);
+ bdrv_unref(s->bs);
qemu_bh_delete(s->bh);
g_free(s);
}
/*
- * Release a drive reference in a BH
+ * Release a BDS reference in a BH
*
- * It is not possible to use drive_put_ref() from a callback function when the
- * callers still need the drive. In such cases we schedule a BH to release the
- * reference.
+ * It is not safe to use bdrv_unref() from a callback function when the callers
+ * still need the BlockDriverState. In such cases we schedule a BH to release
+ * the reference.
*/
-static void drive_put_ref_bh_schedule(DriveInfo *dinfo)
+static void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
{
- DrivePutRefBH *s;
+ BDRVPutRefBH *s;
- s = g_new(DrivePutRefBH, 1);
- s->bh = qemu_bh_new(drive_put_ref_bh, s);
- s->dinfo = dinfo;
+ s = g_new(BDRVPutRefBH, 1);
+ s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
+ s->bs = bs;
qemu_bh_schedule(s->bh);
}
@@ -280,32 +280,16 @@ static int parse_block_error_action(const char *buf, bool is_read)
}
}
-static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp)
+static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
{
- bool bps_flag;
- bool iops_flag;
-
- assert(io_limits);
-
- bps_flag = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0)
- && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0)
- || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0));
- iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0)
- && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0)
- || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0));
- if (bps_flag || iops_flag) {
- error_setg(errp, "bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) "
- "cannot be used at the same time");
+ if (throttle_conflicting(cfg)) {
+ error_setg(errp, "bps/iops/max total values and read/write values"
+ " cannot be used at the same time");
return false;
}
- if (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] < 0 ||
- io_limits->bps[BLOCK_IO_LIMIT_WRITE] < 0 ||
- io_limits->bps[BLOCK_IO_LIMIT_READ] < 0 ||
- io_limits->iops[BLOCK_IO_LIMIT_TOTAL] < 0 ||
- io_limits->iops[BLOCK_IO_LIMIT_WRITE] < 0 ||
- io_limits->iops[BLOCK_IO_LIMIT_READ] < 0) {
- error_setg(errp, "bps and iops values must be 0 or greater");
+ if (!throttle_is_valid(cfg)) {
+ error_setg(errp, "bps/iops/maxs values must be 0 or greater");
return false;
}
@@ -330,7 +314,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
int on_read_error, on_write_error;
const char *devaddr;
DriveInfo *dinfo;
- BlockIOLimit io_limits;
+ ThrottleConfig cfg;
int snapshot = 0;
bool copy_on_read;
int ret;
@@ -496,20 +480,36 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
}
/* disk I/O throttling */
- io_limits.bps[BLOCK_IO_LIMIT_TOTAL] =
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.buckets[THROTTLE_BPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.bps-total", 0);
- io_limits.bps[BLOCK_IO_LIMIT_READ] =
+ cfg.buckets[THROTTLE_BPS_READ].avg =
qemu_opt_get_number(opts, "throttling.bps-read", 0);
- io_limits.bps[BLOCK_IO_LIMIT_WRITE] =
+ cfg.buckets[THROTTLE_BPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.bps-write", 0);
- io_limits.iops[BLOCK_IO_LIMIT_TOTAL] =
+ cfg.buckets[THROTTLE_OPS_TOTAL].avg =
qemu_opt_get_number(opts, "throttling.iops-total", 0);
- io_limits.iops[BLOCK_IO_LIMIT_READ] =
+ cfg.buckets[THROTTLE_OPS_READ].avg =
qemu_opt_get_number(opts, "throttling.iops-read", 0);
- io_limits.iops[BLOCK_IO_LIMIT_WRITE] =
+ cfg.buckets[THROTTLE_OPS_WRITE].avg =
qemu_opt_get_number(opts, "throttling.iops-write", 0);
- if (!do_check_io_limits(&io_limits, &error)) {
+ cfg.buckets[THROTTLE_BPS_TOTAL].max =
+ qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+ cfg.buckets[THROTTLE_BPS_READ].max =
+ qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+ cfg.buckets[THROTTLE_BPS_WRITE].max =
+ qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+ cfg.buckets[THROTTLE_OPS_TOTAL].max =
+ qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+ cfg.buckets[THROTTLE_OPS_READ].max =
+ qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+ cfg.buckets[THROTTLE_OPS_WRITE].max =
+ qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+ cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+ if (!check_throttle_config(&cfg, &error)) {
error_report("%s", error_get_pretty(error));
error_free(error);
return NULL;
@@ -636,7 +636,10 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error);
/* disk I/O throttling */
- bdrv_set_io_limits(dinfo->bdrv, &io_limits);
+ if (throttle_enabled(&cfg)) {
+ bdrv_io_limits_enable(dinfo->bdrv);
+ bdrv_set_io_limits(dinfo->bdrv, &cfg);
+ }
switch(type) {
case IF_IDE:
@@ -732,7 +735,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
err:
qemu_opts_del(opts);
QDECREF(bs_opts);
- bdrv_delete(dinfo->bdrv);
+ bdrv_unref(dinfo->bdrv);
g_free(dinfo->id);
QTAILQ_REMOVE(&drives, dinfo, next);
g_free(dinfo);
@@ -763,6 +766,17 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type)
qemu_opt_rename(all_opts, "bps_rd", "throttling.bps-read");
qemu_opt_rename(all_opts, "bps_wr", "throttling.bps-write");
+ qemu_opt_rename(all_opts, "iops_max", "throttling.iops-total-max");
+ qemu_opt_rename(all_opts, "iops_rd_max", "throttling.iops-read-max");
+ qemu_opt_rename(all_opts, "iops_wr_max", "throttling.iops-write-max");
+
+ qemu_opt_rename(all_opts, "bps_max", "throttling.bps-total-max");
+ qemu_opt_rename(all_opts, "bps_rd_max", "throttling.bps-read-max");
+ qemu_opt_rename(all_opts, "bps_wr_max", "throttling.bps-write-max");
+
+ qemu_opt_rename(all_opts,
+ "iops_size", "throttling.iops-size");
+
qemu_opt_rename(all_opts, "readonly", "read-only");
value = qemu_opt_get(all_opts, "cache");
@@ -982,7 +996,7 @@ static void external_snapshot_abort(BlkTransactionState *common)
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
if (state->new_bs) {
- bdrv_delete(state->new_bs);
+ bdrv_unref(state->new_bs);
}
}
@@ -1247,10 +1261,26 @@ void qmp_change_blockdev(const char *device, const char *filename,
/* throttling disk I/O limits */
void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
- int64_t bps_wr, int64_t iops, int64_t iops_rd,
- int64_t iops_wr, Error **errp)
+ int64_t bps_wr,
+ int64_t iops,
+ int64_t iops_rd,
+ int64_t iops_wr,
+ bool has_bps_max,
+ int64_t bps_max,
+ bool has_bps_rd_max,
+ int64_t bps_rd_max,
+ bool has_bps_wr_max,
+ int64_t bps_wr_max,
+ bool has_iops_max,
+ int64_t iops_max,
+ bool has_iops_rd_max,
+ int64_t iops_rd_max,
+ bool has_iops_wr_max,
+ int64_t iops_wr_max,
+ bool has_iops_size,
+ int64_t iops_size, Error **errp)
{
- BlockIOLimit io_limits;
+ ThrottleConfig cfg;
BlockDriverState *bs;
bs = bdrv_find(device);
@@ -1259,27 +1289,50 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
return;
}
- io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps;
- io_limits.bps[BLOCK_IO_LIMIT_READ] = bps_rd;
- io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr;
- io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops;
- io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd;
- io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr;
+ memset(&cfg, 0, sizeof(cfg));
+ cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
+ cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd;
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
- if (!do_check_io_limits(&io_limits, errp)) {
- return;
+ cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops;
+ cfg.buckets[THROTTLE_OPS_READ].avg = iops_rd;
+ cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr;
+
+ if (has_bps_max) {
+ cfg.buckets[THROTTLE_BPS_TOTAL].max = bps_max;
+ }
+ if (has_bps_rd_max) {
+ cfg.buckets[THROTTLE_BPS_READ].max = bps_rd_max;
+ }
+ if (has_bps_wr_max) {
+ cfg.buckets[THROTTLE_BPS_WRITE].max = bps_wr_max;
+ }
+ if (has_iops_max) {
+ cfg.buckets[THROTTLE_OPS_TOTAL].max = iops_max;
+ }
+ if (has_iops_rd_max) {
+ cfg.buckets[THROTTLE_OPS_READ].max = iops_rd_max;
+ }
+ if (has_iops_wr_max) {
+ cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
}
- bs->io_limits = io_limits;
+ if (has_iops_size) {
+ cfg.op_size = iops_size;
+ }
+
+ if (!check_throttle_config(&cfg, errp)) {
+ return;
+ }
- if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) {
+ if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
bdrv_io_limits_enable(bs);
- } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) {
+ } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
bdrv_io_limits_disable(bs);
- } else {
- if (bs->block_timer) {
- timer_mod(bs->block_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
- }
+ }
+
+ if (bs->io_limits_enabled) {
+ bdrv_set_io_limits(bs, &cfg);
}
}
@@ -1383,7 +1436,7 @@ static void block_job_cb(void *opaque, int ret)
}
qobject_decref(obj);
- drive_put_ref_bh_schedule(drive_get_by_blockdev(bs));
+ bdrv_put_ref_bh_schedule(bs);
}
void qmp_block_stream(const char *device, bool has_base,
@@ -1420,11 +1473,6 @@ void qmp_block_stream(const char *device, bool has_base,
return;
}
- /* Grab a reference so hotplug does not delete the BlockDriverState from
- * underneath us.
- */
- drive_get_ref(drive_get_by_blockdev(bs));
-
trace_qmp_block_stream(bs, bs->job);
}
@@ -1481,10 +1529,6 @@ void qmp_block_commit(const char *device,
error_propagate(errp, local_err);
return;
}
- /* Grab a reference so hotplug does not delete the BlockDriverState from
- * underneath us.
- */
- drive_get_ref(drive_get_by_blockdev(bs));
}
void qmp_drive_backup(const char *device, const char *target,
@@ -1585,7 +1629,7 @@ void qmp_drive_backup(const char *device, const char *target,
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, NULL, flags, drv);
if (ret < 0) {
- bdrv_delete(target_bs);
+ bdrv_unref(target_bs);
error_setg_file_open(errp, -ret, target);
return;
}
@@ -1593,15 +1637,10 @@ void qmp_drive_backup(const char *device, const char *target,
backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
- bdrv_delete(target_bs);
+ bdrv_unref(target_bs);
error_propagate(errp, local_err);
return;
}
-
- /* Grab a reference so hotplug does not delete the BlockDriverState from
- * underneath us.
- */
- drive_get_ref(drive_get_by_blockdev(bs));
}
#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
@@ -1725,7 +1764,7 @@ void qmp_drive_mirror(const char *device, const char *target,
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv);
if (ret < 0) {
- bdrv_delete(target_bs);
+ bdrv_unref(target_bs);
error_setg_file_open(errp, -ret, target);
return;
}
@@ -1734,15 +1773,10 @@ void qmp_drive_mirror(const char *device, const char *target,
on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
- bdrv_delete(target_bs);
+ bdrv_unref(target_bs);
error_propagate(errp, local_err);
return;
}
-
- /* Grab a reference so hotplug does not delete the BlockDriverState from
- * underneath us.
- */
- drive_get_ref(drive_get_by_blockdev(bs));
}
static BlockJob *find_block_job(const char *device)
@@ -1968,6 +2002,34 @@ QemuOptsList qemu_common_drive_opts = {
.type = QEMU_OPT_NUMBER,
.help = "limit write bytes per second",
},{
+ .name = "throttling.iops-total-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "I/O operations burst",
+ },{
+ .name = "throttling.iops-read-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "I/O operations read burst",
+ },{
+ .name = "throttling.iops-write-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "I/O operations write burst",
+ },{
+ .name = "throttling.bps-total-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "total bytes burst",
+ },{
+ .name = "throttling.bps-read-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "total bytes read burst",
+ },{
+ .name = "throttling.bps-write-max",
+ .type = QEMU_OPT_NUMBER,
+ .help = "total bytes write burst",
+ },{
+ .name = "throttling.iops-size",
+ .type = QEMU_OPT_NUMBER,
+ .help = "when limiting by iops max size of an I/O in bytes",
+ },{
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
diff --git a/blockjob.c b/blockjob.c
index 7edc945119..e7d49b7169 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -45,6 +45,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
return NULL;
}
+ bdrv_ref(bs);
bdrv_set_in_use(bs, 1);
job = g_malloc0(job_type->instance_size);
diff --git a/hmp.c b/hmp.c
index baadbc0949..b4a6422e7a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -344,14 +344,30 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, " I/O throttling: bps=%" PRId64
" bps_rd=%" PRId64 " bps_wr=%" PRId64
+ " bps_max=%" PRId64
+ " bps_rd_max=%" PRId64
+ " bps_wr_max=%" PRId64
" iops=%" PRId64 " iops_rd=%" PRId64
- " iops_wr=%" PRId64 "\n",
+ " iops_wr=%" PRId64
+ " iops_max=%" PRId64
+ " iops_rd_max=%" PRId64
+ " iops_wr_max=%" PRId64
+ " iops_size=%" PRId64 "\n",
info->value->inserted->bps,
info->value->inserted->bps_rd,
info->value->inserted->bps_wr,
+ info->value->inserted->bps_max,
+ info->value->inserted->bps_rd_max,
+ info->value->inserted->bps_wr_max,
info->value->inserted->iops,
info->value->inserted->iops_rd,
- info->value->inserted->iops_wr);
+ info->value->inserted->iops_wr,
+ info->value->inserted->iops_max,
+ info->value->inserted->iops_rd_max,
+ info->value->inserted->iops_wr_max,
+ info->value->inserted->iops_size);
+ } else {
+ monitor_printf(mon, " [not inserted]");
}
if (verbose) {
@@ -1098,7 +1114,21 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict)
qdict_get_int(qdict, "bps_wr"),
qdict_get_int(qdict, "iops"),
qdict_get_int(qdict, "iops_rd"),
- qdict_get_int(qdict, "iops_wr"), &err);
+ qdict_get_int(qdict, "iops_wr"),
+ false, /* no burst max via HMP */
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false,
+ 0,
+ false, /* No default I/O size */
+ 0, &err);
hmp_handle_error(mon, &err);
}
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 5a96ccd416..f2d7350a50 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -42,6 +42,7 @@ typedef struct {
struct VirtIOBlockDataPlane {
bool started;
+ bool starting;
bool stopping;
QEMUBH *start_bh;
QemuThread thread;
@@ -451,8 +452,15 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
return;
}
+ if (s->starting) {
+ return;
+ }
+
+ s->starting = true;
+
vq = virtio_get_queue(s->vdev, 0);
if (!vring_setup(&s->vring, s->vdev, 0)) {
+ s->starting = false;
return;
}
@@ -482,6 +490,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
s->io_notifier = *ioq_get_notifier(&s->ioqueue);
aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io);
+ s->starting = false;
s->started = true;
trace_virtio_blk_data_plane_start(s);
diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 727f4331c0..668cc069ff 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -813,7 +813,7 @@ static int blk_connect(struct XenDevice *xendev)
readonly);
if (bdrv_open(blkdev->bs,
blkdev->filename, NULL, qflags, drv) != 0) {
- bdrv_delete(blkdev->bs);
+ bdrv_unref(blkdev->bs);
blkdev->bs = NULL;
}
}
@@ -824,6 +824,9 @@ static int blk_connect(struct XenDevice *xendev)
/* setup via qemu cmdline -> already setup for us */
xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
blkdev->bs = blkdev->dinfo->bdrv;
+ /* blkdev->bs is not created by us, we get a reference
+ * so we can bdrv_unref() unconditionally */
+ bdrv_ref(blkdev->bs);
}
bdrv_attach_dev_nofail(blkdev->bs, blkdev);
blkdev->file_size = bdrv_getlength(blkdev->bs);
@@ -922,12 +925,8 @@ static void blk_disconnect(struct XenDevice *xendev)
struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
if (blkdev->bs) {
- if (!blkdev->dinfo) {
- /* close/delete only if we created it ourself */
- bdrv_close(blkdev->bs);
- bdrv_detach_dev(blkdev->bs, blkdev);
- bdrv_delete(blkdev->bs);
- }
+ bdrv_detach_dev(blkdev->bs, blkdev);
+ bdrv_unref(blkdev->bs);
blkdev->bs = NULL;
}
xen_be_unbind_evtchn(&blkdev->xendev);
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 275e39595d..907792b721 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -89,7 +89,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
FWCfgState *fw_cfg = NULL;
PcGuestInfo *guest_info;
- if (xen_enabled() && xen_hvm_init() != 0) {
+ if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
fprintf(stderr, "xen hardware virtual machine initialisation failed\n");
exit(1);
}
@@ -339,14 +339,25 @@ static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
.desc = "Standard PC (i440FX + PIIX, 1996)", \
.hot_add_cpu = pc_hot_add_cpu
+#define PC_I440FX_1_7_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
+static QEMUMachine pc_i440fx_machine_v1_7 = {
+ PC_I440FX_1_7_MACHINE_OPTIONS,
+ .name = "pc-i440fx-1.7",
+ .alias = "pc",
+ .init = pc_init_pci,
+ .is_default = 1,
+};
+
#define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
static QEMUMachine pc_i440fx_machine_v1_6 = {
PC_I440FX_1_6_MACHINE_OPTIONS,
.name = "pc-i440fx-1.6",
- .alias = "pc",
.init = pc_init_pci_1_6,
- .is_default = 1,
+ .compat_props = (GlobalProperty[]) {
+ PC_COMPAT_1_6,
+ { /* end of list */ }
+ },
};
static QEMUMachine pc_i440fx_machine_v1_5 = {
@@ -735,6 +746,7 @@ static QEMUMachine xenfv_machine = {
static void pc_machine_init(void)
{
+ qemu_register_machine(&pc_i440fx_machine_v1_7);
qemu_register_machine(&pc_i440fx_machine_v1_6);
qemu_register_machine(&pc_i440fx_machine_v1_5);
qemu_register_machine(&pc_i440fx_machine_v1_4);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index d7b7c3bf9a..ca84e1c04c 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -76,6 +76,11 @@ static void pc_q35_init(QEMUMachineInitArgs *args)
DeviceState *icc_bridge;
PcGuestInfo *guest_info;
+ if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) {
+ fprintf(stderr, "xen hardware virtual machine initialisation failed\n");
+ exit(1);
+ }
+
icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
object_property_add_child(qdev_get_machine(), "icc-bridge",
OBJECT(icc_bridge), NULL);
@@ -258,13 +263,25 @@ static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
.desc = "Standard PC (Q35 + ICH9, 2009)", \
.hot_add_cpu = pc_hot_add_cpu
+#define PC_Q35_1_7_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
+
+static QEMUMachine pc_q35_machine_v1_7 = {
+ PC_Q35_1_7_MACHINE_OPTIONS,
+ .name = "pc-q35-1.7",
+ .alias = "q35",
+ .init = pc_q35_init,
+};
+
#define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
static QEMUMachine pc_q35_machine_v1_6 = {
PC_Q35_1_6_MACHINE_OPTIONS,
.name = "pc-q35-1.6",
- .alias = "q35",
.init = pc_q35_init_1_6,
+ .compat_props = (GlobalProperty[]) {
+ PC_COMPAT_1_6,
+ { /* end of list */ }
+ },
};
static QEMUMachine pc_q35_machine_v1_5 = {
@@ -293,6 +310,7 @@ static QEMUMachine pc_q35_machine_v1_4 = {
static void pc_q35_machine_init(void)
{
+ qemu_register_machine(&pc_q35_machine_v1_7);
qemu_register_machine(&pc_q35_machine_v1_6);
qemu_register_machine(&pc_q35_machine_v1_5);
qemu_register_machine(&pc_q35_machine_v1_4);
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index f5ebed46ab..d3f274cc28 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -135,9 +135,16 @@ typedef struct E1000State_st {
QEMUTimer *autoneg_timer;
+ QEMUTimer *mit_timer; /* Mitigation timer. */
+ bool mit_timer_on; /* Mitigation timer is running. */
+ bool mit_irq_level; /* Tracks interrupt pin level. */
+ uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */
+
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
#define E1000_FLAG_AUTONEG_BIT 0
+#define E1000_FLAG_MIT_BIT 1
#define E1000_FLAG_AUTONEG (1 << E1000_FLAG_AUTONEG_BIT)
+#define E1000_FLAG_MIT (1 << E1000_FLAG_MIT_BIT)
uint32_t compat_flags;
} E1000State;
@@ -158,7 +165,8 @@ enum {
defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL),
defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC),
defreg(RA), defreg(MTA), defreg(CRCERRS),defreg(VFTA),
- defreg(VET),
+ defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV),
+ defreg(ITR),
};
static void
@@ -245,10 +253,21 @@ static const uint32_t mac_reg_init[] = {
E1000_MANC_RMCP_EN,
};
+/* Helper function, *curr == 0 means the value is not set */
+static inline void
+mit_update_delay(uint32_t *curr, uint32_t value)
+{
+ if (value && (*curr == 0 || value < *curr)) {
+ *curr = value;
+ }
+}
+
static void
set_interrupt_cause(E1000State *s, int index, uint32_t val)
{
PCIDevice *d = PCI_DEVICE(s);
+ uint32_t pending_ints;
+ uint32_t mit_delay;
if (val && (E1000_DEVID >= E1000_DEV_ID_82547EI_MOBILE)) {
/* Only for 8257x */
@@ -266,7 +285,57 @@ set_interrupt_cause(E1000State *s, int index, uint32_t val)
*/
s->mac_reg[ICS] = val;
- qemu_set_irq(d->irq[0], (s->mac_reg[IMS] & s->mac_reg[ICR]) != 0);
+ pending_ints = (s->mac_reg[IMS] & s->mac_reg[ICR]);
+ if (!s->mit_irq_level && pending_ints) {
+ /*
+ * Here we detect a potential rising edge. We postpone raising the
+ * interrupt line if we are inside the mitigation delay window
+ * (s->mit_timer_on == 1).
+ * We provide a partial implementation of interrupt mitigation,
+ * emulating only RADV, TADV and ITR (lower 16 bits, 1024ns units for
+ * RADV and TADV, 256ns units for ITR). RDTR is only used to enable
+ * RADV; relative timers based on TIDV and RDTR are not implemented.
+ */
+ if (s->mit_timer_on) {
+ return;
+ }
+ if (s->compat_flags & E1000_FLAG_MIT) {
+ /* Compute the next mitigation delay according to pending
+ * interrupts and the current values of RADV (provided
+ * RDTR!=0), TADV and ITR.
+ * Then rearm the timer.
+ */
+ mit_delay = 0;
+ if (s->mit_ide &&
+ (pending_ints & (E1000_ICR_TXQE | E1000_ICR_TXDW))) {
+ mit_update_delay(&mit_delay, s->mac_reg[TADV] * 4);
+ }
+ if (s->mac_reg[RDTR] && (pending_ints & E1000_ICS_RXT0)) {
+ mit_update_delay(&mit_delay, s->mac_reg[RADV] * 4);
+ }
+ mit_update_delay(&mit_delay, s->mac_reg[ITR]);
+
+ if (mit_delay) {
+ s->mit_timer_on = 1;
+ timer_mod(s->mit_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+ mit_delay * 256);
+ }
+ s->mit_ide = 0;
+ }
+ }
+
+ s->mit_irq_level = (pending_ints != 0);
+ qemu_set_irq(d->irq[0], s->mit_irq_level);
+}
+
+static void
+e1000_mit_timer(void *opaque)
+{
+ E1000State *s = opaque;
+
+ s->mit_timer_on = 0;
+ /* Call set_interrupt_cause to update the irq level (if necessary). */
+ set_interrupt_cause(s, 0, s->mac_reg[ICR]);
}
static void
@@ -307,6 +376,10 @@ static void e1000_reset(void *opaque)
int i;
timer_del(d->autoneg_timer);
+ timer_del(d->mit_timer);
+ d->mit_timer_on = 0;
+ d->mit_irq_level = 0;
+ d->mit_ide = 0;
memset(d->phy_reg, 0, sizeof d->phy_reg);
memmove(d->phy_reg, phy_reg_init, sizeof phy_reg_init);
memset(d->mac_reg, 0, sizeof d->mac_reg);
@@ -572,6 +645,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
struct e1000_tx *tp = &s->tx;
+ s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
if (dtype == E1000_TXD_CMD_DEXT) { // context descriptor
op = le32_to_cpu(xp->cmd_and_length);
tp->ipcss = xp->lower_setup.ip_fields.ipcss;
@@ -1047,7 +1121,8 @@ static uint32_t (*macreg_readops[])(E1000State *, int) = {
getreg(TORL), getreg(TOTL), getreg(IMS), getreg(TCTL),
getreg(RDH), getreg(RDT), getreg(VET), getreg(ICS),
getreg(TDBAL), getreg(TDBAH), getreg(RDBAH), getreg(RDBAL),
- getreg(TDLEN), getreg(RDLEN),
+ getreg(TDLEN), getreg(RDLEN), getreg(RDTR), getreg(RADV),
+ getreg(TADV), getreg(ITR),
[TOTH] = mac_read_clr8, [TORH] = mac_read_clr8, [GPRC] = mac_read_clr4,
[GPTC] = mac_read_clr4, [TPR] = mac_read_clr4, [TPT] = mac_read_clr4,
@@ -1069,6 +1144,8 @@ static void (*macreg_writeops[])(E1000State *, int, uint32_t) = {
[TDH] = set_16bit, [RDH] = set_16bit, [RDT] = set_rdt,
[IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
[EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
+ [RDTR] = set_16bit, [RADV] = set_16bit, [TADV] = set_16bit,
+ [ITR] = set_16bit,
[RA ... RA+31] = &mac_writereg,
[MTA ... MTA+127] = &mac_writereg,
[VFTA ... VFTA+127] = &mac_writereg,
@@ -1150,6 +1227,11 @@ static void e1000_pre_save(void *opaque)
E1000State *s = opaque;
NetClientState *nc = qemu_get_queue(s->nic);
+ /* If the mitigation timer is active, emulate a timeout now. */
+ if (s->mit_timer_on) {
+ e1000_mit_timer(s);
+ }
+
if (!(s->compat_flags & E1000_FLAG_AUTONEG)) {
return;
}
@@ -1171,6 +1253,14 @@ static int e1000_post_load(void *opaque, int version_id)
E1000State *s = opaque;
NetClientState *nc = qemu_get_queue(s->nic);
+ if (!(s->compat_flags & E1000_FLAG_MIT)) {
+ s->mac_reg[ITR] = s->mac_reg[RDTR] = s->mac_reg[RADV] =
+ s->mac_reg[TADV] = 0;
+ s->mit_irq_level = false;
+ }
+ s->mit_ide = 0;
+ s->mit_timer_on = false;
+
/* nc.link_down can't be migrated, so infer link_down according
* to link status bit in mac_reg[STATUS].
* Alternatively, restart link negotiation if it was in progress. */
@@ -1190,6 +1280,28 @@ static int e1000_post_load(void *opaque, int version_id)
return 0;
}
+static bool e1000_mit_state_needed(void *opaque)
+{
+ E1000State *s = opaque;
+
+ return s->compat_flags & E1000_FLAG_MIT;
+}
+
+static const VMStateDescription vmstate_e1000_mit_state = {
+ .name = "e1000/mit_state",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(mac_reg[RDTR], E1000State),
+ VMSTATE_UINT32(mac_reg[RADV], E1000State),
+ VMSTATE_UINT32(mac_reg[TADV], E1000State),
+ VMSTATE_UINT32(mac_reg[ITR], E1000State),
+ VMSTATE_BOOL(mit_irq_level, E1000State),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_e1000 = {
.name = "e1000",
.version_id = 2,
@@ -1267,6 +1379,14 @@ static const VMStateDescription vmstate_e1000 = {
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (VMStateSubsection[]) {
+ {
+ .vmsd = &vmstate_e1000_mit_state,
+ .needed = e1000_mit_state_needed,
+ }, {
+ /* empty */
+ }
}
};
@@ -1316,6 +1436,8 @@ pci_e1000_uninit(PCIDevice *dev)
timer_del(d->autoneg_timer);
timer_free(d->autoneg_timer);
+ timer_del(d->mit_timer);
+ timer_free(d->mit_timer);
memory_region_destroy(&d->mmio);
memory_region_destroy(&d->io);
qemu_del_nic(d->nic);
@@ -1371,6 +1493,7 @@ static int pci_e1000_init(PCIDevice *pci_dev)
add_boot_device_path(d->conf.bootindex, dev, "/ethernet-phy@0");
d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d);
+ d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d);
return 0;
}
@@ -1385,6 +1508,8 @@ static Property e1000_properties[] = {
DEFINE_NIC_PROPERTIES(E1000State, conf),
DEFINE_PROP_BIT("autonegotiation", E1000State,
compat_flags, E1000_FLAG_AUTONEG_BIT, true),
+ DEFINE_PROP_BIT("mitigation", E1000State,
+ compat_flags, E1000_FLAG_MIT_BIT, true),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 31afd28c7c..c96125895e 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -693,7 +693,7 @@ static void ne2000_write(void *opaque, hwaddr addr,
static const MemoryRegionOps ne2000_ops = {
.read = ne2000_read,
.write = ne2000_write,
- .endianness = DEVICE_NATIVE_ENDIAN,
+ .endianness = DEVICE_LITTLE_ENDIAN,
};
/***********************************************************/
diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index 4eae7c76be..f987d71269 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -34,7 +34,6 @@
#define __le16 uint16_t
#define __le32 uint32_t
#define __le64 uint64_t
-#define __packed QEMU_PACKED
#if defined(HOST_WORDS_BIGENDIAN)
#define __BIG_ENDIAN_BITFIELD
@@ -749,7 +748,6 @@ struct Vmxnet3_DriverShared {
#undef __le16
#undef __le32
#undef __le64
-#undef __packed
#if defined(HOST_WORDS_BIGENDIAN)
#undef __BIG_ENDIAN_BITFIELD
#endif
diff --git a/include/block/block.h b/include/block/block.h
index e6b391ce88..728ec1aebf 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,6 +81,32 @@ typedef struct BlockDevOps {
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+/* BDRV_BLOCK_DATA: data is read from bs->file or another file
+ * BDRV_BLOCK_ZERO: sectors read as zero
+ * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data
+ *
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in
+ * bs->file where sector data can be read from as raw data.
+ *
+ * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present.
+ *
+ * DATA ZERO OFFSET_VALID
+ * t t t sectors read as zero, bs->file is zero at offset
+ * t f t sectors read as valid from bs->file at offset
+ * f t t sectors preallocated, read as zero, bs->file not
+ * necessarily zero at offset
+ * f f t sectors preallocated but read from backing_hd,
+ * bs->file contains garbage at offset
+ * t t f sectors preallocated, read as zero, unknown offset
+ * t f f sectors read from unknown file or offset
+ * f t f not allocated or unknown offset, read as zero
+ * f f f not allocated or unknown offset, read from backing_hd
+ */
+#define BDRV_BLOCK_DATA 1
+#define BDRV_BLOCK_ZERO 2
+#define BDRV_BLOCK_OFFSET_VALID 4
+#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK
+
typedef enum {
BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
} BlockErrorAction;
@@ -107,7 +133,6 @@ void bdrv_info_stats(Monitor *mon, QObject **ret_data);
/* disk I/O throttling */
void bdrv_io_limits_enable(BlockDriverState *bs);
void bdrv_io_limits_disable(BlockDriverState *bs);
-bool bdrv_io_limits_enabled(BlockDriverState *bs);
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
@@ -123,7 +148,6 @@ BlockDriverState *bdrv_new(const char *device_name);
void bdrv_make_anon(BlockDriverState *bs);
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
-void bdrv_delete(BlockDriverState *bs);
int bdrv_parse_cache_flags(const char *mode, int *flags);
int bdrv_parse_discard_flags(const char *mode, int *flags);
int bdrv_file_open(BlockDriverState **pbs, const char *filename,
@@ -181,12 +205,6 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
*/
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
-int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum);
-int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
int bdrv_get_backing_file_depth(BlockDriverState *bs);
@@ -277,6 +295,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
@@ -356,6 +376,8 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs);
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
+void bdrv_ref(BlockDriverState *bs);
+void bdrv_unref(BlockDriverState *bs);
void bdrv_set_in_use(BlockDriverState *bs, int in_use);
int bdrv_in_use(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8012e253c9..7c35198ad7 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -35,18 +35,12 @@
#include "qemu/hbitmap.h"
#include "block/snapshot.h"
#include "qemu/main-loop.h"
+#include "qemu/throttle.h"
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
-#define BLOCK_IO_LIMIT_READ 0
-#define BLOCK_IO_LIMIT_WRITE 1
-#define BLOCK_IO_LIMIT_TOTAL 2
-
-#define BLOCK_IO_SLICE_TIME 100000000
-#define NANOSECONDS_PER_SECOND 1000000000.0
-
#define BLOCK_OPT_SIZE "size"
#define BLOCK_OPT_ENCRYPT "encryption"
#define BLOCK_OPT_COMPAT6 "compat6"
@@ -70,17 +64,6 @@ typedef struct BdrvTrackedRequest {
CoQueue wait_queue; /* coroutines blocked on this request */
} BdrvTrackedRequest;
-
-typedef struct BlockIOLimit {
- int64_t bps[3];
- int64_t iops[3];
-} BlockIOLimit;
-
-typedef struct BlockIOBaseValue {
- uint64_t bytes[2];
- uint64_t ios[2];
-} BlockIOBaseValue;
-
struct BlockDriver {
const char *format_name;
int instance_size;
@@ -135,7 +118,7 @@ struct BlockDriver {
int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
- int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum);
/*
@@ -264,13 +247,9 @@ struct BlockDriverState {
/* number of in-flight copy-on-read requests */
unsigned int copy_on_read_in_flight;
- /* the time for latest disk I/O */
- int64_t slice_start;
- int64_t slice_end;
- BlockIOLimit io_limits;
- BlockIOBaseValue slice_submitted;
- CoQueue throttled_reqs;
- QEMUTimer *block_timer;
+ /* I/O throttling */
+ ThrottleState throttle_state;
+ CoQueue throttled_reqs[2];
bool io_limits_enabled;
/* I/O stats (display with "info blockstats"). */
@@ -298,6 +277,7 @@ struct BlockDriverState {
BlockDeviceIoStatus iostatus;
char device_name[32];
HBitmap *dirty_bitmap;
+ int refcnt;
int in_use; /* users other than guest access, eg. block migration */
QTAILQ_ENTRY(BlockDriverState) list;
@@ -312,7 +292,8 @@ struct BlockDriverState {
int get_tmp_filename(char *filename, int size);
void bdrv_set_io_limits(BlockDriverState *bs,
- BlockIOLimit *io_limits);
+ ThrottleConfig *cfg);
+
/**
* bdrv_add_before_write_notifier:
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 7fb04d8cd8..9b2ddc4acc 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -225,7 +225,15 @@ void pvpanic_init(ISABus *bus);
int e820_add_entry(uint64_t, uint64_t, uint32_t);
+#define PC_COMPAT_1_6 \
+ {\
+ .driver = "e1000",\
+ .property = "mitigation",\
+ .value = "off",\
+ }
+
#define PC_COMPAT_1_5 \
+ PC_COMPAT_1_6, \
{\
.driver = "Conroe-" TYPE_X86_CPU,\
.property = "model",\
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index 6d42dd1bd1..e1f88bf9cf 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -37,17 +37,15 @@ void xen_cmos_set_s3_resume(void *opaque, int irq, int level);
qemu_irq *xen_interrupt_controller_init(void);
int xen_init(void);
-int xen_hvm_init(void);
+int xen_hvm_init(MemoryRegion **ram_memory);
void xenstore_store_pv_console_info(int i, struct CharDriverState *chr);
#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
-struct MemoryRegion;
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
struct MemoryRegion *mr);
void xen_modified_memory(ram_addr_t start, ram_addr_t length);
#endif
-struct MemoryRegion;
void xen_register_framebuffer(struct MemoryRegion *mr);
#if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
diff --git a/include/net/net.h b/include/net/net.h
index 30e4b04066..11e146888b 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -69,7 +69,7 @@ struct NetClientState {
int link_down;
QTAILQ_ENTRY(NetClientState) next;
NetClientState *peer;
- NetQueue *send_queue;
+ NetQueue *incoming_queue;
char *model;
char *name;
char info_str[256];
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
new file mode 100644
index 0000000000..ab29b0b918
--- /dev/null
+++ b/include/qemu/throttle.h
@@ -0,0 +1,110 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ * Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef THROTTLE_H
+#define THROTTLE_H
+
+#include <stdint.h>
+#include "qemu-common.h"
+#include "qemu/timer.h"
+
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
+/* Kinds of throttling limits.
+ * BUCKETS_COUNT is not a limit kind: being the last enumerator, it equals
+ * the number of kinds and sizes the ThrottleConfig.buckets[] array.
+ * NOTE(review): the other enumerators presumably index that array - confirm.
+ */
+typedef enum {
+ THROTTLE_BPS_TOTAL,
+ THROTTLE_BPS_READ,
+ THROTTLE_BPS_WRITE,
+ THROTTLE_OPS_TOTAL,
+ THROTTLE_OPS_READ,
+ THROTTLE_OPS_WRITE,
+ BUCKETS_COUNT,
+} BucketType;
+
+/*
+ * The max parameter of the leaky bucket throttling algorithm can be used to
+ * allow the guest to do bursts.
+ * The max value is a pool of I/O that the guest can use without being throttled
+ * at all. Throttling is triggered once this pool is empty.
+ */
+
+/* One leaky bucket: a rate goal, a burst allowance and the current fill. */
+typedef struct LeakyBucket {
+ double avg; /* average goal in units per second */
+ double max; /* leaky bucket max burst in units */
+ double level; /* bucket level in units */
+} LeakyBucket;
+
+/* The following structure is used to configure a ThrottleState.
+ * It contains a bit of state: the level field of the LeakyBucket structure.
+ * However, this keeps the code clean, and the level field is reset to
+ * zero at the right time.
+ */
+typedef struct ThrottleConfig {
+ LeakyBucket buckets[BUCKETS_COUNT]; /* leaky buckets */
+ uint64_t op_size; /* size of an operation in bytes */
+} ThrottleConfig;
+
+/* Throttling state: a configuration plus the leak timestamp, timers and
+ * clock type that go with it.
+ */
+typedef struct ThrottleState {
+ ThrottleConfig cfg; /* configuration */
+ int64_t previous_leak; /* timestamp of the last leak done */
+ QEMUTimer * timers[2]; /* timers used to do the throttling */
+ QEMUClockType clock_type; /* the clock used */
+} ThrottleState;
+
+/* operations on single leaky buckets */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta);
+
+int64_t throttle_compute_wait(LeakyBucket *bkt);
+
+/* expose timer computation function for unit tests */
+bool throttle_compute_timer(ThrottleState *ts,
+ bool is_write,
+ int64_t now,
+ int64_t *next_timestamp);
+
+/* init/destroy cycle */
+void throttle_init(ThrottleState *ts,
+ QEMUClockType clock_type,
+ void (read_timer)(void *),
+ void (write_timer)(void *),
+ void *timer_opaque);
+
+void throttle_destroy(ThrottleState *ts);
+
+bool throttle_have_timer(ThrottleState *ts);
+
+/* configuration */
+bool throttle_enabled(ThrottleConfig *cfg);
+
+bool throttle_conflicting(ThrottleConfig *cfg);
+
+bool throttle_is_valid(ThrottleConfig *cfg);
+
+void throttle_config(ThrottleState *ts, ThrottleConfig *cfg);
+
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg);
+
+/* usage */
+bool throttle_schedule_timer(ThrottleState *ts, bool is_write);
+
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size);
+
+#endif
diff --git a/nbd.c b/nbd.c
index 0fd05836ca..f847940f3e 100644
--- a/nbd.c
+++ b/nbd.c
@@ -882,6 +882,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
exp->nbdflags = nbdflags;
exp->size = size == -1 ? bdrv_getlength(bs) : size;
exp->close = close;
+ bdrv_ref(bs);
return exp;
}
@@ -928,6 +929,10 @@ void nbd_export_close(NBDExport *exp)
}
nbd_export_set_name(exp, NULL);
nbd_export_put(exp);
+ if (exp->bs) {
+ bdrv_unref(exp->bs);
+ exp->bs = NULL;
+ }
}
void nbd_export_get(NBDExport *exp)
diff --git a/net/hub.c b/net/hub.c
index df32074de0..33a99c99ef 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -347,7 +347,7 @@ bool net_hub_flush(NetClientState *nc)
QLIST_FOREACH(port, &source_port->hub->ports, next) {
if (port != source_port) {
- ret += qemu_net_queue_flush(port->nc.send_queue);
+ ret += qemu_net_queue_flush(port->nc.incoming_queue);
}
}
return ret ? true : false;
diff --git a/net/net.c b/net/net.c
index 114859267e..c330c9a3a8 100644
--- a/net/net.c
+++ b/net/net.c
@@ -207,7 +207,7 @@ static void qemu_net_client_setup(NetClientState *nc,
}
QTAILQ_INSERT_TAIL(&net_clients, nc, next);
- nc->send_queue = qemu_new_net_queue(nc);
+ nc->incoming_queue = qemu_new_net_queue(nc);
nc->destructor = destructor;
}
@@ -289,8 +289,8 @@ static void qemu_cleanup_net_client(NetClientState *nc)
static void qemu_free_net_client(NetClientState *nc)
{
- if (nc->send_queue) {
- qemu_del_net_queue(nc->send_queue);
+ if (nc->incoming_queue) {
+ qemu_del_net_queue(nc->incoming_queue);
}
if (nc->peer) {
nc->peer->peer = NULL;
@@ -431,7 +431,7 @@ void qemu_purge_queued_packets(NetClientState *nc)
return;
}
- qemu_net_queue_purge(nc->peer->send_queue, nc);
+ qemu_net_queue_purge(nc->peer->incoming_queue, nc);
}
void qemu_flush_queued_packets(NetClientState *nc)
@@ -444,7 +444,7 @@ void qemu_flush_queued_packets(NetClientState *nc)
}
return;
}
- if (qemu_net_queue_flush(nc->send_queue)) {
+ if (qemu_net_queue_flush(nc->incoming_queue)) {
/* We emptied the queue successfully, signal to the IO thread to repoll
* the file descriptor (for tap, for example).
*/
@@ -468,7 +468,7 @@ static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
return size;
}
- queue = sender->peer->send_queue;
+ queue = sender->peer->incoming_queue;
return qemu_net_queue_send(queue, sender, flags, buf, size, sent_cb);
}
@@ -543,7 +543,7 @@ ssize_t qemu_sendv_packet_async(NetClientState *sender,
return iov_size(iov, iovcnt);
}
- queue = sender->peer->send_queue;
+ queue = sender->peer->incoming_queue;
return qemu_net_queue_send_iov(queue, sender,
QEMU_NET_PACKET_FLAG_NONE,
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index f61d580963..90f8a02276 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -44,8 +44,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
struct stat s;
#endif
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
- defined(__OpenBSD__) || defined(__APPLE__)
/* if no ifname is given, always start the search from tap0/tun0. */
int i;
char dname[100];
@@ -76,15 +74,6 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
dname, strerror(errno));
return -1;
}
-#else
- TFR(fd = open("/dev/tap", O_RDWR));
- if (fd < 0) {
- fprintf(stderr,
- "warning: could not open /dev/tap: no virtual network emulation: %s\n",
- strerror(errno));
- return -1;
- }
-#endif
#ifdef TAPGIFNAME
if (ioctl(fd, TAPGIFNAME, (void *)&ifr) < 0) {
diff --git a/qapi-schema.json b/qapi-schema.json
index a51f7d2d6e..2b2c8bce07 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -785,6 +785,20 @@
#
# @image: the info of image used (since: 1.6)
#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
# Since: 0.14.0
#
# Notes: This interface is only found in @BlockInfo.
@@ -795,7 +809,11 @@
'encrypted': 'bool', 'encryption_key_missing': 'bool',
'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
- 'image': 'ImageInfo' } }
+ 'image': 'ImageInfo',
+ '*bps_max': 'int', '*bps_rd_max': 'int',
+ '*bps_wr_max': 'int', '*iops_max': 'int',
+ '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*iops_size': 'int' } }
##
# @BlockDeviceIoStatus:
@@ -813,6 +831,35 @@
{ 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
##
+# @BlockDeviceMapEntry:
+#
+# Entry in the metadata map of the device (returned by "qemu-img map")
+#
+# @start: Offset in the image of the first byte described by this entry
+# (in bytes)
+#
+# @length: Length of the range described by this entry (in bytes)
+#
+# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.)
+# before reaching one for which the range is allocated. The value is
+# in the range 0 to the depth of the image chain - 1.
+#
+# @zero: the sectors in this range read as zeros
+#
+# @data: reading the image will actually read data from a file (in particular,
+# if @offset is present this means that the sectors are not simply
+# preallocated, but contain actual data in raw format)
+#
+# @offset: if present, the image file stores the data for this range in
+# raw format at the given offset.
+#
+# Since 1.7
+##
+{ 'type': 'BlockDeviceMapEntry',
+ 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool',
+ 'data': 'bool', '*offset': 'int' } }
+
+##
# @BlockDirtyInfo:
#
# Block dirty bitmap information.
@@ -2174,6 +2221,20 @@
#
# @iops_wr: write I/O operations per second
#
+# @bps_max: #optional total max in bytes (Since 1.7)
+#
+# @bps_rd_max: #optional read max in bytes (Since 1.7)
+#
+# @bps_wr_max: #optional write max in bytes (Since 1.7)
+#
+# @iops_max: #optional total I/O operations max (Since 1.7)
+#
+# @iops_rd_max: #optional read I/O operations max (Since 1.7)
+#
+# @iops_wr_max: #optional write I/O operations max (Since 1.7)
+#
+# @iops_size: #optional an I/O size in bytes (Since 1.7)
+#
# Returns: Nothing on success
# If @device is not a valid block device, DeviceNotFound
#
@@ -2181,7 +2242,11 @@
##
{ 'command': 'block_set_io_throttle',
'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
- 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int' } }
+ 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
+ '*bps_max': 'int', '*bps_rd_max': 'int',
+ '*bps_wr_max': 'int', '*iops_max': 'int',
+ '*iops_rd_max': 'int', '*iops_wr_max': 'int',
+ '*iops_size': 'int' } }
##
# @block-stream:
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 4ca7e95655..0c36e5968f 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -34,9 +34,9 @@ STEXI
ETEXI
DEF("convert", img_convert,
- "convert [-c] [-p] [-q] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename")
+ "convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename")
STEXI
-@item convert [-c] [-p] [-q] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
ETEXI
DEF("info", img_info,
@@ -45,6 +45,12 @@ STEXI
@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
ETEXI
+DEF("map", img_map,
+ "map [-f fmt] [--output=ofmt] filename")
+STEXI
+@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+ETEXI
+
DEF("snapshot", img_snapshot,
"snapshot [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename")
STEXI
diff --git a/qemu-img.c b/qemu-img.c
index b9a848db74..3e5e388d1c 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -103,6 +103,8 @@ static void help(void)
" '-S' indicates the consecutive number of bytes that must contain only zeros\n"
" for qemu-img to create a sparse image during conversion\n"
" '--output' takes the format in which the output must be done (human or json)\n"
+ " '-n' skips the target volume creation (useful if the volume is created\n"
+ " prior to running qemu-img)\n"
"\n"
"Parameters to check subcommand:\n"
" '-r' tries to repair any inconsistencies that are found during the check.\n"
@@ -298,7 +300,7 @@ static BlockDriverState *bdrv_new_open(const char *filename,
return bs;
fail:
if (bs) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
}
return NULL;
}
@@ -652,7 +654,7 @@ static int img_check(int argc, char **argv)
fail:
qapi_free_ImageCheck(check);
- bdrv_delete(bs);
+ bdrv_unref(bs);
return ret;
}
@@ -722,7 +724,7 @@ static int img_commit(int argc, char **argv)
break;
}
- bdrv_delete(bs);
+ bdrv_unref(bs);
if (ret) {
return 1;
}
@@ -1104,11 +1106,11 @@ static int img_compare(int argc, char **argv)
ret = 0;
out:
- bdrv_delete(bs2);
+ bdrv_unref(bs2);
qemu_vfree(buf1);
qemu_vfree(buf2);
out2:
- bdrv_delete(bs1);
+ bdrv_unref(bs1);
out3:
qemu_progress_end();
return ret;
@@ -1116,7 +1118,8 @@ out3:
static int img_convert(int argc, char **argv)
{
- int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
+ int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size,
+ cluster_sectors, skip_create;
int progress = 0, flags;
const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
BlockDriver *drv, *proto_drv;
@@ -1139,8 +1142,9 @@ static int img_convert(int argc, char **argv)
cache = "unsafe";
out_baseimg = NULL;
compress = 0;
+ skip_create = 0;
for(;;) {
- c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:q");
+ c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:qn");
if (c == -1) {
break;
}
@@ -1197,6 +1201,9 @@ static int img_convert(int argc, char **argv)
case 'q':
quiet = true;
break;
+ case 'n':
+ skip_create = 1;
+ break;
}
}
@@ -1329,20 +1336,22 @@ static int img_convert(int argc, char **argv)
}
}
- /* Create the new image */
- ret = bdrv_create(drv, out_filename, param);
- if (ret < 0) {
- if (ret == -ENOTSUP) {
- error_report("Formatting not supported for file format '%s'",
- out_fmt);
- } else if (ret == -EFBIG) {
- error_report("The image size is too large for file format '%s'",
- out_fmt);
- } else {
- error_report("%s: error while converting %s: %s",
- out_filename, out_fmt, strerror(-ret));
+ if (!skip_create) {
+ /* Create the new image */
+ ret = bdrv_create(drv, out_filename, param);
+ if (ret < 0) {
+ if (ret == -ENOTSUP) {
+ error_report("Formatting not supported for file format '%s'",
+ out_fmt);
+ } else if (ret == -EFBIG) {
+ error_report("The image size is too large for file format '%s'",
+ out_fmt);
+ } else {
+ error_report("%s: error while converting %s: %s",
+ out_filename, out_fmt, strerror(-ret));
+ }
+ goto out;
}
- goto out;
}
flags = BDRV_O_RDWR;
@@ -1363,6 +1372,20 @@ static int img_convert(int argc, char **argv)
bdrv_get_geometry(bs[0], &bs_sectors);
buf = qemu_blockalign(out_bs, IO_BUF_SIZE);
+ if (skip_create) {
+ int64_t output_length = bdrv_getlength(out_bs);
+ if (output_length < 0) {
+ error_report("unable to get output image length: %s\n",
+ strerror(-output_length));
+ ret = -1;
+ goto out;
+ } else if (output_length < total_sectors << BDRV_SECTOR_BITS) {
+ error_report("output file is smaller than input file");
+ ret = -1;
+ goto out;
+ }
+ }
+
if (compress) {
ret = bdrv_get_info(out_bs, &bdi);
if (ret < 0) {
@@ -1479,21 +1502,26 @@ static int img_convert(int argc, char **argv)
n = bs_offset + bs_sectors - sector_num;
}
- if (has_zero_init) {
- /* If the output image is being created as a copy on write image,
- assume that sectors which are unallocated in the input image
- are present in both the output's and input's base images (no
- need to copy them). */
- if (out_baseimg) {
- if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
- n, &n1)) {
- sector_num += n1;
- continue;
- }
- /* The next 'n1' sectors are allocated in the input image. Copy
- only those as they may be followed by unallocated sectors. */
- n = n1;
+ /* If the output image is being created as a copy on write image,
+ assume that sectors which are unallocated in the input image
+ are present in both the output's and input's base images (no
+ need to copy them). */
+ if (out_baseimg) {
+ ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
+ n, &n1);
+ if (ret < 0) {
+ error_report("error while reading metadata for sector "
+ "%" PRId64 ": %s",
+ sector_num - bs_offset, strerror(-ret));
+ goto out;
}
+ if (!ret) {
+ sector_num += n1;
+ continue;
+ }
+ /* The next 'n1' sectors are allocated in the input image. Copy
+ only those as they may be followed by unallocated sectors. */
+ n = n1;
} else {
n1 = n;
}
@@ -1509,14 +1537,7 @@ static int img_convert(int argc, char **argv)
should add a specific call to have the info to go faster */
buf1 = buf;
while (n > 0) {
- /* If the output image is being created as a copy on write image,
- copy all sectors even the ones containing only NUL bytes,
- because they may differ from the sectors in the base image.
-
- If the output is to a host device, we also write out
- sectors that are entirely 0, since whatever data was
- already there is garbage, not 0s. */
- if (!has_zero_init || out_baseimg ||
+ if (!has_zero_init ||
is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
ret = bdrv_write(out_bs, sector_num, buf1, n1);
if (ret < 0) {
@@ -1538,12 +1559,12 @@ out:
free_option_parameters(param);
qemu_vfree(buf);
if (out_bs) {
- bdrv_delete(out_bs);
+ bdrv_unref(out_bs);
}
if (bs) {
for (bs_i = 0; bs_i < bs_n; bs_i++) {
if (bs[bs_i]) {
- bdrv_delete(bs[bs_i]);
+ bdrv_unref(bs[bs_i]);
}
}
g_free(bs);
@@ -1681,7 +1702,7 @@ static ImageInfoList *collect_image_info_list(const char *filename,
*last = elem;
last = &elem->next;
- bdrv_delete(bs);
+ bdrv_unref(bs);
filename = fmt = NULL;
if (chain) {
@@ -1780,6 +1801,197 @@ static int img_info(int argc, char **argv)
return 0;
}
+
+/* One extent of the image map, as filled in by get_block_status(). */
+typedef struct MapEntry {
+ int flags; /* block status with the BDRV_BLOCK_OFFSET_MASK bits cleared */
+ int depth; /* number of backing_hd hops taken from the queried image */
+ int64_t start; /* first byte of the extent (sector * BDRV_SECTOR_SIZE) */
+ int64_t length; /* size of the extent in bytes */
+ int64_t offset; /* block status bits selected by BDRV_BLOCK_OFFSET_MASK */
+ BlockDriverState *bs; /* image where the lookup stopped; NULL if none */
+} MapEntry;
+
+/*
+ * Print one map entry in the requested output format.
+ *
+ * @e is the entry to print.  @next is the entry that follows it, or NULL
+ * when @e is the last one.
+ *
+ * Human format: only entries with BDRV_BLOCK_DATA set and BDRV_BLOCK_ZERO
+ * clear are printed.  Data without BDRV_BLOCK_OFFSET_VALID is reported as
+ * an error and the process exits with status 1.  As a side effect the
+ * flags of @next may be rewritten so that the caller can coalesce more
+ * entries.
+ *
+ * JSON format: every entry is printed as one object.  The array is opened
+ * by the entry whose start is 0 and closed by the entry that has no @next.
+ */
+static void dump_map_entry(OutputFormat output_format, MapEntry *e,
+                           MapEntry *next)
+{
+    switch (output_format) {
+    case OFORMAT_HUMAN:
+        if ((e->flags & BDRV_BLOCK_DATA) &&
+            !(e->flags & BDRV_BLOCK_OFFSET_VALID)) {
+            error_report("File contains external, encrypted or compressed clusters.");
+            exit(1);
+        }
+        if ((e->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) == BDRV_BLOCK_DATA) {
+            printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n",
+                   e->start, e->length, e->offset, e->bs->filename);
+        }
+        /* This format ignores the distinction between 0, ZERO and ZERO|DATA.
+         * Modify the flags here to allow more coalescing.
+         */
+        if (next &&
+            (next->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) != BDRV_BLOCK_DATA) {
+            next->flags &= ~BDRV_BLOCK_DATA;
+            next->flags |= BDRV_BLOCK_ZERO;
+        }
+        break;
+    case OFORMAT_JSON:
+        printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64", \"depth\": %d,"
+               " \"zero\": %s, \"data\": %s",
+               (e->start == 0 ? "[" : ",\n"),
+               e->start, e->length, e->depth,
+               (e->flags & BDRV_BLOCK_ZERO) ? "true" : "false",
+               (e->flags & BDRV_BLOCK_DATA) ? "true" : "false");
+        if (e->flags & BDRV_BLOCK_OFFSET_VALID) {
+            /* JSON member names must be double-quoted strings. */
+            printf(", \"offset\": %"PRId64"", e->offset);
+        }
+        putchar('}');
+
+        if (!next) {
+            printf("]\n");
+        }
+        break;
+    }
+}
+
+/*
+ * Query the status of the sectors starting at @sector_num, walking down the
+ * backing chain of @bs until a layer reports them as data or zero, or until
+ * there is no further backing file.
+ *
+ * At most @nb_sectors sectors are examined; the length stored in @e is the
+ * count reported by the last bdrv_get_block_status() call.
+ *
+ * Returns 0 after filling in @e, or the negative value returned by
+ * bdrv_get_block_status(), in which case @e is left untouched.
+ */
+static int get_block_status(BlockDriverState *bs, int64_t sector_num,
+                            int nb_sectors, MapEntry *e)
+{
+    int64_t status;
+    int layer = 0;
+
+    /* Possible optimization: remember, per file of the chain, the current
+     * unallocated range so that the same range is not queried repeatedly.
+     */
+    while (1) {
+        status = bdrv_get_block_status(bs, sector_num, nb_sectors,
+                                       &nb_sectors);
+        if (status < 0) {
+            return status;
+        }
+        assert(nb_sectors);
+
+        if ((status & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) != 0) {
+            /* This layer accounts for the range. */
+            break;
+        }
+
+        bs = bs->backing_hd;
+        if (!bs) {
+            /* No layer reported data or zeroes for the range. */
+            status = 0;
+            break;
+        }
+        layer++;
+    }
+
+    e->bs = bs;
+    e->depth = layer;
+    e->flags = status & ~BDRV_BLOCK_OFFSET_MASK;
+    e->offset = status & BDRV_BLOCK_OFFSET_MASK;
+    e->start = sector_num * BDRV_SECTOR_SIZE;
+    e->length = nb_sectors * BDRV_SECTOR_SIZE;
+    return 0;
+}
+
+/*
+ * "qemu-img map" subcommand: print the extents of an image, as returned by
+ * get_block_status(), in human or JSON format.
+ *
+ * Options: -f fmt / --format fmt, --output human|json, -h / --help.
+ * Adjacent extents with equal flags and depth (and contiguous offsets when
+ * the offset is valid) are merged before being printed.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int img_map(int argc, char **argv)
+{
+    int c;
+    OutputFormat output_format = OFORMAT_HUMAN;
+    BlockDriverState *bs;
+    const char *filename, *fmt, *output;
+    int64_t length;
+    MapEntry curr = { .length = 0 }, next;
+    int ret = 0;
+
+    fmt = NULL;
+    output = NULL;
+    for (;;) {
+        int option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, "f:h",
+                        long_options, &option_index);
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case '?':
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
+        }
+    }
+    if (optind >= argc) {
+        help();
+    }
+    filename = argv[optind++];
+
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && !strcmp(output, "human")) {
+        output_format = OFORMAT_HUMAN;
+    } else if (output) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS, true, false);
+    if (!bs) {
+        return 1;
+    }
+
+    /* A negative length would skip the loop below, print a bogus empty
+     * entry and report success, so fail here before printing anything.
+     */
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        error_report("Could not get image size: %s", strerror(-length));
+        ret = -1;
+        goto out;
+    }
+
+    if (output_format == OFORMAT_HUMAN) {
+        printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File");
+    }
+
+    while (curr.start + curr.length < length) {
+        int64_t nsectors_left;
+        int64_t sector_num;
+        int n;
+
+        sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS;
+
+        /* Probe up to 1 GiB at a time. */
+        nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num;
+        n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left);
+        ret = get_block_status(bs, sector_num, n, &next);
+
+        if (ret < 0) {
+            error_report("Could not read file metadata: %s", strerror(-ret));
+            goto out;
+        }
+
+        /* Merge the new extent into the pending one when they match. */
+        if (curr.length != 0 && curr.flags == next.flags &&
+            curr.depth == next.depth &&
+            ((curr.flags & BDRV_BLOCK_OFFSET_VALID) == 0 ||
+             curr.offset + curr.length == next.offset)) {
+            curr.length += next.length;
+            continue;
+        }
+
+        if (curr.length > 0) {
+            dump_map_entry(output_format, &curr, &next);
+        }
+        curr = next;
+    }
+
+    /* Flush the last pending extent; a NULL next closes JSON output. */
+    dump_map_entry(output_format, &curr, NULL);
+
+out:
+    bdrv_unref(bs);
+    return ret < 0;
+}
+
#define SNAPSHOT_LIST 1
#define SNAPSHOT_CREATE 2
#define SNAPSHOT_APPLY 3
@@ -1895,7 +2107,7 @@ static int img_snapshot(int argc, char **argv)
}
/* Cleanup */
- bdrv_delete(bs);
+ bdrv_unref(bs);
if (ret) {
return 1;
}
@@ -2076,6 +2288,11 @@ static int img_rebase(int argc, char **argv)
/* If the cluster is allocated, we don't need to take action */
ret = bdrv_is_allocated(bs, sector, n, &n);
+ if (ret < 0) {
+ error_report("error while reading image metadata: %s",
+ strerror(-ret));
+ goto out;
+ }
if (ret) {
continue;
}
@@ -2170,14 +2387,14 @@ out:
/* Cleanup */
if (!unsafe) {
if (bs_old_backing != NULL) {
- bdrv_delete(bs_old_backing);
+ bdrv_unref(bs_old_backing);
}
if (bs_new_backing != NULL) {
- bdrv_delete(bs_new_backing);
+ bdrv_unref(bs_new_backing);
}
}
- bdrv_delete(bs);
+ bdrv_unref(bs);
if (ret) {
return 1;
}
@@ -2300,7 +2517,7 @@ static int img_resize(int argc, char **argv)
}
out:
if (bs) {
- bdrv_delete(bs);
+ bdrv_unref(bs);
}
if (ret) {
return 1;
diff --git a/qemu-img.texi b/qemu-img.texi
index 69f1bda6ae..43ee4eb5c4 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -96,6 +96,14 @@ Second image format
Strict mode - fail on different image size or sector allocation
@end table
+Parameters to convert subcommand:
+
+@table @option
+
+@item -n
+Skip the creation of the target volume
+@end table
+
Command description:
@table @option
@@ -171,7 +179,7 @@ Error on reading data
@end table
-@item convert [-c] [-p] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename}
Convert the disk image @var{filename} or a snapshot @var{snapshot_name} to disk image @var{output_filename}
using format @var{output_fmt}. It can be optionally compressed (@code{-c}
@@ -190,6 +198,11 @@ created as a copy on write image of the specified base image; the
@var{backing_file} should have the same content as the input's base image,
however the path, image format, etc may differ.
+If the @code{-n} option is specified, the target volume creation will be
+skipped. This is useful for formats such as @code{rbd} if the target
+volume has already been created with site specific options that cannot
+be supplied through qemu-img.
+
@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
Give information about the disk image @var{filename}. Use it in
@@ -213,6 +226,61 @@ To enumerate information about each disk image in the above chain, starting from
qemu-img info --backing-chain snap2.qcow2
@end example
+@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+
+Dump the metadata of image @var{filename} and its backing file chain.
+In particular, this command dumps the allocation state of every sector
+of @var{filename}, together with the topmost file that allocates it in
+the backing file chain.
+
+Two output formats are possible. The default format (@code{human})
+only dumps known-nonzero areas of the file. Known-zero parts of the
+file are omitted altogether, and likewise for parts that are not allocated
+throughout the chain. @command{qemu-img} output will identify a file
+from where the data can be read, and the offset in the file. Each line
+will include four fields, the first three of which are hexadecimal
+numbers. For example the first line of:
+@example
+Offset Length Mapped to File
+0 0x20000 0x50000 /tmp/overlay.qcow2
+0x100000 0x10000 0x95380000 /tmp/backing.qcow2
+@end example
+@noindent
+means that 0x20000 (131072) bytes starting at offset 0 in the image are
+available in /tmp/overlay.qcow2 (opened in @code{raw} format) starting
+at offset 0x50000 (327680). Data that is compressed, encrypted, or
+otherwise not available in raw format will cause an error if @code{human}
+format is in use. Note that file names can include newlines, thus it is
+not safe to parse this output format in scripts.
+
+The alternative format @code{json} will return an array of dictionaries
+in JSON format. It will include similar information in
+the @code{start}, @code{length}, @code{offset} fields;
+it will also include other more specific information:
+@itemize @minus
+@item
+whether the sectors contain actual data or not (boolean field @code{data};
+if false, the sectors are either unallocated or stored as optimized
+all-zero clusters);
+
+@item
+whether the data is known to read as zero (boolean field @code{zero});
+
+@item
+in order to make the output shorter, the target file is expressed as
+a @code{depth}; for example, a depth of 2 refers to the backing file
+of the backing file of @var{filename}.
+@end itemize
+
+In JSON format, the @code{offset} field is optional; it is absent in
+cases where @code{human} format would omit the entry or exit with an error.
+If @code{data} is false and the @code{offset} field is present, the
+corresponding sectors in the file are not yet in use, but they are
+preallocated.
+
+For more information, consult @file{include/block/block.h} in QEMU's
+source code.
+
@item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename}
List, apply, create or delete snapshots in image @var{filename}.
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index f91b6c4f02..8565d49336 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1830,6 +1830,10 @@ static int alloc_f(BlockDriverState *bs, int argc, char **argv)
sector_num = offset >> 9;
while (remaining) {
ret = bdrv_is_allocated(bs, sector_num, remaining, &num);
+ if (ret < 0) {
+ printf("is_allocated failed: %s\n", strerror(-ret));
+ return 0;
+ }
sector_num += num;
remaining -= num;
if (ret) {
diff --git a/qemu-io.c b/qemu-io.c
index d54dc86921..71f4ff1302 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -32,7 +32,7 @@ static char **cmdline;
static int close_f(BlockDriverState *bs, int argc, char **argv)
{
- bdrv_delete(bs);
+ bdrv_unref(bs);
qemuio_bs = NULL;
return 0;
}
@@ -61,7 +61,7 @@ static int openfile(char *name, int flags, int growable)
if (bdrv_open(qemuio_bs, name, NULL, flags, NULL) < 0) {
fprintf(stderr, "%s: can't open device %s\n", progname, name);
- bdrv_delete(qemuio_bs);
+ bdrv_unref(qemuio_bs);
qemuio_bs = NULL;
return 1;
}
@@ -422,7 +422,7 @@ int main(int argc, char **argv)
bdrv_drain_all();
if (qemuio_bs) {
- bdrv_delete(qemuio_bs);
+ bdrv_unref(qemuio_bs);
}
return 0;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index d15338e879..5dc8b75cdb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -409,7 +409,11 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
" [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
- " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]]\n"
+ " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
+ " [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
+ " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
+ " [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n"
+ " [[,iops_size=is]]\n"
" use 'file' as a drive image\n", QEMU_ARCH_ALL)
STEXI
@item -drive @var{option}[,@var{option}[,@var{option}[,...]]]
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8a8f342eab..008cad95a2 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1389,7 +1389,7 @@ EQMP
{
.name = "block_set_io_throttle",
- .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l",
+ .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?",
.mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle,
},
@@ -1408,6 +1408,13 @@ Arguments:
- "iops": total I/O operations per second (json-int)
- "iops_rd": read I/O operations per second (json-int)
- "iops_wr": write I/O operations per second (json-int)
+- "bps_max": total max in bytes (json-int)
+- "bps_rd_max": read max in bytes (json-int)
+- "bps_wr_max": write max in bytes (json-int)
+- "iops_max": total I/O operations max (json-int)
+- "iops_rd_max": read I/O operations max (json-int)
+- "iops_wr_max": write I/O operations max (json-int)
+- "iops_size": I/O size in bytes when limiting (json-int)
Example:
@@ -1417,7 +1424,14 @@ Example:
"bps_wr": 0,
"iops": 0,
"iops_rd": 0,
- "iops_wr": 0 } }
+ "iops_wr": 0,
+ "bps_max": 8000000,
+ "bps_rd_max": 0,
+ "bps_wr_max": 0,
+ "iops_max": 0,
+ "iops_rd_max": 0,
+ "iops_wr_max": 0,
+ "iops_size": 0 } }
<- { "return": {} }
EQMP
@@ -1758,6 +1772,13 @@ Each json-object contain the following:
- "iops": limit total I/O operations per second (json-int)
- "iops_rd": limit read operations per second (json-int)
- "iops_wr": limit write operations per second (json-int)
+ - "bps_max": total max in bytes (json-int)
+ - "bps_rd_max": read max in bytes (json-int)
+ - "bps_wr_max": write max in bytes (json-int)
+ - "iops_max": total I/O operations max (json-int)
+ - "iops_rd_max": read I/O operations max (json-int)
+ - "iops_wr_max": write I/O operations max (json-int)
+ - "iops_size": I/O size in bytes when limiting by iops (json-int)
- "image": the detail of the image, it is a json-object containing
the following:
- "filename": image file name (json-string)
@@ -1827,6 +1848,13 @@ Example:
"iops":1000000,
"iops_rd":0,
"iops_wr":0,
+ "bps_max": 8000000,
+ "bps_rd_max": 0,
+ "bps_wr_max": 0,
+ "iops_max": 0,
+ "iops_rd_max": 0,
+ "iops_wr_max": 0,
+ "iops_size": 0,
"image":{
"filename":"disks/test.qcow2",
"format":"qcow2",
diff --git a/tests/Makefile b/tests/Makefile
index baba9e95ad..c13fefc314 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -31,6 +31,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF)
check-unit-y += tests/test-iov$(EXESUF)
gcov-files-test-iov-y = util/iov.c
check-unit-y += tests/test-aio$(EXESUF)
+check-unit-y += tests/test-throttle$(EXESUF)
gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
check-unit-y += tests/test-thread-pool$(EXESUF)
@@ -120,6 +121,7 @@ tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a
tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
+tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out
index fb4f20e7cd..0764389f8e 100644
--- a/tests/qemu-iotests/026.out
+++ b/tests/qemu-iotests/026.out
@@ -126,62 +126,64 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 5; imm: off; once: on; write
write failed: Input/output error
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 5; imm: off; once: on; write -b
write failed: Input/output error
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 5; imm: off; once: off; write
write failed: Input/output error
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 5; imm: off; once: off; write -b
write failed: Input/output error
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 28; imm: off; once: on; write
write failed: No space left on device
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 28; imm: off; once: on; write -b
write failed: No space left on device
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 28; imm: off; once: off; write
write failed: No space left on device
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_update; errno: 28; imm: off; once: off; write -b
write failed: No space left on device
-128 leaked clusters were found on the image.
+127 leaked clusters were found on the image.
This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_alloc.write; errno: 5; imm: off; once: on; write
write failed: Input/output error
-No errors were found on the image.
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b
@@ -205,7 +207,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_alloc.write; errno: 28; imm: off; once: on; write
write failed: No space left on device
-No errors were found on the image.
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b
@@ -575,7 +579,6 @@ No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l1_grow.write_table; errno: 5; imm: off; once: off
-qcow2_free_clusters failed: Input/output error
write failed: Input/output error
No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@@ -586,7 +589,6 @@ No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l1_grow.write_table; errno: 28; imm: off; once: off
-qcow2_free_clusters failed: No space left on device
write failed: No space left on device
No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
@@ -597,7 +599,6 @@ No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l1_grow.activate_table; errno: 5; imm: off; once: off
-qcow2_free_clusters failed: Input/output error
write failed: Input/output error
96 leaked clusters were found on the image.
@@ -610,7 +611,6 @@ No errors were found on the image.
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
Event: l1_grow.activate_table; errno: 28; imm: off; once: off
-qcow2_free_clusters failed: No space left on device
write failed: No space left on device
96 leaked clusters were found on the image.
diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache
new file mode 100644
index 0000000000..33bad0d6ae
--- /dev/null
+++ b/tests/qemu-iotests/026.out.nocache
@@ -0,0 +1,626 @@
+QA output created by 026
+Errors while writing 128 kB
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_update; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 5; imm: off; once: on; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 5; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 5; imm: off; once: off; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 5; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Input/output error
+read failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 28; imm: off; once: on; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 28; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 28; imm: off; once: off; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_load; errno: 28; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: No space left on device
+read failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 5; imm: off; once: on; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 5; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 5; imm: off; once: off; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 5; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 28; imm: off; once: on; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 28; imm: off; once: on; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 28; imm: off; once: off; write
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_update; errno: 28; imm: off; once: off; write -b
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+127 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l2_alloc.write; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+1 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: write_aio; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_load; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_update_part; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 5; imm: off; once: on; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 5; imm: off; once: on; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 5; imm: off; once: off; write
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 5; imm: off; once: off; write -b
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: cluster_alloc; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+
+=== Refcout table growth tests ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+
+55 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+251 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write -b
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write
+write failed: No space left on device
+
+10 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write -b
+write failed: No space left on device
+
+23 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+
+=== L1 growth tests ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.alloc_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.alloc_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.alloc_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.alloc_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.write_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.write_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.write_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.write_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.activate_table; errno: 5; imm: off; once: on
+write failed: Input/output error
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.activate_table; errno: 5; imm: off; once: off
+write failed: Input/output error
+
+96 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.activate_table; errno: 28; imm: off; once: on
+write failed: No space left on device
+No errors were found on the image.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
+
+Event: l1_grow.activate_table; errno: 28; imm: off; once: off
+write failed: No space left on device
+
+96 leaked clusters were found on the image.
+This means waste of disk space, but no harm to data.
+*** done
diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
index cb510d6716..077fa64cbf 100644
--- a/tests/qemu-iotests/039.out
+++ b/tests/qemu-iotests/039.out
@@ -12,8 +12,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
incompatible_features 0x1
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
ERROR cluster 5 refcount=0 reference=1
+ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0
2 errors were found on the image.
Data may be corrupted, or further writes to the image may corrupt it.
@@ -24,7 +24,6 @@ read 512/512 bytes at offset 0
incompatible_features 0x1
== Repairing the image file must succeed ==
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
Repairing cluster 5 refcount=0 reference=1
The following inconsistencies were found and repaired:
@@ -44,7 +43,6 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
incompatible_features 0x1
-ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0
Repairing cluster 5 refcount=0 reference=1
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063
new file mode 100755
index 0000000000..de0cbbd8bb
--- /dev/null
+++ b/tests/qemu-iotests/063
@@ -0,0 +1,97 @@
+#!/bin/bash
+#
+# test of qemu-img convert -n - convert without creation
+#
+# Copyright (C) 2009 Red Hat, Inc.
+# Copyright (C) 2013 Alex Bligh (alex@alex.org.uk)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=alex@alex.org.uk
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ _cleanup_test_img
+ rm -f $TEST_IMG.orig $TEST_IMG.raw1 $TEST_IMG.raw2 # every extra image this test creates
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.pattern
+
+_supported_fmt qcow qcow2 vmdk qed raw
+_supported_proto generic
+_supported_os Linux
+
+_make_test_img 4M
+
+echo "== Testing conversion with -n fails with no target file =="
+# make sure the .orig target does not exist before converting into it
+rm -f $TEST_IMG.orig
+if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig >/dev/null 2>&1; then
+ exit 1
+fi
+
+echo "== Testing conversion with -n succeeds with a target file =="
+rm -f $TEST_IMG.orig
+cp $TEST_IMG $TEST_IMG.orig
+if ! $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig ; then
+ exit 1
+fi
+
+echo "== Testing conversion to raw is the same after conversion with -n =="
+# convert the source and the -n target to raw, then compare the two raw files
+if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG $TEST_IMG.raw1 ; then
+ exit 1
+fi
+
+if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG.orig $TEST_IMG.raw2 ; then
+ exit 1
+fi
+
+if ! cmp $TEST_IMG.raw1 $TEST_IMG.raw2 ; then
+ exit 1
+fi
+
+echo "== Testing conversion back to original format =="
+if ! $QEMU_IMG convert -f raw -O $IMGFMT -n $TEST_IMG.raw2 $TEST_IMG ; then
+ exit 1
+fi
+_check_test_img
+
+echo "== Testing conversion to a smaller file fails =="
+rm -f $TEST_IMG.orig
+mv $TEST_IMG $TEST_IMG.orig
+_make_test_img 2M
+if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG.orig $TEST_IMG >/dev/null 2>&1; then
+ exit 1
+fi
+
+rm -f $TEST_IMG.orig $TEST_IMG.raw1 $TEST_IMG.raw2
+
+echo "*** done"
+rm -f $seq.full
+status=0
+exit 0
diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out
new file mode 100644
index 0000000000..de1c99afd8
--- /dev/null
+++ b/tests/qemu-iotests/063.out
@@ -0,0 +1,10 @@
+QA output created by 063
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304
+== Testing conversion with -n fails with no target file ==
+== Testing conversion with -n succeeds with a target file ==
+== Testing conversion to raw is the same after conversion with -n ==
+== Testing conversion back to original format ==
+No errors were found on the image.
+== Testing conversion to a smaller file fails ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+*** done
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 74628ae637..4ecf497d8e 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -78,50 +78,50 @@ _wrapup()
if $showme
then
- :
+ :
elif $needwrap
then
- if [ -f check.time -a -f $tmp.time ]
- then
- cat check.time $tmp.time \
- | $AWK_PROG '
- { t[$1] = $2 }
-END { if (NR > 0) {
- for (i in t) print i " " t[i]
- }
- }' \
- | sort -n >$tmp.out
- mv $tmp.out check.time
- fi
-
- if [ -f $tmp.expunged ]
- then
- notrun=`wc -l <$tmp.expunged | sed -e 's/ *//g'`
- try=`expr $try - $notrun`
- list=`echo "$list" | sed -f $tmp.expunged`
- fi
-
- echo "" >>check.log
- date >>check.log
- echo $list | fmt | sed -e 's/^/ /' >>check.log
- $interrupt && echo "Interrupted!" >>check.log
-
- if [ ! -z "$notrun" ]
- then
- echo "Not run:$notrun"
- echo "Not run:$notrun" >>check.log
- fi
+ if [ -f check.time -a -f $tmp.time ]
+ then
+ cat check.time $tmp.time \
+ | $AWK_PROG '
+ { t[$1] = $2 }
+END { if (NR > 0) {
+ for (i in t) print i " " t[i]
+ }
+ }' \
+ | sort -n >$tmp.out
+ mv $tmp.out check.time
+ fi
+
+ if [ -f $tmp.expunged ]
+ then
+ notrun=`wc -l <$tmp.expunged | sed -e 's/ *//g'`
+ try=`expr $try - $notrun`
+ list=`echo "$list" | sed -f $tmp.expunged`
+ fi
+
+ echo "" >>check.log
+ date >>check.log
+ echo $list | fmt | sed -e 's/^/ /' >>check.log
+ $interrupt && echo "Interrupted!" >>check.log
+
+ if [ ! -z "$notrun" ]
+ then
+ echo "Not run:$notrun"
+ echo "Not run:$notrun" >>check.log
+ fi
if [ ! -z "$n_bad" -a $n_bad != 0 ]
- then
- echo "Failures:$bad"
- echo "Failed $n_bad of $try tests"
- echo "Failures:$bad" | fmt >>check.log
- echo "Failed $n_bad of $try tests" >>check.log
- else
- echo "Passed all $try tests"
- echo "Passed all $try tests" >>check.log
- fi
- needwrap=false
+ then
+ echo "Failures:$bad"
+ echo "Failed $n_bad of $try tests"
+ echo "Failures:$bad" | fmt >>check.log
+ echo "Failed $n_bad of $try tests" >>check.log
+ else
+ echo "Passed all $try tests"
+ echo "Passed all $try tests" >>check.log
+ fi
+ needwrap=false
fi
rm -f /tmp/*.out /tmp/*.err /tmp/*.time
@@ -185,82 +185,88 @@ do
if $showme
then
- echo
- continue
- elif [ -f expunged ] && $expunge && egrep "^$seq([ ]|\$)" expunged >/dev/null
+ echo
+ continue
+ elif [ -f expunged ] && $expunge && egrep "^$seq([ ]|\$)" expunged >/dev/null
then
- echo " - expunged"
- rm -f $seq.out.bad
- echo "/^$seq\$/d" >>$tmp.expunged
+ echo " - expunged"
+ rm -f $seq.out.bad
+ echo "/^$seq\$/d" >>$tmp.expunged
elif [ ! -f $seq ]
then
- echo " - no such test?"
- echo "/^$seq\$/d" >>$tmp.expunged
+ echo " - no such test?"
+ echo "/^$seq\$/d" >>$tmp.expunged
else
- # really going to try and run this one
- #
- rm -f $seq.out.bad
- lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
- if [ "X$lasttime" != X ]; then
- echo -n " ${lasttime}s ..."
- else
- echo -n " " # prettier output with timestamps.
- fi
- rm -f core $seq.notrun
-
- # for hangcheck ...
- echo "$seq" >/tmp/check.sts
-
- start=`_wallclock`
- $timestamp && echo -n " ["`date "+%T"`"]"
- [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
- MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
- ./$seq >$tmp.out 2>&1
- sts=$?
- $timestamp && _timestamp
- stop=`_wallclock`
-
- if [ -f core ]
- then
- echo -n " [dumped core]"
- mv core $seq.core
- err=true
- fi
-
- if [ -f $seq.notrun ]
- then
- $timestamp || echo -n " [not run] "
- $timestamp && echo " [not run]" && echo -n " $seq -- "
- cat $seq.notrun
- notrun="$notrun $seq"
- else
- if [ $sts -ne 0 ]
- then
- echo -n " [failed, exit status $sts]"
- err=true
- fi
- if [ ! -f $seq.out ]
- then
- echo " - no qualified output"
- err=true
- else
- if diff -w $seq.out $tmp.out >/dev/null 2>&1
- then
- echo ""
- if $err
- then
- :
- else
- echo "$seq `expr $stop - $start`" >>$tmp.time
- fi
- else
- echo " - output mismatch (see $seq.out.bad)"
- mv $tmp.out $seq.out.bad
- $diff -w $seq.out $seq.out.bad
- err=true
- fi
- fi
- fi
+ # really going to try and run this one
+ #
+ rm -f $seq.out.bad
+ lasttime=`sed -n -e "/^$seq /s/.* //p" <check.time`
+ if [ "X$lasttime" != X ]; then
+ echo -n " ${lasttime}s ..."
+ else
+ echo -n " " # prettier output with timestamps.
+ fi
+ rm -f core $seq.notrun
+
+ # for hangcheck ...
+ echo "$seq" >/tmp/check.sts
+
+ start=`_wallclock`
+ $timestamp && echo -n " ["`date "+%T"`"]"
+ [ ! -x $seq ] && chmod u+x $seq # ensure we can run it
+ MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \
+ ./$seq >$tmp.out 2>&1
+ sts=$?
+ $timestamp && _timestamp
+ stop=`_wallclock`
+
+ if [ -f core ]
+ then
+ echo -n " [dumped core]"
+ mv core $seq.core
+ err=true
+ fi
+
+ if [ -f $seq.notrun ]
+ then
+ $timestamp || echo -n " [not run] "
+ $timestamp && echo " [not run]" && echo -n " $seq -- "
+ cat $seq.notrun
+ notrun="$notrun $seq"
+ else
+ if [ $sts -ne 0 ]
+ then
+ echo -n " [failed, exit status $sts]"
+ err=true
+ fi
+
+ reference=$seq.out
+ if (echo $QEMU_IO_OPTIONS | grep -s -- '--nocache' > /dev/null); then
+ [ -f $seq.out.nocache ] && reference=$seq.out.nocache
+ fi
+
+ if [ ! -f $reference ]
+ then
+ echo " - no qualified output"
+ err=true
+ else
+ if diff -w $reference $tmp.out >/dev/null 2>&1
+ then
+ echo ""
+ if $err
+ then
+ :
+ else
+ echo "$seq `expr $stop - $start`" >>$tmp.time
+ fi
+ else
+ echo " - output mismatch (see $seq.out.bad)"
+ mv $tmp.out $seq.out.bad
+ $diff -w $reference $seq.out.bad
+ err=true
+ fi
+ fi
+ fi
fi
@@ -268,12 +274,12 @@ do
#
if $err
then
- bad="$bad $seq"
- n_bad=`expr $n_bad + 1`
- quick=false
+ bad="$bad $seq"
+ n_bad=`expr $n_bad + 1`
+ quick=false
fi
[ -f $seq.notrun ] || try=`expr $try + 1`
-
+
seq="after_$seq"
done
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 6826ea72fe..fecaf85074 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -54,58 +54,58 @@ do
if $group
then
- # arg after -g
- group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+ # arg after -g
+ group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
s/ .*//p
}'`
- if [ -z "$group_list" ]
- then
- echo "Group \"$r\" is empty or not defined?"
- exit 1
- fi
- [ ! -s $tmp.list ] && touch $tmp.list
- for t in $group_list
- do
- if grep -s "^$t\$" $tmp.list >/dev/null
- then
- :
- else
- echo "$t" >>$tmp.list
- fi
- done
- group=false
- continue
+ if [ -z "$group_list" ]
+ then
+ echo "Group \"$r\" is empty or not defined?"
+ exit 1
+ fi
+ [ ! -s $tmp.list ] && touch $tmp.list
+ for t in $group_list
+ do
+ if grep -s "^$t\$" $tmp.list >/dev/null
+ then
+ :
+ else
+ echo "$t" >>$tmp.list
+ fi
+ done
+ group=false
+ continue
elif $xgroup
then
- # arg after -x
- [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
- group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
+ # arg after -x
+ [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null
+ group_list=`sed -n <group -e 's/$/ /' -e "/^[0-9][0-9][0-9].* $r /"'{
s/ .*//p
}'`
- if [ -z "$group_list" ]
- then
- echo "Group \"$r\" is empty or not defined?"
- exit 1
- fi
- numsed=0
- rm -f $tmp.sed
- for t in $group_list
- do
- if [ $numsed -gt 100 ]
- then
- sed -f $tmp.sed <$tmp.list >$tmp.tmp
- mv $tmp.tmp $tmp.list
- numsed=0
- rm -f $tmp.sed
- fi
- echo "/^$t\$/d" >>$tmp.sed
- numsed=`expr $numsed + 1`
- done
- sed -f $tmp.sed <$tmp.list >$tmp.tmp
- mv $tmp.tmp $tmp.list
- xgroup=false
- continue
+ if [ -z "$group_list" ]
+ then
+ echo "Group \"$r\" is empty or not defined?"
+ exit 1
+ fi
+ numsed=0
+ rm -f $tmp.sed
+ for t in $group_list
+ do
+ if [ $numsed -gt 100 ]
+ then
+ sed -f $tmp.sed <$tmp.list >$tmp.tmp
+ mv $tmp.tmp $tmp.list
+ numsed=0
+ rm -f $tmp.sed
+ fi
+ echo "/^$t\$/d" >>$tmp.sed
+ numsed=`expr $numsed + 1`
+ done
+ sed -f $tmp.sed <$tmp.list >$tmp.tmp
+ mv $tmp.tmp $tmp.list
+ xgroup=false
+ continue
elif $imgopts
then
@@ -119,11 +119,11 @@ s/ .*//p
case "$r"
in
- -\? | -h | --help) # usage
- echo "Usage: $0 [options] [testlist]"'
+ -\? | -h | --help) # usage
+ echo "Usage: $0 [options] [testlist]"'
common options
- -v verbose
+ -v verbose
check options
-raw test raw (default)
@@ -138,162 +138,162 @@ check options
-sheepdog test sheepdog
-nbd test nbd
-ssh test ssh
- -xdiff graphical mode diff
- -nocache use O_DIRECT on backing file
- -misalign misalign memory allocations
- -n show me, do not run tests
+ -xdiff graphical mode diff
+ -nocache use O_DIRECT on backing file
+ -misalign misalign memory allocations
+ -n show me, do not run tests
-o options -o options to pass to qemu-img create/convert
- -T output timestamps
- -r randomize test order
-
+ -T output timestamps
+ -r randomize test order
+
testlist options
- -g group[,group...] include tests from these groups
- -x group[,group...] exclude tests from these groups
- NNN include test NNN
- NNN-NNN include test range (eg. 012-021)
+ -g group[,group...] include tests from these groups
+ -x group[,group...] exclude tests from these groups
+ NNN include test NNN
+ NNN-NNN include test range (eg. 012-021)
'
- exit 0
- ;;
-
- -raw)
- IMGFMT=raw
- xpand=false
- ;;
-
- -cow)
- IMGFMT=cow
- xpand=false
- ;;
-
- -qcow)
- IMGFMT=qcow
- xpand=false
- ;;
-
- -qcow2)
- IMGFMT=qcow2
- xpand=false
- ;;
-
- -qed)
- IMGFMT=qed
- xpand=false
- ;;
-
- -vdi)
- IMGFMT=vdi
- xpand=false
- ;;
-
- -vmdk)
- IMGFMT=vmdk
- xpand=false
- ;;
-
- -vpc)
- IMGFMT=vpc
- xpand=false
- ;;
-
- -rbd)
- IMGPROTO=rbd
- xpand=false
- ;;
- -sheepdog)
- IMGPROTO=sheepdog
- xpand=false
- ;;
- -nbd)
- IMGPROTO=nbd
- xpand=false
- ;;
+ exit 0
+ ;;
+
+ -raw)
+ IMGFMT=raw
+ xpand=false
+ ;;
+
+ -cow)
+ IMGFMT=cow
+ xpand=false
+ ;;
+
+ -qcow)
+ IMGFMT=qcow
+ xpand=false
+ ;;
+
+ -qcow2)
+ IMGFMT=qcow2
+ xpand=false
+ ;;
+
+ -qed)
+ IMGFMT=qed
+ xpand=false
+ ;;
+
+ -vdi)
+ IMGFMT=vdi
+ xpand=false
+ ;;
+
+ -vmdk)
+ IMGFMT=vmdk
+ xpand=false
+ ;;
+
+ -vpc)
+ IMGFMT=vpc
+ xpand=false
+ ;;
+
+ -rbd)
+ IMGPROTO=rbd
+ xpand=false
+ ;;
+ -sheepdog)
+ IMGPROTO=sheepdog
+ xpand=false
+ ;;
+ -nbd)
+ IMGPROTO=nbd
+ xpand=false
+ ;;
-ssh)
IMGPROTO=ssh
xpand=false
;;
- -nocache)
- QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
- xpand=false
- ;;
+ -nocache)
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache"
+ xpand=false
+ ;;
- -misalign)
- QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign"
- xpand=false
- ;;
+ -misalign)
+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign"
+ xpand=false
+ ;;
-valgrind)
valgrind=true
- xpand=false
+ xpand=false
;;
- -g) # -g group ... pick from group file
- group=true
- xpand=false
- ;;
-
- -xdiff) # graphical diff mode
- xpand=false
-
- if [ ! -z "$DISPLAY" ]
- then
- which xdiff >/dev/null 2>&1 && diff=xdiff
- which gdiff >/dev/null 2>&1 && diff=gdiff
- which tkdiff >/dev/null 2>&1 && diff=tkdiff
- which xxdiff >/dev/null 2>&1 && diff=xxdiff
- fi
- ;;
-
- -n) # show me, don't do it
- showme=true
- xpand=false
- ;;
+ -g) # -g group ... pick from group file
+ group=true
+ xpand=false
+ ;;
+
+ -xdiff) # graphical diff mode
+ xpand=false
+
+ if [ ! -z "$DISPLAY" ]
+ then
+ which xdiff >/dev/null 2>&1 && diff=xdiff
+ which gdiff >/dev/null 2>&1 && diff=gdiff
+ which tkdiff >/dev/null 2>&1 && diff=tkdiff
+ which xxdiff >/dev/null 2>&1 && diff=xxdiff
+ fi
+ ;;
+
+ -n) # show me, don't do it
+ showme=true
+ xpand=false
+ ;;
-o)
imgopts=true
xpand=false
;;
- -r) # randomize test order
- randomize=true
- xpand=false
- ;;
-
- -T) # turn on timestamp output
- timestamp=true
- xpand=false
- ;;
-
- -v)
- verbose=true
- xpand=false
- ;;
- -x) # -x group ... exclude from group file
- xgroup=true
- xpand=false
- ;;
- '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]')
- echo "No tests?"
- status=1
- exit $status
- ;;
-
- [0-9]*-[0-9]*)
- eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'`
- ;;
-
- [0-9]*-)
- eval `echo $r | sed -e 's/^/start=/' -e 's/-//'`
- end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'`
- if [ -z "$end" ]
- then
- echo "No tests in range \"$r\"?"
- status=1
- exit $status
- fi
- ;;
-
- *)
- start=$r
- end=$r
- ;;
+ -r) # randomize test order
+ randomize=true
+ xpand=false
+ ;;
+
+ -T) # turn on timestamp output
+ timestamp=true
+ xpand=false
+ ;;
+
+ -v)
+ verbose=true
+ xpand=false
+ ;;
+ -x) # -x group ... exclude from group file
+ xgroup=true
+ xpand=false
+ ;;
+ '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]')
+ echo "No tests?"
+ status=1
+ exit $status
+ ;;
+
+ [0-9]*-[0-9]*)
+ eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'`
+ ;;
+
+ [0-9]*-)
+ eval `echo $r | sed -e 's/^/start=/' -e 's/-//'`
+ end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'`
+ if [ -z "$end" ]
+ then
+ echo "No tests in range \"$r\"?"
+ status=1
+ exit $status
+ fi
+ ;;
+
+ *)
+ start=$r
+ end=$r
+ ;;
esac
@@ -303,26 +303,26 @@ testlist options
if $xpand
then
- have_test_arg=true
- $AWK_PROG </dev/null '
-BEGIN { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
- | while read id
- do
- if grep -s "^$id " group >/dev/null
- then
- # in group file ... OK
- echo $id >>$tmp.list
- else
- if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null
- then
- # expunged ... will be reported, but not run, later
- echo $id >>$tmp.list
- else
- # oops
- echo "$id - unknown test, ignored"
- fi
- fi
- done
+ have_test_arg=true
+ $AWK_PROG </dev/null '
+BEGIN { for (t='$start'; t<='$end'; t++) printf "%03d\n",t }' \
+ | while read id
+ do
+ if grep -s "^$id " group >/dev/null
+ then
+ # in group file ... OK
+ echo $id >>$tmp.list
+ else
+ if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null
+ then
+ # expunged ... will be reported, but not run, later
+ echo $id >>$tmp.list
+ else
+ # oops
+ echo "$id - unknown test, ignored"
+ fi
+ fi
+ done
fi
done
@@ -337,11 +337,11 @@ then
else
if $have_test_arg
then
- # had test numbers, but none in group file ... do nothing
- touch $tmp.list
+ # had test numbers, but none in group file ... do nothing
+ touch $tmp.list
else
- # no test numbers, do everything from group file
- sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <group >$tmp.list
+ # no test numbers, do everything from group file
+ sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' <group >$tmp.list
fi
fi
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 08a3f100b8..d794e624e7 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -19,7 +19,7 @@
# setup and check for config parameters, and in particular
#
# EMAIL - email of the script runner.
-# TEST_DIR - scratch test directory
+# TEST_DIR - scratch test directory
#
# - These can be added to $HOST_CONFIG_DIR (witch default to ./config)
# below or a separate local configuration file can be used (using
@@ -111,11 +111,11 @@ export QEMU_NBD=$QEMU_NBD_PROG
[ -f /etc/qemu-iotest.config ] && . /etc/qemu-iotest.config
if [ -z "$TEST_DIR" ]; then
- TEST_DIR=`pwd`/scratch
+ TEST_DIR=`pwd`/scratch
fi
if [ ! -e "$TEST_DIR" ]; then
- mkdir "$TEST_DIR"
+ mkdir "$TEST_DIR"
fi
if [ ! -d "$TEST_DIR" ]; then
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 97a31ff0b1..5dfda63e59 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -25,19 +25,19 @@
# Outputs suitable message to stdout if it's not in range.
#
# A verbose option, -v, may be used as the LAST argument
-#
-# e.g.
+#
+# e.g.
# foo: 0.0298 = 0.03 +/- 5%
-# _within_tolerance "foo" 0.0298 0.03 5%
-#
+# _within_tolerance "foo" 0.0298 0.03 5%
+#
# foo: 0.0298 = 0.03 +/- 0.01
# _within_tolerance "foo" 0.0298 0.03 0.01
#
# foo: 0.0298 = 0.03 -0.01 +0.002
# _within_tolerance "foo" 0.0298 0.03 0.01 0.002
#
-# foo: verbose output of 0.0298 = 0.03 +/- 5%
-# _within_tolerance "foo" 0.0298 0.03 5% -v
+# foo: verbose output of 0.0298 = 0.03 +/- 5%
+# _within_tolerance "foo" 0.0298 0.03 5% -v
_within_tolerance()
{
_name=$1
@@ -51,10 +51,10 @@ _within_tolerance()
# maxtol arg is optional
# verbose arg is optional
if [ $# -ge 5 ]
- then
+ then
if [ "$5" = "-v" ]
then
- _verbose=1
+ _verbose=1
else
_maxtol=$5
fi
@@ -65,18 +65,18 @@ _within_tolerance()
fi
# find min with or without %
- _mintolerance=`echo $_mintol | sed -e 's/%//'`
+ _mintolerance=`echo $_mintol | sed -e 's/%//'`
if [ $_mintol = $_mintolerance ]
- then
+ then
_min=`echo "scale=5; $_correct_val-$_mintolerance" | bc`
else
_min=`echo "scale=5; $_correct_val-$_mintolerance*0.01*$_correct_val" | bc`
fi
# find max with or without %
- _maxtolerance=`echo $_maxtol | sed -e 's/%//'`
+ _maxtolerance=`echo $_maxtol | sed -e 's/%//'`
if [ $_maxtol = $_maxtolerance ]
- then
+ then
_max=`echo "scale=5; $_correct_val+$_maxtolerance" | bc`
else
_max=`echo "scale=5; $_correct_val+$_maxtolerance*0.01*$_correct_val" | bc`
@@ -88,7 +88,7 @@ _within_tolerance()
cat <<EOF >$tmp.bc.1
scale=5;
if ($_min <= $_given_val) 1;
-if ($_min > $_given_val) 0;
+if ($_min > $_given_val) 0;
EOF
cat <<EOF >$tmp.bc.2
@@ -102,21 +102,21 @@ EOF
rm -f $tmp.bc.[12]
- _in_range=`expr $_above_min \& $_below_max`
+ _in_range=`expr $_above_min \& $_below_max`
# fix up min, max precision for output
# can vary for 5.3, 6.2
_min=`echo $_min | sed -e 's/0*$//'` # get rid of trailling zeroes
_max=`echo $_max | sed -e 's/0*$//'` # get rid of trailling zeroes
- if [ $_in_range -eq 1 ]
+ if [ $_in_range -eq 1 ]
then
- [ $_verbose -eq 1 ] && echo $_name is in range
- return 0
+ [ $_verbose -eq 1 ] && echo $_name is in range
+ return 0
else
- [ $_verbose -eq 1 ] && echo $_name has value of $_given_val
- [ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max
- return 1
+ [ $_verbose -eq 1 ] && echo $_name has value of $_given_val
+ [ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max
+ return 1
fi
}
@@ -125,7 +125,7 @@ EOF
_filter_date()
{
sed \
- -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z] *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/'
+ -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z] *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/'
}
# replace occurrences of the actual TEST_DIR value with TEST_DIR
diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern
index 85a40eecc0..00e0f605fd 100644
--- a/tests/qemu-iotests/common.pattern
+++ b/tests/qemu-iotests/common.pattern
@@ -106,8 +106,8 @@ function io_test2() {
local num=$3
# Pattern (repeat after 9 clusters):
- # used - used - free - used - compressed - compressed -
- # free - free - compressed
+ # used - used - free - used - compressed - compressed -
+ # free - free - compressed
# Write the clusters to be compressed
echo === Clusters to be compressed [1]
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 5e077c3573..88fecf7870 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -20,17 +20,17 @@
dd()
{
if [ "$HOSTOS" == "Linux" ]
- then
- command dd --help | grep noxfer > /dev/null 2>&1
-
- if [ "$?" -eq 0 ]
- then
- command dd status=noxfer $@
- else
- command dd $@
- fi
+ then
+ command dd --help | grep noxfer > /dev/null 2>&1
+
+ if [ "$?" -eq 0 ]
+ then
+ command dd status=noxfer $@
+ else
+ command dd $@
+ fi
else
- command dd $@
+ command dd $@
fi
}
@@ -193,8 +193,8 @@ _get_pids_by_name()
{
if [ $# -ne 1 ]
then
- echo "Usage: _get_pids_by_name process-name" 1>&2
- exit 1
+ echo "Usage: _get_pids_by_name process-name" 1>&2
+ exit 1
fi
# Algorithm ... all ps(1) variants have a time of the form MM:SS or
@@ -206,12 +206,12 @@ _get_pids_by_name()
ps $PS_ALL_FLAGS \
| sed -n \
- -e 's/$/ /' \
- -e 's/[ ][ ]*/ /g' \
- -e 's/^ //' \
- -e 's/^[^ ]* //' \
- -e "/[0-9]:[0-9][0-9] *[^ ]*\/$1 /s/ .*//p" \
- -e "/[0-9]:[0-9][0-9] *$1 /s/ .*//p"
+ -e 's/$/ /' \
+ -e 's/[ ][ ]*/ /g' \
+ -e 's/^ //' \
+ -e 's/^[^ ]* //' \
+ -e "/[0-9]:[0-9][0-9] *[^ ]*\/$1 /s/ .*//p" \
+ -e "/[0-9]:[0-9][0-9] *$1 /s/ .*//p"
}
# fqdn for localhost
@@ -229,8 +229,8 @@ _need_to_be_root()
id=`id | $SED_PROG -e 's/(.*//' -e 's/.*=//'`
if [ "$id" -ne 0 ]
then
- echo "Arrgh ... you need to be root (not uid=$id) to run this test"
- exit 1
+ echo "Arrgh ... you need to be root (not uid=$id) to run this test"
+ exit 1
fi
}
@@ -248,33 +248,33 @@ _need_to_be_root()
_do()
{
if [ $# -eq 1 ]; then
- _cmd=$1
+ _cmd=$1
elif [ $# -eq 2 ]; then
- _note=$1
- _cmd=$2
- echo -n "$_note... "
+ _note=$1
+ _cmd=$2
+ echo -n "$_note... "
else
- echo "Usage: _do [note] cmd" 1>&2
- status=1; exit
+ echo "Usage: _do [note] cmd" 1>&2
+ status=1; exit
fi
(eval "echo '---' \"$_cmd\"") >>$here/$seq.full
(eval "$_cmd") >$tmp._out 2>&1; ret=$?
cat $tmp._out >>$here/$seq.full
if [ $# -eq 2 ]; then
- if [ $ret -eq 0 ]; then
- echo "done"
- else
- echo "fail"
- fi
+ if [ $ret -eq 0 ]; then
+ echo "done"
+ else
+ echo "fail"
+ fi
fi
if [ $ret -ne 0 ] \
- && [ "$_do_die_on_error" = "always" \
- -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ]
+ && [ "$_do_die_on_error" = "always" \
+ -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ]
then
- [ $# -ne 2 ] && echo
- eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full"
- status=1; exit
+ [ $# -ne 2 ] && echo
+ eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full"
+ status=1; exit
fi
return $ret
@@ -305,9 +305,9 @@ _fail()
_supported_fmt()
{
for f; do
- if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then
- return
- fi
+ if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then
+ return
+ fi
done
_notrun "not suitable for this image format: $IMGFMT"
@@ -318,9 +318,9 @@ _supported_fmt()
_supported_proto()
{
for f; do
- if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then
- return
- fi
+ if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then
+ return
+ fi
done
_notrun "not suitable for this image protocol: $IMGPROTO"
@@ -332,10 +332,10 @@ _supported_os()
{
for h
do
- if [ "$h" = "$HOSTOS" ]
- then
- return
- fi
+ if [ "$h" = "$HOSTOS" ]
+ then
+ return
+ fi
done
_notrun "not suitable for this OS: $HOSTOS"
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index b6962421fa..316b1dd75c 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -66,3 +66,4 @@
059 rw auto
060 rw auto
062 rw auto
+063 rw auto
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 532a1de3f9..c4fe0fc3b7 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -13,6 +13,7 @@
#include <glib.h>
#include "block/aio.h"
#include "qemu/timer.h"
+#include "qemu/sockets.h"
AioContext *ctx;
@@ -375,7 +376,10 @@ static void test_timer_schedule(void)
/* aio_poll will not block to wait for timers to complete unless it has
* an fd to wait on. Fixing this breaks other tests. So create a dummy one.
*/
- g_assert(!pipe2(pipefd, O_NONBLOCK));
+ g_assert(!qemu_pipe(pipefd));
+ qemu_set_nonblock(pipefd[0]);
+ qemu_set_nonblock(pipefd[1]);
+
aio_set_fd_handler(ctx, pipefd[0],
dummy_io_handler_read, NULL, NULL);
aio_poll(ctx, false);
@@ -716,7 +720,10 @@ static void test_source_timer_schedule(void)
/* aio_poll will not block to wait for timers to complete unless it has
* an fd to wait on. Fixing this breaks other tests. So create a dummy one.
*/
- g_assert(!pipe2(pipefd, O_NONBLOCK));
+ g_assert(!qemu_pipe(pipefd));
+ qemu_set_nonblock(pipefd[0]);
+ qemu_set_nonblock(pipefd[1]);
+
aio_set_fd_handler(ctx, pipefd[0],
dummy_io_handler_read, NULL, NULL);
do {} while (g_main_context_iteration(NULL, false));
diff --git a/tests/test-throttle.c b/tests/test-throttle.c
new file mode 100644
index 0000000000..760812645b
--- /dev/null
+++ b/tests/test-throttle.c
@@ -0,0 +1,481 @@
+/*
+ * Throttle infrastructure tests
+ *
+ * Copyright Nodalink, SARL. 2013
+ *
+ * Authors:
+ * Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <math.h>
+#include "qemu/throttle.h"
+
+LeakyBucket bkt;
+ThrottleConfig cfg;
+ThrottleState ts;
+
+/* Useful helper: compare two doubles for approximate equality
+ *
+ * @x:   the first value
+ * @y:   the second value
+ * @ret: true if the two values differ by less than 1e-6
+ */
+static bool double_cmp(double x, double y)
+{
+    /* the operands are doubles, so use fabs() and not the long double
+     * variant fabsl()
+     */
+    return fabs(x - y) < 1e-6;
+}
+
+/* tests for single bucket operations */
+/* Check throttle_leak_bucket() on the global bucket: with avg = 150, a
+ * delta of NANOSECONDS_PER_SECOND / 150 leaks about one unit, the level is
+ * never allowed to drop below zero and avg/max are left untouched.
+ */
+static void test_leak_bucket(void)
+{
+ /* set initial value */
+ bkt.avg = 150;
+ bkt.max = 15;
+ bkt.level = 1.5;
+
+ /* leak one operation's worth of time: level goes from 1.5 to 0.5 */
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+ g_assert(bkt.avg == 150);
+ g_assert(bkt.max == 15);
+ g_assert(double_cmp(bkt.level, 0.5));
+
+ /* leak again, emptying the bucket (the level is clamped at zero) */
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+ g_assert(bkt.avg == 150);
+ g_assert(bkt.max == 15);
+ g_assert(double_cmp(bkt.level, 0));
+
+ /* check that the bucket level won't go lower */
+ throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150);
+ g_assert(bkt.avg == 150);
+ g_assert(bkt.max == 15);
+ g_assert(double_cmp(bkt.level, 0));
+}
+
+/* Check throttle_compute_wait() on the global bucket: no wait when no
+ * limit is set or when the level does not exceed max, and a wait
+ * proportional to the excess when the level is above max.
+ */
+static void test_compute_wait(void)
+{
+ int64_t wait;
+ int64_t result;
+
+ /* no operation limit set (avg == 0): never wait */
+ bkt.avg = 0;
+ bkt.max = 15;
+ bkt.level = 1.5;
+ wait = throttle_compute_wait(&bkt);
+ g_assert(!wait);
+
+ /* zero delta: level equals max */
+ bkt.avg = 150;
+ bkt.max = 15;
+ bkt.level = 15;
+ wait = throttle_compute_wait(&bkt);
+ g_assert(!wait);
+
+ /* below zero delta: level is lower than max */
+ bkt.avg = 150;
+ bkt.max = 15;
+ bkt.level = 9;
+ wait = throttle_compute_wait(&bkt);
+ g_assert(!wait);
+
+ /* half an operation above max */
+ bkt.avg = 150;
+ bkt.max = 15;
+ bkt.level = 15.5;
+ wait = throttle_compute_wait(&bkt);
+ /* time required to do half an operation at 150 operations per second */
+ result = (int64_t) NANOSECONDS_PER_SECOND / 150 / 2;
+ g_assert(wait == result);
+}
+
+/* functions to test ThrottleState initialization/destroy methods */
+
+/* Dummy read timer callback handed to throttle_init(); does nothing */
+static void read_timer_cb(void *opaque)
+{
+}
+
+/* Dummy write timer callback handed to throttle_init(); does nothing */
+static void write_timer_cb(void *opaque)
+{
+}
+
+/* Check that throttle_init() sets the clock type, creates both timers and
+ * clears every other field of the global ThrottleState.
+ */
+static void test_init(void)
+{
+ int i;
+
+ /* fill the structure with garbage so that cleared fields are detectable */
+ memset(&ts, 1, sizeof(ts));
+
+ /* init the structure */
+ throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+
+ /* check initialized fields */
+ g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL);
+ g_assert(ts.timers[0]);
+ g_assert(ts.timers[1]);
+
+ /* check that the other fields were cleared */
+ g_assert(!ts.previous_leak);
+ g_assert(!ts.cfg.op_size);
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ g_assert(!ts.cfg.buckets[i].avg);
+ g_assert(!ts.cfg.buckets[i].max);
+ g_assert(!ts.cfg.buckets[i].level);
+ }
+
+ throttle_destroy(&ts);
+}
+
+/* Check that throttle_destroy() resets both timer pointers to NULL */
+static void test_destroy(void)
+{
+ int i;
+ throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ throttle_destroy(&ts);
+ /* both the read and the write timer pointers must have been cleared */
+ for (i = 0; i < 2; i++) {
+ g_assert(!ts.timers[i]);
+ }
+}
+
+/* function to test throttle_config and throttle_get_config */
+/* A fully populated configuration is applied with throttle_config() and
+ * read back with throttle_get_config(). The avg values and op_size must
+ * come back unchanged, a zero max with a non-zero avg must have been
+ * replaced by avg / 10, and every bucket level must have been reset.
+ */
+static void test_config_functions(void)
+{
+ int i;
+ ThrottleConfig orig_cfg, final_cfg;
+
+ orig_cfg.buckets[THROTTLE_BPS_TOTAL].avg = 153;
+ orig_cfg.buckets[THROTTLE_BPS_READ].avg = 56;
+ orig_cfg.buckets[THROTTLE_BPS_WRITE].avg = 1;
+
+ orig_cfg.buckets[THROTTLE_OPS_TOTAL].avg = 150;
+ orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69;
+ orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23;
+
+ orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */
+ orig_cfg.buckets[THROTTLE_BPS_READ].max = 1; /* should not be corrected */
+ orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120;
+
+ orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150;
+ orig_cfg.buckets[THROTTLE_OPS_READ].max = 400;
+ orig_cfg.buckets[THROTTLE_OPS_WRITE].max = 500;
+
+ /* non-zero levels: throttle_config() is expected to reset them */
+ orig_cfg.buckets[THROTTLE_BPS_TOTAL].level = 45;
+ orig_cfg.buckets[THROTTLE_BPS_READ].level = 65;
+ orig_cfg.buckets[THROTTLE_BPS_WRITE].level = 23;
+
+ orig_cfg.buckets[THROTTLE_OPS_TOTAL].level = 1;
+ orig_cfg.buckets[THROTTLE_OPS_READ].level = 90;
+ orig_cfg.buckets[THROTTLE_OPS_WRITE].level = 75;
+
+ orig_cfg.op_size = 1;
+
+ throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+ /* the structure was reset by throttle_init: previous_leak should be zero */
+ g_assert(!ts.previous_leak);
+ throttle_config(&ts, &orig_cfg);
+
+ /* has previous leak been initialized by throttle_config ? */
+ /* NOTE(review): assumes the clock reads a non-zero value here - confirm */
+ g_assert(ts.previous_leak);
+
+ /* get back the fixed configuration */
+ throttle_get_config(&ts, &final_cfg);
+
+ throttle_destroy(&ts);
+
+ g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].avg == 153);
+ g_assert(final_cfg.buckets[THROTTLE_BPS_READ].avg == 56);
+ g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].avg == 1);
+
+ g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].avg == 150);
+ g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69);
+ g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23);
+
+ /* a zero max is replaced by avg / 10, i.e. 153 / 10 */
+ g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */
+ g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 1); /* not fixed */
+ g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120);
+
+ g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150);
+ g_assert(final_cfg.buckets[THROTTLE_OPS_READ].max == 400);
+ g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].max == 500);
+
+ g_assert(final_cfg.op_size == 1);
+
+ /* check that the bucket levels have been cleared */
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ g_assert(!final_cfg.buckets[i].level);
+ }
+}
+
+/* functions to test if throttling is enabled by a config */
+
+/* Store a value in one bucket of the global configuration
+ *
+ * @is_max: true to write the max field, false to write the avg field
+ * @index:  the index of the bucket to modify
+ * @value:  the value to store
+ */
+static void set_cfg_value(bool is_max, int index, int value)
+{
+    if (!is_max) {
+        cfg.buckets[index].avg = value;
+        return;
+    }
+
+    cfg.buckets[index].max = value;
+}
+
+/* Check throttle_enabled(): an all-zero configuration is disabled, a
+ * positive avg in any single bucket enables throttling and a negative avg
+ * does not.
+ */
+static void test_enabled(void)
+{
+ int i;
+
+ /* nothing configured: throttling is disabled */
+ memset(&cfg, 0, sizeof(cfg));
+ g_assert(!throttle_enabled(&cfg));
+
+ /* a positive avg in any one bucket enables throttling */
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(false, i, 150);
+ g_assert(throttle_enabled(&cfg));
+ }
+
+ /* a negative avg does not enable throttling */
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(false, i, -150);
+ g_assert(!throttle_enabled(&cfg));
+ }
+}
+
+/* test functions for throttle_conflicting */
+
+/* Check throttle_conflicting() for one total/read/write bucket triple
+ *
+ * @is_max: true to exercise the max fields, false to exercise the avg fields
+ * @total:  index of the total bucket
+ * @read:   index of the read bucket
+ * @write:  index of the write bucket
+ */
+static void test_conflicts_for_one_set(bool is_max,
+ int total,
+ int read,
+ int write)
+{
+ /* an empty configuration has no conflict */
+ memset(&cfg, 0, sizeof(cfg));
+ g_assert(!throttle_conflicting(&cfg));
+
+ /* total + read conflicts */
+ set_cfg_value(is_max, total, 1);
+ set_cfg_value(is_max, read, 1);
+ g_assert(throttle_conflicting(&cfg));
+
+ /* total + write conflicts */
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(is_max, total, 1);
+ set_cfg_value(is_max, write, 1);
+ g_assert(throttle_conflicting(&cfg));
+
+ /* total + read + write conflicts */
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(is_max, total, 1);
+ set_cfg_value(is_max, read, 1);
+ set_cfg_value(is_max, write, 1);
+ g_assert(throttle_conflicting(&cfg));
+
+ /* total alone does not conflict */
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(is_max, total, 1);
+ g_assert(!throttle_conflicting(&cfg));
+
+ /* read + write without total does not conflict */
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(is_max, read, 1);
+ set_cfg_value(is_max, write, 1);
+ g_assert(!throttle_conflicting(&cfg));
+}
+
+/* Run the conflict checks on the bps and ops bucket triples, first for the
+ * avg fields and then for the max fields.
+ */
+static void test_conflicting_config(void)
+{
+ /* bps average conflicts */
+ test_conflicts_for_one_set(false,
+ THROTTLE_BPS_TOTAL,
+ THROTTLE_BPS_READ,
+ THROTTLE_BPS_WRITE);
+
+ /* ops average conflicts */
+ test_conflicts_for_one_set(false,
+ THROTTLE_OPS_TOTAL,
+ THROTTLE_OPS_READ,
+ THROTTLE_OPS_WRITE);
+
+ /* bps max conflicts */
+ test_conflicts_for_one_set(true,
+ THROTTLE_BPS_TOTAL,
+ THROTTLE_BPS_READ,
+ THROTTLE_BPS_WRITE);
+ /* ops max conflicts */
+ test_conflicts_for_one_set(true,
+ THROTTLE_OPS_TOTAL,
+ THROTTLE_OPS_READ,
+ THROTTLE_OPS_WRITE);
+}
+/* functions to test the throttle_is_valid function */
+
+/* Store @value in the avg field and then in the max field of each bucket,
+ * one field at a time, and check the verdict of throttle_is_valid()
+ *
+ * @value:           the value to store
+ * @should_be_valid: the expected result of throttle_is_valid()
+ */
+static void test_is_valid_for_value(int value, bool should_be_valid)
+{
+ int is_max, index;
+ /* is_max == 0 exercises the avg fields, is_max == 1 the max fields */
+ for (is_max = 0; is_max < 2; is_max++) {
+ for (index = 0; index < BUCKETS_COUNT; index++) {
+ memset(&cfg, 0, sizeof(cfg));
+ set_cfg_value(is_max, index, value);
+ g_assert(throttle_is_valid(&cfg) == should_be_valid);
+ }
+ }
+}
+
+/* Check throttle_is_valid() with a negative, a zero and a positive value */
+static void test_is_valid(void)
+{
+ /* negative numbers are invalid */
+ test_is_valid_for_value(-1, false);
+ /* zero is valid */
+ test_is_valid_for_value(0, true);
+ /* positive numbers are valid */
+ test_is_valid_for_value(1, true);
+}
+
+/* Check throttle_have_timer() before and after throttle_init() */
+static void test_have_timer(void)
+{
+ /* zero the structure */
+ memset(&ts, 0, sizeof(ts));
+
+ /* no timer set should return false */
+ g_assert(!throttle_have_timer(&ts));
+
+ /* init the structure */
+ throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+
+ /* timer set by init should return true */
+ g_assert(throttle_have_timer(&ts));
+
+ throttle_destroy(&ts);
+}
+
+/* Run one accounting scenario and compare the resulting bucket levels
+ *
+ * One read and one write of @size are accounted against a freshly
+ * configured ThrottleState, then the levels of the total, read and write
+ * buckets of the selected family (bps or ops) are compared with the
+ * expected values.
+ *
+ * @ret: true if the three levels match the expected results else false
+ */
+static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */
+                               int size, /* size of the operation to do */
+                               double avg, /* io limit */
+                               uint64_t op_size, /* ideal size of an io */
+                               double total_result,
+                               double read_result,
+                               double write_result)
+{
+    BucketType to_test[2][3] = { { THROTTLE_BPS_TOTAL,
+                                   THROTTLE_BPS_READ,
+                                   THROTTLE_BPS_WRITE, },
+                                 { THROTTLE_OPS_TOTAL,
+                                   THROTTLE_OPS_READ,
+                                   THROTTLE_OPS_WRITE, } };
+    /* expected levels, in the same order as a row of to_test */
+    const double expected[3] = { total_result, read_result, write_result };
+    ThrottleConfig test_cfg;
+    bool ok = true;
+    int i;
+
+    /* start from a zeroed configuration: throttle_config() copies the whole
+     * structure and reads avg and max of every bucket, so no field may be
+     * left uninitialized
+     */
+    memset(&test_cfg, 0, sizeof(test_cfg));
+
+    for (i = 0; i < 3; i++) {
+        test_cfg.buckets[to_test[is_ops][i]].avg = avg;
+    }
+
+    test_cfg.op_size = op_size;
+
+    throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts);
+    throttle_config(&ts, &test_cfg);
+
+    /* account a read */
+    throttle_account(&ts, false, size);
+    /* account a write */
+    throttle_account(&ts, true, size);
+
+    /* check the total, read and write results in turn */
+    for (i = 0; i < 3; i++) {
+        BucketType index = to_test[is_ops][i];
+
+        if (!double_cmp(ts.cfg.buckets[index].level, expected[i])) {
+            ok = false;
+            break;
+        }
+    }
+
+    /* always release the timers, including when a comparison failed */
+    throttle_destroy(&ts);
+
+    return ok;
+}
+
+/* Check throttle_account() for the bps and the ops buckets
+ *
+ * The do_test_accounting() arguments are, in order: is_ops, size, avg,
+ * op_size, then the expected total, read and write bucket levels after one
+ * read and one write of the given size.
+ */
+static void test_accounting(void)
+{
+ /* tests for bps */
+
+ /* op of size 1 */
+ g_assert(do_test_accounting(false,
+ 1 * 512,
+ 150,
+ 0,
+ 1024,
+ 512,
+ 512));
+
+ /* op of size 2 */
+ g_assert(do_test_accounting(false,
+ 2 * 512,
+ 150,
+ 0,
+ 2048,
+ 1024,
+ 1024));
+
+ /* op of size 2 and orthogonal parameter change (op_size is set) */
+ g_assert(do_test_accounting(false,
+ 2 * 512,
+ 150,
+ 17,
+ 2048,
+ 1024,
+ 1024));
+
+
+ /* tests for ops */
+
+ /* op of size 1: without op_size each operation counts for one unit */
+ g_assert(do_test_accounting(true,
+ 1 * 512,
+ 150,
+ 0,
+ 2,
+ 1,
+ 1));
+
+ /* op of size 2 */
+ g_assert(do_test_accounting(true,
+ 2 * 512,
+ 150,
+ 0,
+ 2,
+ 1,
+ 1));
+
+ /* jumbo op accounting fragmentation : size 64 with op size of 13 units */
+ g_assert(do_test_accounting(true,
+ 64 * 512,
+ 150,
+ 13 * 512,
+ (64.0 * 2) / 13,
+ (64.0 / 13),
+ (64.0 / 13)));
+
+ /* same with orthogonal parameter change (avg is doubled) */
+ g_assert(do_test_accounting(true,
+ 64 * 512,
+ 300,
+ 13 * 512,
+ (64.0 * 2) / 13,
+ (64.0 / 13),
+ (64.0 / 13)));
+}
+
+/* Test entry point: register every throttle test case with the GLib test
+ * framework and run them.
+ */
+int main(int argc, char **argv)
+{
+ init_clocks();
+ /* iterate the default GLib main context, without blocking, until no
+ * more events are dispatched
+ */
+ do {} while (g_main_context_iteration(NULL, false));
+
+ /* tests in the same order as the header function declarations */
+ g_test_init(&argc, &argv, NULL);
+ g_test_add_func("/throttle/leak_bucket", test_leak_bucket);
+ g_test_add_func("/throttle/compute_wait", test_compute_wait);
+ g_test_add_func("/throttle/init", test_init);
+ g_test_add_func("/throttle/destroy", test_destroy);
+ g_test_add_func("/throttle/have_timer", test_have_timer);
+ g_test_add_func("/throttle/config/enabled", test_enabled);
+ g_test_add_func("/throttle/config/conflicting", test_conflicting_config);
+ g_test_add_func("/throttle/config/is_valid", test_is_valid);
+ g_test_add_func("/throttle/config_functions", test_config_functions);
+ g_test_add_func("/throttle/accounting", test_accounting);
+ return g_test_run();
+}
+
diff --git a/util/Makefile.objs b/util/Makefile.objs
index dc72ab0721..2bb13a2a59 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -11,3 +11,4 @@ util-obj-y += iov.o aes.o qemu-config.o qemu-sockets.o uri.o notify.o
util-obj-y += qemu-option.o qemu-progress.o
util-obj-y += hexdump.o
util-obj-y += crc32c.o
+util-obj-y += throttle.o
diff --git a/util/throttle.c b/util/throttle.c
new file mode 100644
index 0000000000..02e6f15587
--- /dev/null
+++ b/util/throttle.c
@@ -0,0 +1,396 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ * Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/throttle.h"
+#include "qemu/timer.h"
+
+/* Make a bucket leak
+ *
+ * The level is lowered by avg units per second of elapsed time and is
+ * never allowed to go below zero.
+ *
+ * @bkt: the bucket to leak
+ * @delta_ns: the time delta in ns
+ */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
+{
+ double leak;
+
+ /* compute how much to leak */
+ leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+
+ /* make the bucket leak, clamping the level at zero */
+ bkt->level = MAX(bkt->level - leak, 0);
+}
+
+/* Leak every bucket in proportion to the time elapsed since the last leak
+ *
+ * @ts:  the throttle state holding the buckets
+ * @now: the current timestamp in ns
+ */
+static void throttle_do_leak(ThrottleState *ts, int64_t now)
+{
+    int64_t elapsed_ns = now - ts->previous_leak;
+    int idx;
+
+    /* the reference point is always updated, even when nothing leaks */
+    ts->previous_leak = now;
+
+    /* only a strictly positive interval makes the buckets leak */
+    if (elapsed_ns > 0) {
+        for (idx = 0; idx < BUCKETS_COUNT; idx++) {
+            throttle_leak_bucket(&ts->cfg.buckets[idx], elapsed_ns);
+        }
+    }
+}
+
+/* do the real job of computing the time to wait
+ *
+ * The computation is done in double precision and the result is converted
+ * to int64_t on return. The only caller in this file returns early when
+ * the limit is zero, so the division is not reached with a zero limit.
+ *
+ * @limit: the throttling limit, in units per second
+ * @extra: the number of operations to delay
+ * @ret: the time to wait in ns
+ */
+static int64_t throttle_do_compute_wait(double limit, double extra)
+{
+ double wait = extra * NANOSECONDS_PER_SECOND;
+ wait /= limit;
+ return wait;
+}
+
+/* Compute the wait time in ns that a leaky bucket should trigger
+ *
+ * @bkt: the leaky bucket we operate on
+ * @ret: the resulting wait time in ns or 0 if the operation can go through
+ */
+int64_t throttle_compute_wait(LeakyBucket *bkt)
+{
+    /* the number of units by which the level exceeds the allowed maximum */
+    double excess = bkt->level - bkt->max;
+
+    /* no limit configured or bucket not above its maximum: do not wait */
+    if (!bkt->avg || excess <= 0) {
+        return 0;
+    }
+
+    return throttle_do_compute_wait(bkt->avg, excess);
+}
+
+/* Compute the time this IO must wait
+ *
+ * The result is the longest wait among the total buckets and the buckets
+ * specific to the direction of the IO.
+ *
+ * @ts:       the throttle state we are working on
+ * @is_write: true if the current IO is a write, false if it's a read
+ * @ret:      time to wait
+ */
+static int64_t throttle_compute_wait_for(ThrottleState *ts,
+                                         bool is_write)
+{
+    /* total buckets first, then the read or the write buckets */
+    const BucketType buckets[4] = {
+        THROTTLE_BPS_TOTAL,
+        THROTTLE_OPS_TOTAL,
+        is_write ? THROTTLE_BPS_WRITE : THROTTLE_BPS_READ,
+        is_write ? THROTTLE_OPS_WRITE : THROTTLE_OPS_READ,
+    };
+    int64_t longest = 0;
+    int n;
+
+    for (n = 0; n < 4; n++) {
+        int64_t wait = throttle_compute_wait(&ts->cfg.buckets[buckets[n]]);
+
+        longest = wait > longest ? wait : longest;
+    }
+
+    return longest;
+}
+
+/* Compute when the timer for this type of operation should fire
+ *
+ * @ts:             the throttle state we are working on
+ * @is_write:       the type of operation
+ * @now:            the current clock timestamp
+ * @next_timestamp: the resulting timestamp, equal to @now when no wait
+ *                  is needed
+ * @ret:            true if a timer must be set
+ */
+bool throttle_compute_timer(ThrottleState *ts,
+                            bool is_write,
+                            int64_t now,
+                            int64_t *next_timestamp)
+{
+    int64_t delay;
+
+    /* leak proportionally to the time elapsed */
+    throttle_do_leak(ts, now);
+
+    /* the wait time, zero when the operation can go through */
+    delay = throttle_compute_wait_for(ts, is_write);
+
+    /* a zero delay leaves the timestamp at @now */
+    *next_timestamp = now + delay;
+
+    return delay != 0;
+}
+
+/* To be called first on the ThrottleState
+ *
+ * The structure is zeroed, then the clock type is recorded and the two
+ * timers are created: timers[0] with the read callback and timers[1] with
+ * the write callback.
+ *
+ * @ts:             the throttle state to initialize
+ * @clock_type:     the clock used by the timers
+ * @read_timer_cb:  the callback of the read timer
+ * @write_timer_cb: the callback of the write timer
+ * @timer_opaque:   the opaque pointer given to both timers
+ */
+void throttle_init(ThrottleState *ts,
+                   QEMUClockType clock_type,
+                   QEMUTimerCB *read_timer_cb,
+                   QEMUTimerCB *write_timer_cb,
+                   void *timer_opaque)
+{
+    QEMUTimerCB *callbacks[2] = { read_timer_cb, write_timer_cb };
+    int i;
+
+    memset(ts, 0, sizeof(*ts));
+    ts->clock_type = clock_type;
+
+    for (i = 0; i < 2; i++) {
+        ts->timers[i] = timer_new_ns(clock_type, callbacks[i], timer_opaque);
+    }
+}
+
+/* Delete and free a timer, then reset the caller's pointer to NULL
+ *
+ * @timer: address of the timer pointer, which must not hold NULL
+ */
+static void throttle_timer_destroy(QEMUTimer **timer)
+{
+    QEMUTimer *victim = *timer;
+
+    assert(victim != NULL);
+
+    timer_del(victim);
+    timer_free(victim);
+
+    *timer = NULL;
+}
+
+/* To be called last on the ThrottleState: destroy both of its timers */
+void throttle_destroy(ThrottleState *ts)
+{
+    /* timers[0] and timers[1] are the two timers of the throttle state */
+    throttle_timer_destroy(&ts->timers[0]);
+    throttle_timer_destroy(&ts->timers[1]);
+}
+
+/* Is any throttling timer configured
+ *
+ * Only the first timer is inspected.
+ *
+ * @ts:  the throttle state to inspect
+ * @ret: true if the first timer exists else false
+ */
+bool throttle_have_timer(ThrottleState *ts)
+{
+    return ts->timers[0] != NULL;
+}
+
+/* Must any throttling be done
+ *
+ * Throttling is needed as soon as one bucket has a strictly positive avg.
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if throttling must be done else false
+ */
+bool throttle_enabled(ThrottleConfig *cfg)
+{
+    bool enabled = false;
+    int idx = 0;
+
+    /* stop at the first bucket with a limit */
+    while (!enabled && idx < BUCKETS_COUNT) {
+        enabled = cfg->buckets[idx].avg > 0;
+        idx++;
+    }
+
+    return enabled;
+}
+
+/* Return true if any two throttling parameters conflict
+ *
+ * Within the bps and the ops families, a total value conflicts with a read
+ * or a write value; this is checked for the avg and for the max fields.
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if any conflict detected else false
+ */
+bool throttle_conflicting(ThrottleConfig *cfg)
+{
+    /* each row is { total, read, write } for one family of limits */
+    const BucketType families[2][3] = {
+        { THROTTLE_BPS_TOTAL, THROTTLE_BPS_READ, THROTTLE_BPS_WRITE },
+        { THROTTLE_OPS_TOTAL, THROTTLE_OPS_READ, THROTTLE_OPS_WRITE },
+    };
+    int f;
+
+    for (f = 0; f < 2; f++) {
+        const LeakyBucket *total = &cfg->buckets[families[f][0]];
+        const LeakyBucket *rd = &cfg->buckets[families[f][1]];
+        const LeakyBucket *wr = &cfg->buckets[families[f][2]];
+
+        if (total->avg && (rd->avg || wr->avg)) {
+            return true;
+        }
+
+        if (total->max && (rd->max || wr->max)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* Check if a throttling configuration is valid
+ *
+ * A configuration is invalid as soon as one bucket has a negative avg or a
+ * negative max.
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if valid else false
+ */
+bool throttle_is_valid(ThrottleConfig *cfg)
+{
+    int idx;
+
+    for (idx = 0; idx < BUCKETS_COUNT; idx++) {
+        if (cfg->buckets[idx].avg < 0 || cfg->buckets[idx].max < 0) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Fix bucket parameters: reset the level and provide a default max
+ *
+ * @bkt: the bucket to fix
+ */
+static void throttle_fix_bucket(LeakyBucket *bkt)
+{
+    /* zero bucket level */
+    bkt->level = 0;
+
+    /* The following is done to cope with the Linux CFQ block scheduler
+     * which regroups reads and writes by blocks of 100ms in the guest.
+     * When there are two processes, one making reads and one making writes,
+     * cfq makes a pattern looking like the following:
+     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
+     * Having a max burst value of 100ms of the average will help smooth the
+     * throttling
+     */
+    if (bkt->avg && !bkt->max) {
+        /* a tenth of the per second average, i.e. 100ms worth of it */
+        bkt->max = bkt->avg / 10;
+    }
+}
+
+/* take care of canceling a timer
+ *
+ * @timer: the timer to cancel, must not be NULL
+ */
+static void throttle_cancel_timer(QEMUTimer *timer)
+{
+ assert(timer != NULL);
+
+ timer_del(timer);
+}
+
+/* Used to configure the throttle
+ *
+ * The configuration is copied into the state, every bucket is fixed (level
+ * reset, default max), the leak reference time is set to the current clock
+ * value and both timers are canceled.
+ *
+ * @ts: the throttle state we are working on
+ * @cfg: the config to set
+ */
+void throttle_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+    int n = 0;
+
+    /* work on a private copy of the configuration */
+    ts->cfg = *cfg;
+
+    while (n < BUCKETS_COUNT) {
+        throttle_fix_bucket(&ts->cfg.buckets[n]);
+        n++;
+    }
+
+    /* leaks are computed relative to this point in time */
+    ts->previous_leak = qemu_clock_get_ns(ts->clock_type);
+
+    /* cancel the read timer, then the write timer */
+    throttle_cancel_timer(ts->timers[0]);
+    throttle_cancel_timer(ts->timers[1]);
+}
+
+/* used to get config
+ *
+ * The current configuration, bucket levels included, is copied to @cfg.
+ *
+ * @ts: the throttle state we are working on
+ * @cfg: the config to write
+ */
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+ *cfg = ts->cfg;
+}
+
+
+/* Schedule the read or write timer if needed
+ *
+ * NOTE: this function is not unit tested due to its usage of timer_mod
+ *
+ * @ts:       the throttle state we are working on
+ * @is_write: the type of operation (read/write)
+ * @ret:      true if the request is throttled, in which case the timer is
+ *            pending on return, else false
+ */
+bool throttle_schedule_timer(ThrottleState *ts, bool is_write)
+{
+    QEMUTimer *timer = ts->timers[is_write];
+    int64_t now = qemu_clock_get_ns(ts->clock_type);
+    int64_t deadline;
+
+    /* request not throttled */
+    if (!throttle_compute_timer(ts, is_write, now, &deadline)) {
+        return false;
+    }
+
+    /* request throttled: arm the timer unless it is already pending */
+    if (!timer_pending(timer)) {
+        timer_mod(timer, deadline);
+    }
+
+    return true;
+}
+
+/* Do the accounting for this operation
+ *
+ * The bps buckets are charged with @size and the ops buckets with the
+ * number of units of the operation; the total buckets and the buckets of
+ * the operation's direction are updated.
+ *
+ * @ts:       the throttle state we are working on
+ * @is_write: the type of operation (read/write)
+ * @size:     the size of the operation
+ */
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+{
+    const BucketType bps_dir = is_write ? THROTTLE_BPS_WRITE
+                                        : THROTTLE_BPS_READ;
+    const BucketType ops_dir = is_write ? THROTTLE_OPS_WRITE
+                                        : THROTTLE_OPS_READ;
+    double units = 1.0;
+
+    /* an operation larger than a defined cfg.op_size counts for more than
+     * one unit
+     */
+    if (ts->cfg.op_size && size > ts->cfg.op_size) {
+        units = (double) size / ts->cfg.op_size;
+    }
+
+    /* the total buckets are charged for every operation */
+    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
+    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+
+    /* then the buckets matching the direction of the operation */
+    ts->cfg.buckets[bps_dir].level += size;
+    ts->cfg.buckets[ops_dir].level += units;
+}
+
diff --git a/xen-all.c b/xen-all.c
index eb13111361..839f14f53c 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -154,7 +154,7 @@ qemu_irq *xen_interrupt_controller_init(void)
/* Memory Ops */
-static void xen_ram_init(ram_addr_t ram_size)
+static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p)
{
MemoryRegion *sysmem = get_system_memory();
ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
@@ -168,6 +168,7 @@ static void xen_ram_init(ram_addr_t ram_size)
block_len += HVM_BELOW_4G_MMIO_LENGTH;
}
memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len);
+ *ram_memory_p = &ram_memory;
vmstate_register_ram_global(&ram_memory);
if (ram_size >= HVM_BELOW_4G_RAM_END) {
@@ -1059,7 +1060,7 @@ static void xen_read_physmap(XenIOState *state)
free(entries);
}
-int xen_hvm_init(void)
+int xen_hvm_init(MemoryRegion **ram_memory)
{
int i, rc;
unsigned long ioreq_pfn;
@@ -1134,7 +1135,7 @@ int xen_hvm_init(void)
/* Init RAM management */
xen_map_cache_init(xen_phys_offset_to_gaddr, state);
- xen_ram_init(ram_size);
+ xen_ram_init(ram_size, ram_memory);
qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
diff --git a/xen-stub.c b/xen-stub.c
index 47c8e73e0f..ad189a6df8 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -64,7 +64,7 @@ void xen_modified_memory(ram_addr_t start, ram_addr_t length)
{
}
-int xen_hvm_init(void)
+int xen_hvm_init(MemoryRegion **ram_memory)
{
return 0;
}