From 4e35b92a51571002a68d3be74b774546d9aefd19 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 17 Apr 2012 19:41:08 +0200 Subject: block: Fix spelling in comment (ineffcient -> inefficient) Signed-off-by: Stefan Weil Signed-off-by: Kevin Wolf --- block/cow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/cow.c b/block/cow.c index 8d3c9f873c..a5a00eb9ca 100644 --- a/block/cow.c +++ b/block/cow.c @@ -103,7 +103,7 @@ static int cow_open(BlockDriverState *bs, int flags) } /* - * XXX(hch): right now these functions are extremely ineffcient. + * XXX(hch): right now these functions are extremely inefficient. * We should just read the whole bitmap we'll need in one go instead. */ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum) -- cgit v1.2.3 From 8dc0a5e7a06c059683f9c379c0a4b0bbc20d5c74 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 18 Apr 2012 16:18:14 +0200 Subject: qcow2: Fix error handling in qcow2_alloc_cluster_offset If do_alloc_cluster_offset() fails, the error handling code tried to remove the request from the in-flight queue, to which it wasn't added yet, resulting in a NULL pointer dereference. m->nb_clusters really only becomes != 0 when the request is in the list. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index cbd224dc46..dcf70a24d3 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -931,7 +931,7 @@ again: fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); fail_put: - if (nb_clusters > 0) { + if (m->nb_clusters > 0) { QLIST_REMOVE(m, next_in_flight); } return ret; -- cgit v1.2.3 From 2795ecf681107d55e4113592b3045ece5f6e7b3b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 18 Apr 2012 16:27:06 +0200 Subject: qcow2: Fix return value of alloc_refcount_block Someone forgot something in commit 29c1a730... Documenting the right return value is not enough, you also need to actually return it in the code. This bug sometimes causes error return values even when everything has succeeded: The new offset of the refcount block is truncated to 32 bits and interpreted as signed. At least with small cluster sizes it's easy to get a negative return value this way. Signed-off-by: Kevin Wolf Reviewed-by: Paolo Bonzini --- block/qcow2-refcount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index f39928a6bf..565bd54902 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -400,7 +400,7 @@ static int alloc_refcount_block(BlockDriverState *bs, return ret; } - return new_block; + return 0; fail_table: g_free(new_table); -- cgit v1.2.3 From bafbd6a1c69fef73500309dc31c86984c6d22b43 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Apr 2012 14:00:54 +0200 Subject: aio: remove process_queue callback and qemu_aio_process_queue Both unused after the previous patch. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- aio.c | 29 ++--------------------------- block/curl.c | 10 ++++------ block/iscsi.c | 4 ++-- block/nbd.c | 8 ++++---- block/rbd.c | 5 ++--- block/sheepdog.c | 11 +++++------ linux-aio.c | 2 +- posix-aio-compat.c | 3 +-- qemu-aio.h | 13 ------------- 9 files changed, 21 insertions(+), 64 deletions(-) (limited to 'block') diff --git a/aio.c b/aio.c index eb3bf42574..f19b3c6b09 100644 --- a/aio.c +++ b/aio.c @@ -35,7 +35,6 @@ struct AioHandler IOHandler *io_read; IOHandler *io_write; AioFlushHandler *io_flush; - AioProcessQueue *io_process_queue; int deleted; void *opaque; QLIST_ENTRY(AioHandler) node; @@ -58,7 +57,6 @@ int qemu_aio_set_fd_handler(int fd, IOHandler *io_read, IOHandler *io_write, AioFlushHandler *io_flush, - AioProcessQueue *io_process_queue, void *opaque) { AioHandler *node; @@ -91,7 +89,6 @@ int qemu_aio_set_fd_handler(int fd, node->io_read = io_read; node->io_write = io_write; node->io_flush = io_flush; - node->io_process_queue = io_process_queue; node->opaque = opaque; } @@ -122,39 +119,17 @@ void qemu_aio_flush(void) } while (qemu_bh_poll() || ret > 0); } -int qemu_aio_process_queue(void) -{ - AioHandler *node; - int ret = 0; - - walking_handlers = 1; - - QLIST_FOREACH(node, &aio_handlers, node) { - if (node->io_process_queue) { - if (node->io_process_queue(node->opaque)) { - ret = 1; - } - } - } - - walking_handlers = 0; - - return ret; -} - void qemu_aio_wait(void) { int ret; - if (qemu_bh_poll()) - return; - /* * If there are callbacks left that have been queued, we need to call then. * Return afterwards to avoid waiting needlessly in select(). */ - if (qemu_aio_process_queue()) + if (qemu_bh_poll()) { return; + } do { AioHandler *node; diff --git a/block/curl.c b/block/curl.c index a909eca337..bf3680ba57 100644 --- a/block/curl.c +++ b/block/curl.c @@ -89,19 +89,17 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd); switch (action) { case CURL_POLL_IN: - qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, - NULL, s); + qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, curl_aio_flush, s); break; case CURL_POLL_OUT: - qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, - NULL, s); + qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, curl_aio_flush, s); break; case CURL_POLL_INOUT: qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, - curl_aio_flush, NULL, s); + curl_aio_flush, s); break; case CURL_POLL_REMOVE: - qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(fd, NULL, NULL, NULL, NULL); break; } diff --git a/block/iscsi.c b/block/iscsi.c index bd3ca11b2e..5222726d0f 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -108,7 +108,7 @@ iscsi_set_events(IscsiLun *iscsilun) qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), iscsi_process_read, (iscsi_which_events(iscsi) & POLLOUT) ? iscsi_process_write : NULL, - iscsi_process_flush, NULL, iscsilun); + iscsi_process_flush, iscsilun); } static void @@ -682,7 +682,7 @@ static void iscsi_close(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct iscsi_context *iscsi = iscsilun->iscsi; - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL, NULL); iscsi_destroy_context(iscsi); memset(iscsilun, 0, sizeof(IscsiLun)); } diff --git a/block/nbd.c b/block/nbd.c index 161b299855..524c9cf412 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -191,7 +191,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_co_mutex_lock(&s->send_mutex); s->send_coroutine = qemu_coroutine_self(); qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, - nbd_have_request, NULL, s); + nbd_have_request, s); rc = nbd_send_request(s->sock, request); if (rc != -1 && iov) { ret = qemu_co_sendv(s->sock, iov, request->len, offset); @@ -201,7 +201,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, } } qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, - nbd_have_request, NULL, s); + nbd_have_request, s); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -274,7 +274,7 @@ static int nbd_establish_connection(BlockDriverState *bs) * kick the reply mechanism. */ socket_set_nonblock(sock); qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, - nbd_have_request, NULL, s); + nbd_have_request, s); s->sock = sock; s->size = size; @@ -294,7 +294,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) request.len = 0; nbd_send_request(s->sock, &request); - qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL); closesocket(s->sock); } diff --git a/block/rbd.c b/block/rbd.c index 46a8579018..6cd84488e4 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -504,7 +504,7 @@ static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags) fcntl(s->fds[0], F_SETFL, O_NONBLOCK); fcntl(s->fds[1], F_SETFL, O_NONBLOCK); qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, - NULL, qemu_rbd_aio_flush_cb, NULL, s); + NULL, qemu_rbd_aio_flush_cb, s); return 0; @@ -525,8 +525,7 @@ static void qemu_rbd_close(BlockDriverState *bs) close(s->fds[0]); close(s->fds[1]); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL , NULL, NULL, NULL, - NULL); + qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL); rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); diff --git a/block/sheepdog.c b/block/sheepdog.c index 3eaf625e98..0ed6b193c9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -799,8 +799,7 @@ static int get_sheep_fd(BDRVSheepdogState *s) return -1; } - qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, - NULL, s); + qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s); return fd; } @@ -973,7 +972,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, qemu_co_mutex_lock(&s->lock); s->co_send = qemu_coroutine_self(); qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, - aio_flush_request, NULL, s); + aio_flush_request, s); socket_set_cork(s->fd, 1); /* send a header */ @@ -995,7 +994,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, socket_set_cork(s->fd, 0); qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, - aio_flush_request, NULL, s); + aio_flush_request, s); qemu_co_mutex_unlock(&s->lock); return 0; @@ -1135,7 +1134,7 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) g_free(buf); return 0; out: - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); if (s->fd >= 0) { closesocket(s->fd); } @@ -1349,7 +1348,7 @@ static void sd_close(BlockDriverState *bs) error_report("%s, %s", sd_strerror(rsp->result), s->name); } - qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL); + qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); if (s->cache_enabled) { closesocket(s->flush_fd); diff --git a/linux-aio.c b/linux-aio.c index 15261ece3d..fa0fbf34aa 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -214,7 +214,7 @@ void *laio_init(void) goto out_close_efd; qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL, - qemu_laio_flush_cb, NULL, s); + qemu_laio_flush_cb, s); return s; diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 1066c601d2..68361f555a 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -663,8 +663,7 @@ int paio_init(void) fcntl(s->rfd, F_SETFL, O_NONBLOCK); fcntl(s->wfd, F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, - NULL, s); + qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s); ret = pthread_attr_init(&attr); if (ret) diff --git a/qemu-aio.h b/qemu-aio.h index 230c2f79a0..0fc84093c0 100644 --- a/qemu-aio.h +++ b/qemu-aio.h @@ -41,11 +41,6 @@ void qemu_aio_release(void *p); /* Returns 1 if there are still outstanding AIO requests; 0 otherwise */ typedef int (AioFlushHandler)(void *opaque); -/* Runs all currently allowed AIO callbacks of completed requests in the - * respective AIO backend. Returns 0 if no requests was handled, non-zero - * if at least one queued request was handled. */ -typedef int (AioProcessQueue)(void *opaque); - /* Flush any pending AIO operation. This function will block until all * outstanding AIO operations have been completed or cancelled. */ void qemu_aio_flush(void); @@ -56,13 +51,6 @@ void qemu_aio_flush(void); * result of executing I/O completion or bh callbacks. */ void qemu_aio_wait(void); -/* - * Runs all currently allowed AIO callbacks of completed requests. Returns 0 - * if no requests were handled, non-zero if at least one request was - * processed. - */ -int qemu_aio_process_queue(void); - /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will * be invoked when using either qemu_aio_wait() or qemu_aio_flush(). @@ -74,7 +62,6 @@ int qemu_aio_set_fd_handler(int fd, IOHandler *io_read, IOHandler *io_write, AioFlushHandler *io_flush, - AioProcessQueue *io_process_queue, void *opaque); #endif -- cgit v1.2.3 From f24423bd902bce29bc546cf8d030bfa369726ab1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Apr 2012 15:50:39 +0200 Subject: qcow2: Fix refcount block allocation during qcow2_alloc_cluster_at() Refcount block allocation and refcount table growth rely on s->free_cluster_index pointing to somewhere after the current allocation. Change qcow2_alloc_cluster_at() to fulfill this assumption. Without this change it could happen that a newly allocated refcount block and the allocated data block point to the same area in the image file, causing data corruption in the long run. This fixes a bug that became first visible after commit 250196f1. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 565bd54902..6c383373de 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -587,6 +587,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, { BDRVQcowState *s = bs->opaque; uint64_t cluster_index; + uint64_t old_free_cluster_index; int i, refcount, ret; /* Check how many clusters there are free */ @@ -602,11 +603,16 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, } /* And then allocate them */ + old_free_cluster_index = s->free_cluster_index; + s->free_cluster_index = cluster_index + i; + ret = update_refcount(bs, offset, i << s->cluster_bits, 1); if (ret < 0) { return ret; } + s->free_cluster_index = old_free_cluster_index; + return i; } -- cgit v1.2.3 From 90b277593df873d3a2480f002e2eb5fe1f8e5277 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 11 Apr 2012 16:33:50 +0200 Subject: qcow2: Save disk size in snapshot header This allows that different snapshots of an image can have different sizes, which is a requirement for enabling image resizing even with images that have internal snapshots. We don't do the actual support for it now, but make sure that the additional field is present and not completely ignored in all version 3 images. When trying to load a snapshot of different size, it returns an error. Signed-off-by: Kevin Wolf --- block/qcow2-snapshot.c | 16 ++++++++++++++++ block/qcow2.h | 1 + 2 files changed, 17 insertions(+) (limited to 'block') diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index 7d3fde5a8a..42f971b590 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -48,6 +48,7 @@ typedef struct QEMU_PACKED QCowSnapshotHeader { typedef struct QEMU_PACKED QCowSnapshotExtraData { uint64_t vm_state_size_large; + uint64_t disk_size; } QCowSnapshotExtraData; void qcow2_free_snapshots(BlockDriverState *bs) @@ -117,6 +118,12 @@ int qcow2_read_snapshots(BlockDriverState *bs) sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large); } + if (extra_data_size >= 16) { + sn->disk_size = be64_to_cpu(extra.disk_size); + } else { + sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; + } + /* Read snapshot ID */ sn->id_str = g_malloc(id_str_size + 1); ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); @@ -197,6 +204,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) memset(&extra, 0, sizeof(extra)); extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size); + extra.disk_size = cpu_to_be64(sn->disk_size); id_str_size = strlen(sn->id_str); name_size = strlen(sn->name); @@ -330,6 +338,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) sn->id_str = g_strdup(sn_info->id_str); sn->name = g_strdup(sn_info->name); + sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; sn->vm_state_size = sn_info->vm_state_size; sn->date_sec = sn_info->date_sec; sn->date_nsec = sn_info->date_nsec; @@ -426,6 +435,13 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) } sn = &s->snapshots[snapshot_index]; + if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { + error_report("qcow2: Loading snapshots with different disk " + "size is not implemented"); + ret = -ENOTSUP; + goto fail; + } + /* * Make sure that the current L1 table is big enough to contain the whole * L1 table of the snapshot. If the snapshot L1 table is smaller, the diff --git a/block/qcow2.h b/block/qcow2.h index e4ac366cfc..ddb976a759 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -78,6 +78,7 @@ typedef struct QCowSnapshot { uint32_t l1_size; char *id_str; char *name; + uint64_t disk_size; uint64_t vm_state_size; uint32_t date_sec; uint32_t date_nsec; -- cgit v1.2.3 From 68d000a39074fe3888680491444a7fde2354cd84 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 14 Mar 2012 19:15:03 +0100 Subject: qcow2: Ignore reserved bits in get_cluster_offset With this change, reading from a qcow2 image ignores all reserved bits that are set in an L1 or L2 table entry. Now get_cluster_offset() assigns *cluster_offset only the offset without any other flags. The cluster type is not longer encoded in the offset, but a positive return value in case of success. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 41 +++++++++++++++++++++++++---------------- block/qcow2.c | 17 ++++++++++++++--- block/qcow2.h | 21 +++++++++++++++++++++ 3 files changed, 60 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index dcf70a24d3..c8c85d224a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -367,11 +367,9 @@ out: * * on exit, *num is the number of contiguous sectors we can read. * - * Return 0, if the offset is found - * Return -errno, otherwise. - * + * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error + * cases. */ - int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset) { @@ -407,19 +405,19 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* seek the the l2 offset in the l1 table */ l1_index = offset >> l1_bits; - if (l1_index >= s->l1_size) + if (l1_index >= s->l1_size) { + ret = QCOW2_CLUSTER_UNALLOCATED; goto out; + } - l2_offset = s->l1_table[l1_index]; - - /* seek the l2 table of the given l2 offset */ - - if (!l2_offset) + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; + if (!l2_offset) { + ret = QCOW2_CLUSTER_UNALLOCATED; goto out; + } /* load the l2 table in memory */ - l2_offset &= ~QCOW_OFLAG_COPIED; ret = l2_load(bs, l2_offset, &l2_table); if (ret < 0) { return ret; @@ -431,26 +429,37 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, *cluster_offset = be64_to_cpu(l2_table[l2_index]); nb_clusters = size_to_clusters(s, nb_needed << 9); - if (!*cluster_offset) { + ret = qcow2_get_cluster_type(*cluster_offset); + switch (ret) { + case QCOW2_CLUSTER_COMPRESSED: + /* Compressed clusters can only be processed one by one */ + c = 1; + *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; + break; + case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); - } else { + *cluster_offset = 0; + break; + case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], 0, QCOW_OFLAG_COPIED); + *cluster_offset &= L2E_OFFSET_MASK; + break; } qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - nb_available = (c * s->cluster_sectors); + nb_available = (c * s->cluster_sectors); + out: if (nb_available > nb_needed) nb_available = nb_needed; *num = nb_available - index_in_cluster; - *cluster_offset &=~QCOW_OFLAG_COPIED; - return 0; + return ret; } /* diff --git a/block/qcow2.c b/block/qcow2.c index 70d3141dd1..4c82adc96f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -449,7 +449,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_copy(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); - if (!cluster_offset) { + switch (ret) { + case QCOW2_CLUSTER_UNALLOCATED: if (bs->backing_hd) { /* read from the base image */ @@ -469,7 +470,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, /* Note: in this case, no need to wait */ qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); } - } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { + break; + + case QCOW2_CLUSTER_COMPRESSED: /* add AIO support for compressed blocks ? */ ret = qcow2_decompress_cluster(bs, cluster_offset); if (ret < 0) { @@ -479,7 +482,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_from_buffer(&hd_qiov, s->cluster_cache + index_in_cluster * 512, 512 * cur_nr_sectors); - } else { + break; + + case QCOW2_CLUSTER_NORMAL: if ((cluster_offset & 511) != 0) { ret = -EIO; goto fail; @@ -520,6 +525,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, qemu_iovec_from_buffer(&hd_qiov, cluster_data, 512 * cur_nr_sectors); } + break; + + default: + g_assert_not_reached(); + ret = -EIO; + goto fail; } remaining_sectors -= cur_nr_sectors; diff --git a/block/qcow2.h b/block/qcow2.h index ddb976a759..a22d7fafbe 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -165,6 +165,16 @@ typedef struct QCowL2Meta QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; +enum { + QCOW2_CLUSTER_UNALLOCATED, + QCOW2_CLUSTER_NORMAL, + QCOW2_CLUSTER_COMPRESSED, +}; + +#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL +#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL +#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL + static inline int size_to_clusters(BDRVQcowState *s, int64_t size) { return (size + (s->cluster_size - 1)) >> s->cluster_bits; @@ -182,6 +192,17 @@ static inline int64_t align_offset(int64_t offset, int n) return offset; } +static inline int qcow2_get_cluster_type(uint64_t l2_entry) +{ + if (l2_entry & QCOW_OFLAG_COMPRESSED) { + return QCOW2_CLUSTER_COMPRESSED; + } else if (!(l2_entry & L2E_OFFSET_MASK)) { + return QCOW2_CLUSTER_UNALLOCATED; + } else { + return QCOW2_CLUSTER_NORMAL; + } +} + // FIXME Need qcow2_ prefix to global functions -- cgit v1.2.3 From 2bfcc4a0a0b5120b6518e1c823c18cf0e8b963cb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Mar 2012 16:37:40 +0100 Subject: qcow2: Ignore reserved bits in count_contiguous_clusters() Until now, count_contiguous_clusters() has an argument that allowed to specify flags that should be ignored in the comparison, i.e. that are allowed to change between contiguous clusters. This patch changes the function so that it ignores all flags by default now and you need to pass the flags on which it should stop. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c8c85d224a..7b059908a0 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -246,28 +246,44 @@ fail: return ret; } +/* + * Checks how many clusters in a given L2 table are contiguous in the image + * file. As soon as one of the flags in the bitmask stop_flags changes compared + * to the first cluster, the search is stopped and the cluster is not counted + * as contiguous. (This allows it, for example, to stop at the first compressed + * cluster which may require a different handling) + */ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size, - uint64_t *l2_table, uint64_t start, uint64_t mask) + uint64_t *l2_table, uint64_t start, uint64_t stop_flags) { int i; - uint64_t offset = be64_to_cpu(l2_table[0]) & ~mask; + uint64_t mask = stop_flags | L2E_OFFSET_MASK; + uint64_t offset = be64_to_cpu(l2_table[0]) & mask; if (!offset) return 0; - for (i = start; i < start + nb_clusters; i++) - if (offset + (uint64_t) i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask)) + for (i = start; i < start + nb_clusters; i++) { + uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; + if (offset + (uint64_t) i * cluster_size != l2_entry) { break; + } + } return (i - start); } static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table) { - int i = 0; + int i; - while(nb_clusters-- && l2_table[i] == 0) - i++; + for (i = 0; i < nb_clusters; i++) { + int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); + + if (type != QCOW2_CLUSTER_UNALLOCATED) { + break; + } + } return i; } @@ -444,7 +460,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, QCOW_OFLAG_COPIED); + &l2_table[l2_index], 0, QCOW_OFLAG_COMPRESSED); *cluster_offset &= L2E_OFFSET_MASK; break; } @@ -696,7 +712,8 @@ static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, while (i < nb_clusters) { i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, - &l2_table[l2_index], i, 0); + &l2_table[l2_index], i, + QCOW_OFLAG_COPIED | QCOW_OFLAG_COMPRESSED); if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) { break; } @@ -854,7 +871,8 @@ again: if (cluster_offset & QCOW_OFLAG_COPIED) { /* We keep all QCOW_OFLAG_COPIED clusters */ keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, 0); + &l2_table[l2_index], 0, + QCOW_OFLAG_COPIED); assert(keep_clusters <= nb_clusters); nb_clusters -= keep_clusters; } else { -- cgit v1.2.3 From b0b6862e5e1a1394e0ab3d5da94ba8b0da8664e2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Mar 2012 17:20:11 +0100 Subject: qcow2: Fail write_compressed when overwriting data qcow2_alloc_compressed_cluster_offset() already fails if the copied flag is set, because qcow2_write_compressed() doesn't perform COW as it would have to do to allow this. However, what we really want to check here is whether the cluster is allocated or not. With internal snapshots the copied flag may not be set on allocated clusters. Check the cluster offset instead. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 7b059908a0..dff782fc21 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -571,15 +571,14 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return 0; } + /* Compression can't overwrite anything. Fail if the cluster was already + * allocated. */ cluster_offset = be64_to_cpu(l2_table[l2_index]); - if (cluster_offset & QCOW_OFLAG_COPIED) { + if (cluster_offset & L2E_OFFSET_MASK) { qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); return 0; } - if (cluster_offset) - qcow2_free_any_clusters(bs, cluster_offset, 1); - cluster_offset = qcow2_alloc_bytes(bs, compressed_size); if (cluster_offset < 0) { qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); -- cgit v1.2.3 From 8e37f681d58bbe166c20559e77fd55b1cb8e6e4b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 23 Feb 2012 15:40:55 +0100 Subject: qcow2: Ignore reserved bits in L1/L2 entries This changes the still existing places that assume that the only flags are QCOW_OFLAG_COPIED and QCOW_OFLAG_COMPRESSED to properly mask out reserved bits. It does not convert bdrv_check yet. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 26 +++++++++++++------------- block/qcow2-refcount.c | 12 ++++++------ 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index dff782fc21..d1602d4bdd 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -195,7 +195,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) l2_table = *table; - if (old_l2_offset == 0) { + if ((old_l2_offset & L1E_OFFSET_MASK) == 0) { /* if there was no old l2 table, clear the new table */ memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); } else { @@ -203,7 +203,8 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* if there was an old l2 table, read it from the disk */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); - ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset, + ret = qcow2_cache_get(bs, s->l2_table_cache, + old_l2_offset & L1E_OFFSET_MASK, (void**) &old_table); if (ret < 0) { goto fail; @@ -508,13 +509,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, return ret; } } - l2_offset = s->l1_table[l1_index]; + + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; /* seek the l2 table of the given l2 offset */ - if (l2_offset & QCOW_OFLAG_COPIED) { + if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) { /* load the l2 table in memory */ - l2_offset &= ~QCOW_OFLAG_COPIED; ret = l2_load(bs, l2_offset, &l2_table); if (ret < 0) { return ret; @@ -530,7 +531,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, if (l2_offset) { qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); } - l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED; + l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; } /* find the cluster offset for the given disk offset */ @@ -687,8 +688,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) */ if (j != 0) { for (i = 0; i < j; i++) { - qcow2_free_any_clusters(bs, - be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1); + qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1); } } @@ -867,7 +867,9 @@ again: * Check how many clusters are already allocated and don't need COW, and how * many need a new allocation. */ - if (cluster_offset & QCOW_OFLAG_COPIED) { + if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL + && (cluster_offset & QCOW_OFLAG_COPIED)) + { /* We keep all QCOW_OFLAG_COPIED clusters */ keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], 0, @@ -886,7 +888,7 @@ again: cluster_offset = 0; } - cluster_offset &= ~QCOW_OFLAG_COPIED; + cluster_offset &= L2E_OFFSET_MASK; /* If there is something left to allocate, do that now */ *m = (QCowL2Meta) { @@ -1041,9 +1043,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, uint64_t old_offset; old_offset = be64_to_cpu(l2_table[l2_index + i]); - old_offset &= ~QCOW_OFLAG_COPIED; - - if (old_offset == 0) { + if ((old_offset & L2E_OFFSET_MASK) == 0) { continue; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6c383373de..84fa343248 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -702,9 +702,8 @@ void qcow2_free_any_clusters(BlockDriverState *bs, return; } - qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits); - - return; + qcow2_free_clusters(bs, cluster_offset & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits); } @@ -764,7 +763,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, l2_offset = l1_table[i]; if (l2_offset) { old_l2_offset = l2_offset; - l2_offset &= ~QCOW_OFLAG_COPIED; + l2_offset &= L1E_OFFSET_MASK; ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) &l2_table); @@ -796,10 +795,11 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, /* compressed clusters are never modified */ refcount = 2; } else { + uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits; if (addend != 0) { - refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend); + refcount = update_cluster_refcount(bs, cluster_index, addend); } else { - refcount = get_refcount(bs, offset >> s->cluster_bits); + refcount = get_refcount(bs, cluster_index); } if (refcount < 0) { -- cgit v1.2.3 From c7a4c37a0f768f7ad50f64414d578436019e1e96 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 27 Mar 2012 13:09:22 +0200 Subject: qcow2: Refactor qcow2_free_any_clusters Zero clusters will add another cluster type. Refactor the open-coded cluster type detection into a switch of QCOW2_CLUSTER_* options so that the detection is in a single place. This makes it easier to add new cluster types. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 84fa343248..909d61560a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -679,31 +679,34 @@ void qcow2_free_clusters(BlockDriverState *bs, } /* - * free_any_clusters - * - * free clusters according to its type: compressed or not - * + * Free a cluster using its L2 entry (handles clusters of all types, e.g. + * normal cluster, compressed cluster, etc.) */ - void qcow2_free_any_clusters(BlockDriverState *bs, - uint64_t cluster_offset, int nb_clusters) + uint64_t l2_entry, int nb_clusters) { BDRVQcowState *s = bs->opaque; - /* free the cluster */ - - if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - int nb_csectors; - nb_csectors = ((cluster_offset >> s->csize_shift) & - s->csize_mask) + 1; - qcow2_free_clusters(bs, - (cluster_offset & s->cluster_offset_mask) & ~511, - nb_csectors * 512); - return; + switch (qcow2_get_cluster_type(l2_entry)) { + case QCOW2_CLUSTER_COMPRESSED: + { + int nb_csectors; + nb_csectors = ((l2_entry >> s->csize_shift) & + s->csize_mask) + 1; + qcow2_free_clusters(bs, + (l2_entry & s->cluster_offset_mask) & ~511, + nb_csectors * 512); + } + break; + case QCOW2_CLUSTER_NORMAL: + qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK, + nb_clusters << s->cluster_bits); + break; + case QCOW2_CLUSTER_UNALLOCATED: + break; + default: + abort(); } - - qcow2_free_clusters(bs, cluster_offset & L2E_OFFSET_MASK, - nb_clusters << s->cluster_bits); } -- cgit v1.2.3 From 143550a83ef4eef86a847d00023d148e1f59f743 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 27 Mar 2012 13:17:22 +0200 Subject: qcow2: Simplify count_cow_clusters count_cow_clusters() tries to reuse existing functions, and all it achieves is to make things much more complicated than they really are: Everything needs COW, unless it's a normal cluster with refcount 1. This patch implements the obvious way of doing this, and by using qcow2_get_cluster_type() it gets rid of all flag magic. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d1602d4bdd..b8836ba009 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -706,30 +706,27 @@ err: static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, uint64_t *l2_table, int l2_index) { - int i = 0; - uint64_t cluster_offset; + int i; - while (i < nb_clusters) { - i += count_contiguous_clusters(nb_clusters - i, s->cluster_size, - &l2_table[l2_index], i, - QCOW_OFLAG_COPIED | QCOW_OFLAG_COMPRESSED); - if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) { - break; - } + for (i = 0; i < nb_clusters; i++) { + uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]); + int cluster_type = qcow2_get_cluster_type(l2_entry); - i += count_contiguous_free_clusters(nb_clusters - i, - &l2_table[l2_index + i]); - if (i >= nb_clusters) { + switch(cluster_type) { + case QCOW2_CLUSTER_NORMAL: + if (l2_entry & QCOW_OFLAG_COPIED) { + goto out; + } break; - } - - cluster_offset = be64_to_cpu(l2_table[l2_index + i]); - - if ((cluster_offset & QCOW_OFLAG_COPIED) || - (cluster_offset & QCOW_OFLAG_COMPRESSED)) + case QCOW2_CLUSTER_UNALLOCATED: + case QCOW2_CLUSTER_COMPRESSED: break; + default: + abort(); + } } +out: assert(i <= nb_clusters); return i; } -- cgit v1.2.3 From 76dc9e0c8f369f1695e5413de2e28d69108476bb Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 16 Mar 2012 14:09:08 +0100 Subject: qcow2: Ignore reserved bits in refcount table entries Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 2 +- block/qcow2.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 909d61560a..e0854dc85d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -167,7 +167,7 @@ static int alloc_refcount_block(BlockDriverState *bs, if (refcount_table_index < s->refcount_table_size) { uint64_t refcount_block_offset = - s->refcount_table[refcount_table_index]; + s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK; /* If it's already there, we're done */ if (refcount_block_offset) { diff --git a/block/qcow2.h b/block/qcow2.h index a22d7fafbe..291309a9e7 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -175,6 +175,8 @@ enum { #define L2E_OFFSET_MASK 0x00ffffffffffff00ULL #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL +#define REFT_OFFSET_MASK 0xffffffffffffff00ULL + static inline int size_to_clusters(BDRVQcowState *s, int64_t size) { return (size + (s->cluster_size - 1)) >> s->cluster_bits; -- cgit v1.2.3 From afdf0abe779f4b11712eb306ab2d4299820457b8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 27 Mar 2012 13:44:56 +0200 Subject: qcow2: Ignore reserved bits in check_refcounts Also don't infer the cluster type directly from the L2 entries, but use qcow2_get_cluster_type() to keep everything in a single place. Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 98 +++++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 44 deletions(-) (limited to 'block') diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index e0854dc85d..0112cc37b0 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -940,7 +940,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, int check_copied) { BDRVQcowState *s = bs->opaque; - uint64_t *l2_table, offset; + uint64_t *l2_table, l2_entry; int i, l2_size, nb_csectors, refcount; /* Read L2 table from disk */ @@ -952,54 +952,64 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, /* Do the actual checks */ for(i = 0; i < s->l2_size; i++) { - offset = be64_to_cpu(l2_table[i]); - if (offset != 0) { - if (offset & QCOW_OFLAG_COMPRESSED) { - /* Compressed clusters don't have QCOW_OFLAG_COPIED */ - if (offset & QCOW_OFLAG_COPIED) { - fprintf(stderr, "ERROR: cluster %" PRId64 ": " - "copied flag must never be set for compressed " - "clusters\n", offset >> s->cluster_bits); - offset &= ~QCOW_OFLAG_COPIED; - res->corruptions++; - } + l2_entry = be64_to_cpu(l2_table[i]); + + switch (qcow2_get_cluster_type(l2_entry)) { + case QCOW2_CLUSTER_COMPRESSED: + /* Compressed clusters don't have QCOW_OFLAG_COPIED */ + if (l2_entry & QCOW_OFLAG_COPIED) { + fprintf(stderr, "ERROR: cluster %" PRId64 ": " + "copied flag must never be set for compressed " + "clusters\n", l2_entry >> s->cluster_bits); + l2_entry &= ~QCOW_OFLAG_COPIED; + res->corruptions++; + } - /* Mark cluster as used */ - nb_csectors = ((offset >> s->csize_shift) & - s->csize_mask) + 1; - offset &= s->cluster_offset_mask; - inc_refcounts(bs, res, refcount_table, refcount_table_size, - offset & ~511, nb_csectors * 512); - } else { - /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ - if (check_copied) { - uint64_t entry = offset; - offset &= ~QCOW_OFLAG_COPIED; - refcount = get_refcount(bs, offset >> s->cluster_bits); - if (refcount < 0) { - fprintf(stderr, "Can't get refcount for offset %" - PRIx64 ": %s\n", entry, strerror(-refcount)); - goto fail; - } - if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) { - fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" - PRIx64 " refcount=%d\n", entry, refcount); - res->corruptions++; - } - } + /* Mark cluster as used */ + nb_csectors = ((l2_entry >> s->csize_shift) & + s->csize_mask) + 1; + l2_entry &= s->cluster_offset_mask; + inc_refcounts(bs, res, refcount_table, refcount_table_size, + l2_entry & ~511, nb_csectors * 512); + break; - /* Mark cluster as used */ - offset &= ~QCOW_OFLAG_COPIED; - inc_refcounts(bs, res, refcount_table,refcount_table_size, - offset, s->cluster_size); + case QCOW2_CLUSTER_NORMAL: + { + /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ + uint64_t offset = l2_entry & L2E_OFFSET_MASK; - /* Correct offsets are cluster aligned */ - if (offset & (s->cluster_size - 1)) { - fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " - "properly aligned; L2 entry corrupted.\n", offset); + if (check_copied) { + refcount = get_refcount(bs, offset >> s->cluster_bits); + if (refcount < 0) { + fprintf(stderr, "Can't get refcount for offset %" + PRIx64 ": %s\n", l2_entry, strerror(-refcount)); + goto fail; + } + if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "ERROR OFLAG_COPIED: offset=%" + PRIx64 " refcount=%d\n", l2_entry, refcount); res->corruptions++; } } + + /* Mark cluster as used */ + inc_refcounts(bs, res, refcount_table,refcount_table_size, + offset, s->cluster_size); + + /* Correct offsets are cluster aligned */ + if (offset & (s->cluster_size - 1)) { + fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not " + "properly aligned; L2 entry corrupted.\n", offset); + res->corruptions++; + } + break; + } + + case QCOW2_CLUSTER_UNALLOCATED: + break; + + default: + abort(); } } @@ -1070,7 +1080,7 @@ static int check_refcounts_l1(BlockDriverState *bs, } /* Mark L2 table as used */ - l2_offset &= ~QCOW_OFLAG_COPIED; + l2_offset &= L1E_OFFSET_MASK; inc_refcounts(bs, res, refcount_table, refcount_table_size, l2_offset, s->cluster_size); -- cgit v1.2.3 From 6744cbab8cd63b7ce72b3eee4f0055007acf0798 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 15 Dec 2011 12:20:58 +0100 Subject: qcow2: Version 3 images This adds the basic infrastructure to qcow2 to handle version 3 images. It includes code to create v3 images, allow header updates for v3 images and checks feature bits. It still misses support for zero clusters, so this is not a fully compliant implementation of v3 yet. The default for creating new images stays at v2 for now. Signed-off-by: Kevin Wolf --- block/qcow2.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ block/qcow2.h | 17 ++++++- block_int.h | 1 + 3 files changed, 149 insertions(+), 15 deletions(-) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index 4c82adc96f..3e1482fa7e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -61,7 +61,7 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) if (buf_size >= sizeof(QCowHeader) && be32_to_cpu(cow_header->magic) == QCOW_MAGIC && - be32_to_cpu(cow_header->version) >= QCOW_VERSION) + be32_to_cpu(cow_header->version) >= 2) return 100; else return 0; @@ -169,6 +169,19 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs) } } +static void report_unsupported(BlockDriverState *bs, const char *fmt, ...) +{ + char msg[64]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, + bs->device_name, "qcow2", msg); +} + static int qcow2_open(BlockDriverState *bs, int flags) { BDRVQcowState *s = bs->opaque; @@ -199,14 +212,64 @@ static int qcow2_open(BlockDriverState *bs, int flags) ret = -EINVAL; goto fail; } - if (header.version != QCOW_VERSION) { - char version[64]; - snprintf(version, sizeof(version), "QCOW version %d", header.version); - qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bs->device_name, "qcow2", version); + if (header.version < 2 || header.version > 3) { + report_unsupported(bs, "QCOW version %d", header.version); + ret = -ENOTSUP; + goto fail; + } + + s->qcow_version = header.version; + + /* Initialise version 3 header fields */ + if (header.version == 2) { + header.incompatible_features = 0; + header.compatible_features = 0; + header.autoclear_features = 0; + header.refcount_order = 4; + header.header_length = 72; + } else { + be64_to_cpus(&header.incompatible_features); + be64_to_cpus(&header.compatible_features); + be64_to_cpus(&header.autoclear_features); + be32_to_cpus(&header.refcount_order); + be32_to_cpus(&header.header_length); + } + + if (header.header_length > sizeof(header)) { + s->unknown_header_fields_size = header.header_length - sizeof(header); + s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); + ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, + s->unknown_header_fields_size); + if (ret < 0) { + goto fail; + } + } + + /* Handle feature bits */ + s->incompatible_features = header.incompatible_features; + s->compatible_features = header.compatible_features; + s->autoclear_features = header.autoclear_features; + + if (s->incompatible_features != 0) { + report_unsupported(bs, "incompatible features mask %" PRIx64, + header.incompatible_features); + ret = -ENOTSUP; + goto fail; + } + + if (!bs->read_only && s->autoclear_features != 0) { + s->autoclear_features = 0; + qcow2_update_header(bs); + } + + /* Check support for various header values */ + if (header.refcount_order != 4) { + report_unsupported(bs, "%d bit reference counts", + 1 << header.refcount_order); ret = -ENOTSUP; goto fail; } + if (header.cluster_bits < MIN_CLUSTER_BITS || header.cluster_bits > MAX_CLUSTER_BITS) { ret = -EINVAL; @@ -285,7 +348,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) } else { ext_end = s->cluster_size; } - if (qcow2_read_extensions(bs, sizeof(header), ext_end)) { + if (qcow2_read_extensions(bs, header.header_length, ext_end)) { ret = -EINVAL; goto fail; } @@ -321,6 +384,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) return ret; fail: + g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); qcow2_refcount_close(bs); @@ -682,7 +746,9 @@ static void qcow2_close(BlockDriverState *bs) qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); + g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); + g_free(s->cluster_cache); qemu_vfree(s->cluster_data); qcow2_refcount_close(bs); @@ -756,10 +822,10 @@ int qcow2_update_header(BlockDriverState *bs) int ret; uint64_t total_size; uint32_t refcount_table_clusters; + size_t header_length; Qcow2UnknownHeaderExtension *uext; buf = qemu_blockalign(bs, buflen); - memset(buf, 0, s->cluster_size); /* Header structure */ header = (QCowHeader*) buf; @@ -769,12 +835,14 @@ int qcow2_update_header(BlockDriverState *bs) goto fail; } + header_length = sizeof(*header) + s->unknown_header_fields_size; total_size = bs->total_sectors * BDRV_SECTOR_SIZE; refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3); *header = (QCowHeader) { + /* Version 2 fields */ .magic = cpu_to_be32(QCOW_MAGIC), - .version = cpu_to_be32(QCOW_VERSION), + .version = cpu_to_be32(s->qcow_version), .backing_file_offset = 0, .backing_file_size = 0, .cluster_bits = cpu_to_be32(s->cluster_bits), @@ -786,10 +854,42 @@ int qcow2_update_header(BlockDriverState *bs) .refcount_table_clusters = cpu_to_be32(refcount_table_clusters), .nb_snapshots = cpu_to_be32(s->nb_snapshots), .snapshots_offset = cpu_to_be64(s->snapshots_offset), + + /* Version 3 fields */ + .incompatible_features = cpu_to_be64(s->incompatible_features), + .compatible_features = cpu_to_be64(s->compatible_features), + .autoclear_features = cpu_to_be64(s->autoclear_features), + .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT), + .header_length = cpu_to_be32(header_length), }; - buf += sizeof(*header); - buflen -= sizeof(*header); + /* For older versions, write a shorter header */ + switch (s->qcow_version) { + case 2: + ret = offsetof(QCowHeader, incompatible_features); + break; + case 3: + ret = sizeof(*header); + break; + default: + return -EINVAL; + } + + buf += ret; + buflen -= ret; + memset(buf, 0, buflen); + + /* Preserve any unknown field in the header */ + if (s->unknown_header_fields_size) { + if (buflen < s->unknown_header_fields_size) { + ret = -ENOSPC; + goto fail; + } + + memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size); + buf += s->unknown_header_fields_size; + buflen -= s->unknown_header_fields_size; + } /* Backing file format header extension */ if (*bs->backing_format) { @@ -921,7 +1021,7 @@ static int preallocate(BlockDriverState *bs) static int qcow2_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, int prealloc, - QEMUOptionParameter *options) + QEMUOptionParameter *options, int version) { /* Calculate cluster_bits */ int cluster_bits; @@ -965,13 +1065,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Write the header */ memset(&header, 0, sizeof(header)); header.magic = cpu_to_be32(QCOW_MAGIC); - header.version = cpu_to_be32(QCOW_VERSION); + header.version = cpu_to_be32(version); header.cluster_bits = cpu_to_be32(cluster_bits); header.size = cpu_to_be64(0); header.l1_table_offset = cpu_to_be64(0); header.l1_size = cpu_to_be32(0); header.refcount_table_offset = cpu_to_be64(cluster_size); header.refcount_table_clusters = cpu_to_be32(1); + header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT); + header.header_length = cpu_to_be32(sizeof(header)); if (flags & BLOCK_FLAG_ENCRYPT) { header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); @@ -1053,6 +1155,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) int flags = 0; size_t cluster_size = DEFAULT_CLUSTER_SIZE; int prealloc = 0; + int version = 2; /* Read out options */ while (options && options->name) { @@ -1078,6 +1181,16 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) options->value.s); return -EINVAL; } + } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) { + if (!options->value.s || !strcmp(options->value.s, "0.10")) { + version = 2; + } else if (!strcmp(options->value.s, "1.1")) { + version = 3; + } else { + fprintf(stderr, "Invalid compatibility level: '%s'\n", + options->value.s); + return -EINVAL; + } } options++; } @@ -1089,7 +1202,7 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) } return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, - cluster_size, prealloc, options); + cluster_size, prealloc, options, version); } static int qcow2_make_empty(BlockDriverState *bs) @@ -1340,6 +1453,11 @@ static QEMUOptionParameter qcow2_create_options[] = { .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_COMPAT_LEVEL, + .type = OPT_STRING, + .help = "Compatibility level (0.10 or 1.1)" + }, { .name = BLOCK_OPT_BACKING_FILE, .type = OPT_STRING, diff --git a/block/qcow2.h b/block/qcow2.h index 291309a9e7..2dc8ca26fb 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -33,7 +33,6 @@ //#define DEBUG_EXT #define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) -#define QCOW_VERSION 2 #define QCOW_CRYPT_NONE 0 #define QCOW_CRYPT_AES 1 @@ -71,6 +70,14 @@ typedef struct QCowHeader { uint32_t refcount_table_clusters; uint32_t nb_snapshots; uint64_t snapshots_offset; + + /* The following fields are only valid for version >= 3 */ + uint64_t incompatible_features; + uint64_t compatible_features; + uint64_t autoclear_features; + + uint32_t refcount_order; + uint32_t header_length; } QCowHeader; typedef struct QCowSnapshot { @@ -135,6 +142,14 @@ typedef struct BDRVQcowState { QCowSnapshot *snapshots; int flags; + int qcow_version; + + uint64_t incompatible_features; + uint64_t compatible_features; + uint64_t autoclear_features; + + size_t unknown_header_fields_size; + void* unknown_header_fields; QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; } BDRVQcowState; diff --git a/block_int.h b/block_int.h index 0e5a032e77..0acb49f100 100644 --- a/block_int.h +++ b/block_int.h @@ -50,6 +50,7 @@ #define BLOCK_OPT_TABLE_SIZE "table_size" #define BLOCK_OPT_PREALLOC "preallocation" #define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" typedef struct BdrvTrackedRequest BdrvTrackedRequest; -- cgit v1.2.3 From 6377af48b0445e7ee50db6e0bd73a3a70098f5f4 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 16 Mar 2012 15:02:38 +0100 Subject: qcow2: Support reading zero clusters This adds support for reading zero clusters in version 3 images. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 17 +++++++++++++---- block/qcow2-refcount.c | 7 +++++++ block/qcow2.c | 8 ++++++++ block/qcow2.h | 5 +++++ 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'block') diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index b8836ba009..5e5f5cf9ad 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -453,6 +453,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, c = 1; *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; break; + case QCOW2_CLUSTER_ZERO: + c = count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, + QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO); + *cluster_offset = 0; + break; case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? */ c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); @@ -461,7 +467,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, case QCOW2_CLUSTER_NORMAL: /* how many allocated clusters ? */ c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, QCOW_OFLAG_COMPRESSED); + &l2_table[l2_index], 0, + QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO); *cluster_offset &= L2E_OFFSET_MASK; break; } @@ -720,6 +727,7 @@ static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, break; case QCOW2_CLUSTER_UNALLOCATED: case QCOW2_CLUSTER_COMPRESSED: + case QCOW2_CLUSTER_ZERO: break; default: abort(); @@ -868,9 +876,10 @@ again: && (cluster_offset & QCOW_OFLAG_COPIED)) { /* We keep all QCOW_OFLAG_COPIED clusters */ - keep_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, - QCOW_OFLAG_COPIED); + keep_clusters = + count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, + QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); assert(keep_clusters <= nb_clusters); nb_clusters -= keep_clusters; } else { diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0112cc37b0..812c93c5c7 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -703,6 +703,7 @@ void qcow2_free_any_clusters(BlockDriverState *bs, nb_clusters << s->cluster_bits); break; case QCOW2_CLUSTER_UNALLOCATED: + case QCOW2_CLUSTER_ZERO: break; default: abort(); @@ -973,6 +974,12 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, l2_entry & ~511, nb_csectors * 512); break; + case QCOW2_CLUSTER_ZERO: + if ((l2_entry & L2E_OFFSET_MASK) == 0) { + break; + } + /* fall through */ + case QCOW2_CLUSTER_NORMAL: { /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ diff --git a/block/qcow2.c b/block/qcow2.c index 3e1482fa7e..9a8b354d0e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -536,6 +536,14 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } break; + case QCOW2_CLUSTER_ZERO: + if (s->qcow_version < 3) { + ret = -EIO; + goto fail; + } + qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + break; + case QCOW2_CLUSTER_COMPRESSED: /* add AIO support for compressed blocks ? */ ret = qcow2_decompress_cluster(bs, cluster_offset); diff --git a/block/qcow2.h b/block/qcow2.h index 2dc8ca26fb..df2bdfd0c6 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -43,6 +43,8 @@ #define QCOW_OFLAG_COPIED (1LL << 63) /* indicate that the cluster is compressed (they never have the copied flag) */ #define QCOW_OFLAG_COMPRESSED (1LL << 62) +/* The cluster reads as all zeros */ +#define QCOW_OFLAG_ZERO (1LL << 0) #define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */ @@ -184,6 +186,7 @@ enum { QCOW2_CLUSTER_UNALLOCATED, QCOW2_CLUSTER_NORMAL, QCOW2_CLUSTER_COMPRESSED, + QCOW2_CLUSTER_ZERO }; #define L1E_OFFSET_MASK 0x00ffffffffffff00ULL @@ -213,6 +216,8 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry) { if (l2_entry & QCOW_OFLAG_COMPRESSED) { return QCOW2_CLUSTER_COMPRESSED; + } else if (l2_entry & QCOW_OFLAG_ZERO) { + return QCOW2_CLUSTER_ZERO; } else if (!(l2_entry & L2E_OFFSET_MASK)) { return QCOW2_CLUSTER_UNALLOCATED; } else { -- cgit v1.2.3 From cfcc4c62ffe37170cf4b0c9e7acbd2dbaa792598 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 12 Apr 2012 15:20:27 +0200 Subject: qcow2: Support for feature table header extension Instead of printing an ugly bitmask, qemu can now print a more helpful string even for yet unknown features. Signed-off-by: Kevin Wolf --- block/qcow2.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- block/qcow2.h | 12 +++++++++++ 2 files changed, 69 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/qcow2.c b/block/qcow2.c index 9a8b354d0e..41b195d91b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -54,6 +54,7 @@ typedef struct { } QCowExtension; #define QCOW2_EXT_MAGIC_END 0 #define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA +#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) { @@ -76,7 +77,7 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) * return 0 upon success, non-0 otherwise */ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, - uint64_t end_offset) + uint64_t end_offset, void **p_feature_table) { BDRVQcowState *s = bs->opaque; QCowExtension ext; @@ -134,6 +135,18 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, #endif break; + case QCOW2_EXT_MAGIC_FEATURE_TABLE: + if (p_feature_table != NULL) { + void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); + ret = bdrv_pread(bs->file, offset , feature_table, ext.len); + if (ret < 0) { + return ret; + } + + *p_feature_table = feature_table; + } + break; + default: /* unknown magic - save it in case we need to rewrite the header */ { @@ -182,6 +195,24 @@ static void report_unsupported(BlockDriverState *bs, const char *fmt, ...) bs->device_name, "qcow2", msg); } +static void report_unsupported_feature(BlockDriverState *bs, + Qcow2Feature *table, uint64_t mask) +{ + while (table && table->name[0] != '\0') { + if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) { + if (mask & (1 << table->bit)) { + report_unsupported(bs, "%.46s",table->name); + mask &= ~(1 << table->bit); + } + } + table++; + } + + if (mask) { + report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask); + } +} + static int qcow2_open(BlockDriverState *bs, int flags) { BDRVQcowState *s = bs->opaque; @@ -245,14 +276,23 @@ static int qcow2_open(BlockDriverState *bs, int flags) } } + if (header.backing_file_offset) { + ext_end = header.backing_file_offset; + } else { + ext_end = 1 << header.cluster_bits; + } + /* Handle feature bits */ s->incompatible_features = header.incompatible_features; s->compatible_features = header.compatible_features; s->autoclear_features = header.autoclear_features; if (s->incompatible_features != 0) { - report_unsupported(bs, "incompatible features mask %" PRIx64, - header.incompatible_features); + void *feature_table = NULL; + qcow2_read_extensions(bs, header.header_length, ext_end, + &feature_table); + report_unsupported_feature(bs, feature_table, + s->incompatible_features); ret = -ENOTSUP; goto fail; } @@ -343,12 +383,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) QLIST_INIT(&s->cluster_allocs); /* read qcow2 extensions */ - if (header.backing_file_offset) { - ext_end = header.backing_file_offset; - } else { - ext_end = s->cluster_size; - } - if (qcow2_read_extensions(bs, header.header_length, ext_end)) { + if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) { ret = -EINVAL; goto fail; } @@ -912,6 +947,19 @@ int qcow2_update_header(BlockDriverState *bs) buflen -= ret; } + /* Feature table */ + Qcow2Feature features[] = { + /* no feature defined yet */ + }; + + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, + features, sizeof(features), buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; + /* Keep unknown header extensions */ QLIST_FOREACH(uext, &s->unknown_header_ext, next) { ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen); diff --git a/block/qcow2.h b/block/qcow2.h index df2bdfd0c6..2a982445a1 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -104,6 +104,18 @@ typedef struct Qcow2UnknownHeaderExtension { uint8_t data[]; } Qcow2UnknownHeaderExtension; +enum { + QCOW2_FEAT_TYPE_INCOMPATIBLE = 0, + QCOW2_FEAT_TYPE_COMPATIBLE = 1, + QCOW2_FEAT_TYPE_AUTOCLEAR = 2, +}; + +typedef struct Qcow2Feature { + uint8_t type; + uint8_t bit; + char name[46]; +} QEMU_PACKED Qcow2Feature; + typedef struct BDRVQcowState { int cluster_bits; int cluster_size; -- cgit v1.2.3 From 621f058940ea9f1ef3d8774ef3203544f1228df1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Mar 2012 15:12:58 +0100 Subject: qcow2: Zero write support Signed-off-by: Kevin Wolf --- block.c | 14 +++++++--- block/qcow2-cluster.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.c | 21 +++++++++++++++ block/qcow2.h | 1 + 4 files changed, 105 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block.c b/block.c index 421cd1bebd..fe74dddb13 100644 --- a/block.c +++ b/block.c @@ -80,6 +80,8 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, void *opaque, bool is_write); static void coroutine_fn bdrv_co_do_rw(void *opaque); +static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, bool is_write, double elapsed_time, uint64_t *wait); @@ -1708,8 +1710,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, if (drv->bdrv_co_write_zeroes && buffer_is_zero(bounce_buffer, iov.iov_len)) { - ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num, - cluster_nb_sectors); + ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, + cluster_nb_sectors); } else { ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, &bounce_qiov); @@ -1819,9 +1821,15 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, struct iovec iov; int ret; + /* TODO Emulate only part of misaligned requests instead of letting block + * drivers return -ENOTSUP and emulate everything */ + /* First try the efficient write zeroes operation */ if (drv->bdrv_co_write_zeroes) { - return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors); + if (ret != -ENOTSUP) { + return ret; + } } /* Fall back to bounce buffer if write zeroes is unsupported */ diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 5e5f5cf9ad..a747a88e13 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1102,3 +1102,75 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, return 0; } + +/* + * This zeroes as many clusters of nb_clusters as possible at once (i.e. + * all clusters in the same L2 table) and returns the number of zeroed + * clusters. + */ +static int zero_single_l2(BlockDriverState *bs, uint64_t offset, + unsigned int nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + uint64_t *l2_table; + int l2_index; + int ret; + int i; + + ret = get_cluster_table(bs, offset, &l2_table, &l2_index); + if (ret < 0) { + return ret; + } + + /* Limit nb_clusters to one L2 table */ + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + for (i = 0; i < nb_clusters; i++) { + uint64_t old_offset; + + old_offset = be64_to_cpu(l2_table[l2_index + i]); + + /* Update L2 entries */ + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); + if (old_offset & QCOW_OFLAG_COMPRESSED) { + l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); + qcow2_free_any_clusters(bs, old_offset, 1); + } else { + l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); + } + } + + ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (ret < 0) { + return ret; + } + + return nb_clusters; +} + +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) +{ + BDRVQcowState *s = bs->opaque; + unsigned int nb_clusters; + int ret; + + /* The zero flag is only supported by version 3 and newer */ + if (s->qcow_version < 3) { + return -ENOTSUP; + } + + /* Each L2 table is handled by its own loop iteration */ + nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); + + while (nb_clusters > 0) { + ret = zero_single_l2(bs, offset, nb_clusters); + if (ret < 0) { + return ret; + } + + nb_clusters -= ret; + offset += (ret * s->cluster_size); + } + + return 0; +} diff --git a/block/qcow2.c b/block/qcow2.c index 41b195d91b..ad46c03c2f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1281,6 +1281,26 @@ static int qcow2_make_empty(BlockDriverState *bs) return 0; } +static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + int ret; + BDRVQcowState *s = bs->opaque; + + /* Emulate misaligned zero writes */ + if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { + return -ENOTSUP; + } + + /* Whatever is left can use real zero clusters */ + qemu_co_mutex_lock(&s->lock); + ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, + nb_sectors); + qemu_co_mutex_unlock(&s->lock); + + return ret; +} + static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { @@ -1558,6 +1578,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_co_writev = qcow2_co_writev, .bdrv_co_flush_to_os = qcow2_co_flush_to_os, + .bdrv_co_write_zeroes = qcow2_co_write_zeroes, .bdrv_co_discard = qcow2_co_discard, .bdrv_truncate = qcow2_truncate, .bdrv_write_compressed = qcow2_write_compressed, diff --git a/block/qcow2.h b/block/qcow2.h index 2a982445a1..93567f6451 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -283,6 +283,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); -- cgit v1.2.3