📄 block-qcow2.c
字号:
/*
 * Block driver for the QCOW version 2 format
 *
 * Copyright (c) 2004-2006 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <zlib.h>
#include "aes.h"
#include <assert.h>
#include <errno.h>      /* errno, used by the open/read error paths */
#include <fcntl.h>
#include <inttypes.h>   /* PRIx64, used in the DPRINTF format strings */
#include <stddef.h>     /* offsetof, size_t */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>     /* lseek, read, write, close */

#include "tapdisk.h"
#include "tapaio.h"
#include "bswap.h"

#define USE_AIO

#define qemu_malloc malloc
#define qemu_mallocz(size) calloc(1, size)
#define qemu_free free

#ifndef O_BINARY
#define O_BINARY 0
#endif

/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

#define BLOCK_FLAG_ENCRYPT 1

/*
  Differences with QCOW:

  - Support for multiple incremental snapshots.
  - Memory management by reference counts.
  - Clusters which have a reference count of one have the bit
    QCOW_OFLAG_COPIED to optimize write performance.
  - Size of compressed clusters is stored in sectors to reduce bit usage
    in the cluster offsets.
  - Support for storing additional data (such as the VM state) in the
    snapshots.
  - If a backing store is used, the cluster size is not constrained
    (could be backported to QCOW).
  - L2 tables always have a size of one cluster.
*/

//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2

#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define QCOW_VERSION 2

#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

/*
 * indicate that the refcount of the referenced cluster is exactly one.
 * Built from an unsigned constant: shifting a signed 1 into bit 63 is
 * undefined behaviour.
 */
#define QCOW_OFLAG_COPIED     (1ULL << 63)
/* indicate that the cluster is compressed (they never have the copied flag) */
#define QCOW_OFLAG_COMPRESSED (1LL << 62)

#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */

/* Fallback only; <stddef.h> normally provides offsetof. */
#ifndef offsetof
#define offsetof(type, field) ((size_t) &((type *)0)->field)
#endif

/*
 * Image header as read from the start of the file. qcow_open() converts
 * every field from big-endian to host order after reading it.
 */
typedef struct QCowHeader {
    uint32_t magic;
    uint32_t version;
    uint64_t backing_file_offset;
    uint32_t backing_file_size;
    uint32_t cluster_bits;
    uint64_t size; /* in bytes */
    uint32_t crypt_method;
    uint32_t l1_size; /* XXX: save number of clusters instead ? */
    uint64_t l1_table_offset;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_clusters;
    uint32_t nb_snapshots;
    uint64_t snapshots_offset;
} QCowHeader;

/* Packed snapshot record header; variable-length data follows it. */
typedef struct __attribute__((packed)) QCowSnapshotHeader {
    /* header is 8 byte aligned */
    uint64_t l1_table_offset;

    uint32_t l1_size;
    uint16_t id_str_size;
    uint16_t name_size;

    uint32_t date_sec;
    uint32_t date_nsec;

    uint64_t vm_clock_nsec;

    uint32_t vm_state_size;
    uint32_t extra_data_size; /* for extension */
    /* extra data follows */
    /* id_str follows */
    /* name follows */
} QCowSnapshotHeader;

/* Number of slots in l2_cache_offsets[] / l2_cache_counts[]. */
#define L2_CACHE_SIZE 16

/* In-memory description of one snapshot. */
typedef struct QCowSnapshot {
    uint64_t l1_table_offset;
    uint32_t l1_size;
    char *id_str;
    char *name;
    uint32_t vm_state_size;
    uint32_t date_sec;
    uint32_t date_nsec;
    uint64_t vm_clock_nsec;
} QCowSnapshot;

/* Per-image driver state (stored in disk_driver->private). */
typedef struct BDRVQcowState {

    /* blktap additions */
    int fd;
    int poll_pipe[2]; /* dummy fd for polling on */
    char* name;
    int encrypted;
    char backing_file[1024];
    struct disk_driver* backing_hd;

    int64_t total_sectors;

    tap_aio_context_t async;

    /* Original qemu variables */
    int cluster_bits;
    int cluster_size;
    int cluster_sectors;
    int l2_bits;
    int l2_size;
    int l1_size;
    int l1_vm_state_index;
    int csize_shift;
    int csize_mask;
    uint64_t cluster_offset_mask;
    uint64_t l1_table_offset;
    uint64_t *l1_table;
    uint64_t *l2_cache;
    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
    uint32_t l2_cache_counts[L2_CACHE_SIZE];
    uint8_t *cluster_cache;
    uint8_t *cluster_data;
    uint64_t cluster_cache_offset;

    uint64_t *refcount_table;
    uint64_t refcount_table_offset;
    uint32_t refcount_table_size;
    uint64_t refcount_block_cache_offset;
    uint16_t *refcount_block_cache;
    int64_t free_cluster_index;
    int64_t free_byte_offset;

    uint32_t crypt_method; /* current crypt method, 0 if no key yet */
    uint32_t crypt_method_header;
    AES_KEY aes_encrypt_key;
    AES_KEY aes_decrypt_key;
    uint64_t snapshots_offset;
    int snapshots_size;
    int nb_snapshots;
    QCowSnapshot *snapshots;
} BDRVQcowState;

static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
static int qcow_read(struct disk_driver *bs, uint64_t sector_num,
        uint8_t *buf, int nb_sectors);

static int qcow_read_snapshots(struct disk_driver *bs);
static void qcow_free_snapshots(struct disk_driver *bs);

static int refcount_init(struct disk_driver *bs);
static void refcount_close(struct disk_driver *bs);
static int get_refcount(struct disk_driver *bs, int64_t cluster_index);
static int update_cluster_refcount(struct disk_driver *bs,
        int64_t cluster_index,
        int addend);
static void update_refcount(struct disk_driver *bs,
        int64_t offset, int64_t length,
        int addend);
static int64_t alloc_clusters(struct disk_driver *bs, int64_t size);
static int64_t alloc_bytes(struct disk_driver *bs, int size);
static void free_clusters(struct disk_driver *bs,
        int64_t offset, int64_t size);
#ifdef DEBUG_ALLOC
static void check_refcounts(struct disk_driver *bs);
#endif

static int qcow_sync_read(struct disk_driver *dd, uint64_t sector,
        int nb_sectors, char *buf, td_callback_t cb,
        int id, void *prv);

/**
 * Read with byte offsets
 *
 * Seeks fd to byte position `offset` and reads `count` bytes into `buf`.
 * A short read is padded with zeros up to `count`. A failed read is also
 * reported as `count` zero bytes when `offset` lies beyond the end of the
 * file.
 *
 * Returns `count` on success, -1 if the seek fails or if the read fails
 * while `offset` is at or before the end of the file. The file position of
 * fd is changed by this call.
 */
static int bdrv_pread(int fd, int64_t offset, void *buf, int count)
{
    int ret;

    if (lseek(fd, offset, SEEK_SET) == -1) {
        DPRINTF("bdrv_pread failed seek (%#" PRIx64 ").\n", offset);
        return -1;
    }

    ret = read(fd, buf, count);
    if (ret < 0) {
        /* Distinguish a genuine I/O error from a read past EOF. */
        int64_t end = (int64_t)lseek(fd, 0, SEEK_END);

        if (end >= offset) {
            DPRINTF("bdrv_pread read failed (%#" PRIx64 ", END = %#" PRIx64 ").\n",
                    offset, end);
            return -1;
        }

        /* Read beyond end of file. Reading zeros. */
        memset(buf, 0, count);
        ret = count;
    } else if (ret < count) {
        /*
         * Read beyond end of file. Filling up with zeros.
         * Use a byte pointer: arithmetic on void * is a GNU extension.
         */
        memset((uint8_t *)buf + ret, 0, count - ret);
        ret = count;
    }
    return ret;
}

/**
 * Write with byte offsets
 *
 * Seeks fd to byte position `offset` and writes `count` bytes from `buf`.
 * Returns -1 if the seek fails, otherwise the result of write() (which may
 * be less than `count`).
 */
static int bdrv_pwrite(int fd, int64_t offset, const void *buf, int count)
{
    if (lseek(fd, offset, SEEK_SET) == -1) {
        DPRINTF("bdrv_pwrite failed seek (%#" PRIx64 ").\n", offset);
        return -1;
    }

    return write(fd, buf, count);
}

/**
 * Read with sector offsets
 *
 * `offset` and `count` are both given in 512-byte sectors; the return value
 * is that of bdrv_pread(), i.e. a byte count or -1.
 */
static int bdrv_read(int fd, int64_t offset, void *buf, int count)
{
    return bdrv_pread(fd, 512 * offset, buf, 512 * count);
}

/**
 * Write with sector offsets
 *
 * `offset` is given in 512-byte sectors.
 *
 * NOTE(review): unlike bdrv_read(), `count` is forwarded to bdrv_pwrite()
 * unscaled, so it is interpreted as a number of bytes. Confirm against the
 * callers whether this asymmetry is intended before changing it.
 */
static int bdrv_write(int fd, int64_t offset, const void *buf, int count)
{
    return bdrv_pwrite(fd, 512 * offset, buf, count);
}

/**
 * Check whether a buffer starts with a QCOW version 2 header.
 *
 * Returns 100 when `buf` holds at least sizeof(QCowHeader) bytes and its
 * big-endian magic and version fields equal QCOW_MAGIC and QCOW_VERSION;
 * returns 0 otherwise. `filename` is not used.
 */
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    uint32_t magic;
    uint32_t version;

    (void)filename;

    /*
     * Reject negative sizes explicitly: compared directly against sizeof,
     * a negative int would convert to a huge unsigned value and pass.
     */
    if (buf_size < 0 || (size_t)buf_size < sizeof(QCowHeader))
        return 0;

    /* Copy the fields out; buf carries no alignment guarantee. */
    memcpy(&magic, buf + offsetof(QCowHeader, magic), sizeof(magic));
    memcpy(&version, buf + offsetof(QCowHeader, version), sizeof(version));

    if (be32_to_cpu(magic) == QCOW_MAGIC &&
        be32_to_cpu(version) == QCOW_VERSION)
        return 100;

    return 0;
}

static int qcow_open(struct disk_driver *bs, const char *filename, td_flag_t flags)
{
    BDRVQcowState *s = bs->private;
    int len, i, shift, ret, max_aio_reqs;
    QCowHeader header;

    int fd, o_flags;

    o_flags = O_LARGEFILE | ((flags == TD_RDONLY) ?
O_RDONLY : O_RDWR); DPRINTF("Opening %s\n", filename); fd = open(filename, o_flags); if (fd < 0) { DPRINTF("Unable to open %s (%d)\n", filename, 0 - errno); return -1; } s->fd = fd; if (asprintf(&s->name,"%s", filename) == -1) { close(fd); return -1; } ret = read(fd, &header, sizeof(header)); if (ret != sizeof(header)) { DPRINTF(" ret = %d, errno = %d\n", ret, errno); goto fail; } be32_to_cpus(&header.magic); be32_to_cpus(&header.version); be64_to_cpus(&header.backing_file_offset); be32_to_cpus(&header.backing_file_size); be64_to_cpus(&header.size); be32_to_cpus(&header.cluster_bits); be32_to_cpus(&header.crypt_method); be64_to_cpus(&header.l1_table_offset); be32_to_cpus(&header.l1_size); be64_to_cpus(&header.refcount_table_offset); be32_to_cpus(&header.refcount_table_clusters); be64_to_cpus(&header.snapshots_offset); be32_to_cpus(&header.nb_snapshots); if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION) goto fail; if (header.size <= 1 || header.cluster_bits < 9 || header.cluster_bits > 16) goto fail; s->crypt_method = 0; if (header.crypt_method > QCOW_CRYPT_AES) goto fail; s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) s->encrypted = 1; s->cluster_bits = header.cluster_bits; s->cluster_size = 1 << s->cluster_bits; s->cluster_sectors = 1 << (s->cluster_bits - 9); s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */ s->l2_size = 1 << s->l2_bits; s->total_sectors = header.size / 512; s->csize_shift = (62 - (s->cluster_bits - 8)); s->csize_mask = (1 << (s->cluster_bits - 8)) - 1; s->cluster_offset_mask = (1LL << s->csize_shift) - 1; s->refcount_table_offset = header.refcount_table_offset; s->refcount_table_size = header.refcount_table_clusters << (s->cluster_bits - 3); s->snapshots_offset = header.snapshots_offset; s->nb_snapshots = header.nb_snapshots;// DPRINTF("-- cluster_bits/size/sectors = %d/%d/%d\n",// s->cluster_bits, s->cluster_size, s->cluster_sectors);// DPRINTF("-- l2_bits/sizes = %d/%d\n",// s->l2_bits, 
s->l2_size); /* Set sector size and number */ bs->td_state->sector_size = 512; bs->td_state->size = header.size / 512; bs->td_state->info = 0; /* read the level 1 table */ s->l1_size = header.l1_size; shift = s->cluster_bits + s->l2_bits; s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift; /* the L1 table must contain at least enough entries to put header.size bytes */ if (s->l1_size < s->l1_vm_state_index) { DPRINTF("L1 table tooo small\n"); goto fail; } s->l1_table_offset = header.l1_table_offset; s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t)); if (!s->l1_table) goto fail; if (lseek(fd, s->l1_table_offset, SEEK_SET) == -1) goto fail; if (read(fd, s->l1_table, s->l1_size * sizeof(uint64_t)) != s->l1_size * sizeof(uint64_t)) { DPRINTF("Could not read L1 table\n"); goto fail; } for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } /* alloc L2 cache */ s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); if (!s->l2_cache) goto fail; s->cluster_cache = qemu_malloc(s->cluster_size); if (!s->cluster_cache) goto fail; /* one more sector for decompressed data alignment */ s->cluster_data = qemu_malloc(s->cluster_size + 512); if (!s->cluster_data) goto fail; s->cluster_cache_offset = -1; if (refcount_init(bs) < 0) goto fail; /* read the backing file name */ s->backing_file[0] = '\0'; if (header.backing_file_offset != 0) { len = header.backing_file_size; if (len > 1023) len = 1023; if (lseek(fd, header.backing_file_offset, SEEK_SET) == -1) { DPRINTF("Could not lseek to %#"PRIx64"\n", header.backing_file_offset); goto fail; } if (read(fd, s->backing_file, len) != len) { DPRINTF("Could not read %#x bytes from %#"PRIx64": %s\n", len, header.backing_file_offset, strerror(errno)); goto fail; } s->backing_file[len] = '\0'; }#if 0 s->backing_hd = NULL; if (qcow_read_snapshots(bs) < 0) { DPRINTF("Could not read backing files\n"); goto fail; }#endif#ifdef DEBUG_ALLOC check_refcounts(bs);#endif /* Initialize fds */ for(i = 0; 
i < MAX_IOFD; i++) bs->io_fd[i] = 0;#ifdef USE_AIO /* Initialize AIO */ /* A segment (i.e. a page) can span multiple clusters */ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) * MAX_SEGMENTS_PER_REQ * MAX_REQUESTS; if (tap_aio_init(&s->async, bs->td_state->size, max_aio_reqs)) { DPRINTF("Unable to initialise AIO state\n"); tap_aio_free(&s->async); goto fail; } bs->io_fd[0] = s->async.aio_ctx.pollfd; #else /* Synchronous IO */ if (pipe(s->poll_pipe)) goto fail; bs->io_fd[0] = s->poll_pipe[0];#endif return 0; fail: DPRINTF("qcow_open failed\n");#ifdef USE_AIO
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -