📄 block-qcow.c
字号:
min_count = s->l2_cache_counts[i];
min_index = i;
}
}
l2_table = s->l2_cache + (min_index << s->l2_bits);
/*If extent pre-allocated, read table from disk,
*otherwise write new table to disk*/
if (new_l2_table) {
/*Should we allocate the whole extent? Adjustable parameter.*/
if (s->cluster_alloc == s->l2_size) {
cluster_offset = l2_offset +
(s->l2_size * sizeof(uint64_t));
cluster_offset = (cluster_offset + s->cluster_size - 1)
& ~(s->cluster_size - 1);
if (qtruncate(s->fd, cluster_offset +
(s->cluster_size * s->l2_size),
s->sparse) != 0) {
DPRINTF("ERROR truncating file\n");
return 0;
}
s->fd_end = cluster_offset +
(s->cluster_size * s->l2_size);
for (i = 0; i < s->l2_size; i++) {
l2_table[i] = cpu_to_be64(cluster_offset +
(i*s->cluster_size));
}
} else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
lseek(s->fd, l2_offset, SEEK_SET);
if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
} else {
lseek(s->fd, l2_offset, SEEK_SET);
if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
/*Update the cache entries*/
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
found:
/*The extent is split into 's->l2_size' blocks of
*size 's->cluster_size'*/
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
cluster_offset = be64_to_cpu(l2_table[l2_index]);
if (!cluster_offset ||
((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
if (!allocate)
return 0;
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
(n_end - n_start) < s->cluster_sectors) {
/* cluster is already allocated but compressed, we must
decompress it in the case it is not completely
overwritten */
if (decompress_cluster(s, cluster_offset) < 0)
return 0;
cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
cluster_offset = (cluster_offset + s->cluster_size - 1)
& ~(s->cluster_size - 1);
/* write the cluster content - not asynchronous */
lseek(s->fd, cluster_offset, SEEK_SET);
if (write(s->fd, s->cluster_cache, s->cluster_size) !=
s->cluster_size)
return -1;
} else {
/* allocate a new cluster */
cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
if (allocate == 1) {
/* round to cluster size */
cluster_offset =
(cluster_offset + s->cluster_size - 1)
& ~(s->cluster_size - 1);
if (qtruncate(s->fd, cluster_offset +
s->cluster_size, s->sparse)!=0) {
DPRINTF("ERROR truncating file\n");
return 0;
}
s->fd_end = (cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (s->crypt_method &&
(n_end - n_start) < s->cluster_sectors) {
uint64_t start_sect;
start_sect = (offset &
~(s->cluster_size - 1))
>> 9;
memset(s->cluster_data + 512,
0xaa, 512);
for (i = 0; i < s->cluster_sectors;i++)
{
if (i < n_start || i >= n_end)
{
encrypt_sectors(s, start_sect + i,
s->cluster_data,
s->cluster_data + 512, 1, 1,
&s->aes_encrypt_key);
lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
if (write(s->fd, s->cluster_data, 512) != 512)
return -1;
}
}
}
} else {
cluster_offset |= QCOW_OFLAG_COMPRESSED |
(uint64_t)compressed_size
<< (63 - s->cluster_bits);
}
}
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
/*For IO_DIRECT we write 4KByte blocks*/
l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
l2_ptr = (char *)l2_table + (l2_sector << 12);
if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
DPRINTF("ERROR allocating memory for L1 table\n");
}
memcpy(tmp_ptr2, l2_ptr, 4096);
lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
if (write(s->fd, tmp_ptr2, 4096) != 4096) {
free(tmp_ptr2);
return -1;
}
free(tmp_ptr2);
}
return cluster_offset;
}
/*
 * Report whether the cluster containing 'sector_num' currently has a mapping.
 *
 * The sector is looked up through get_cluster_offset() with every
 * allocation-related argument set to 0.  *pnum receives the number of
 * sectors, starting at sector_num and capped at nb_sectors, that lie in the
 * same cluster.
 *
 * Returns 1 if the lookup produced a non-zero cluster offset, 0 otherwise.
 */
static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
			     int nb_sectors, int *pnum)
{
	/* sector -> byte offset (512-byte sectors) */
	uint64_t mapped = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
	int first_in_cluster = sector_num & (s->cluster_sectors - 1);
	int left_in_cluster = s->cluster_sectors - first_in_cluster;

	*pnum = (left_in_cluster > nb_sectors) ? nb_sectors : left_in_cluster;

	return mapped != 0;
}
/*
 * Inflate 'buf_size' bytes from 'buf' into 'out_buf'.
 *
 * The stream is opened with inflateInit2(..., -12); per the zlib manual a
 * negative windowBits selects a raw deflate stream (no zlib/gzip header).
 *
 * Succeeds only when inflate() ends with Z_STREAM_END or Z_BUF_ERROR AND
 * exactly 'out_buf_size' bytes were produced.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
			     const uint8_t *buf, int buf_size)
{
	z_stream zs;
	int status, produced;

	memset(&zs, 0, sizeof(zs));
	zs.next_in   = (uint8_t *)buf;
	zs.avail_in  = buf_size;
	zs.next_out  = out_buf;
	zs.avail_out = out_buf_size;

	if (inflateInit2(&zs, -12) != Z_OK)
		return -1;

	status   = inflate(&zs, Z_FINISH);
	produced = zs.next_out - out_buf;

	/* The stream is released on every path once it has been initialised. */
	inflateEnd(&zs);

	if (status != Z_STREAM_END && status != Z_BUF_ERROR)
		return -1;
	if (produced != out_buf_size)
		return -1;

	return 0;
}
/*
 * Make s->cluster_cache hold the decompressed contents of the compressed
 * cluster described by 'cluster_offset'.
 *
 * 'cluster_offset' is an L2 entry: the low bits (s->cluster_offset_mask)
 * give the file offset of the compressed data and the bits above
 * (63 - s->cluster_bits) give its compressed size.
 *
 * If the cache already holds that cluster (s->cluster_cache_offset matches)
 * nothing is read.  Otherwise the compressed bytes are read into
 * s->cluster_data and inflated into s->cluster_cache.
 *
 * Returns 0 on success, -1 on seek, read or decompression failure.  After a
 * failed decompression the cache is marked invalid, because its buffer may
 * have been partially overwritten.
 */
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
{
	int ret, csize;
	uint64_t coffset;

	coffset = cluster_offset & s->cluster_offset_mask;
	if (s->cluster_cache_offset == coffset)
		return 0;

	csize = cluster_offset >> (63 - s->cluster_bits);
	csize &= (s->cluster_size - 1);

	if (lseek(s->fd, coffset, SEEK_SET) == (off_t)-1)
		return -1;

	ret = read(s->fd, s->cluster_data, csize);
	if (ret != csize)
		return -1;

	/*
	 * decompress_buffer() writes straight into s->cluster_cache.  Drop
	 * the cache tag first so a failure half-way through cannot leave the
	 * old tag pointing at clobbered data (-1 is the "empty" value that
	 * tdqcow_open() installs).
	 */
	s->cluster_cache_offset = -1;

	if (decompress_buffer(s->cluster_cache, s->cluster_size,
			      s->cluster_data, csize) < 0)
		return -1;

	s->cluster_cache_offset = coffset;
	return 0;
}
/*
 * Read the qcow header from the start of 'fd' into '*header'.
 *
 * The read goes through a 512-byte-aligned bounce buffer whose size is
 * sizeof(*header) rounded up to a multiple of 512.  A file shorter than
 * that is accepted as long as the read returns the whole file; the bytes
 * that were not read are zero.
 *
 * The multi-byte fields magic, version, backing_file_offset,
 * backing_file_size, mtime, size, crypt_method and l1_table_offset are
 * passed through the be32_to_cpus()/be64_to_cpus() helpers before returning.
 *
 * Returns 0 on success or a negative errno value on failure (-EIO if the
 * read was short without setting errno).
 */
static int
tdqcow_read_header(int fd, QCowHeader *header)
{
	int err;
	char *buf;
	struct stat st;
	ssize_t got;
	size_t size, expected;

	memset(header, 0, sizeof(*header));

	if (fstat(fd, &st))
		return -errno;

	if (lseek(fd, 0, SEEK_SET) == (off_t)-1)
		return -errno;

	size = (sizeof(*header) + 511) & ~511;
	err = posix_memalign((void **)&buf, 512, size);
	if (err)
		return -err;	/* posix_memalign reports a positive errno */

	/*
	 * A short file only fills part of the buffer; make sure the rest
	 * reads back as zero rather than as uninitialised heap contents.
	 */
	memset(buf, 0, size);

	expected = size;
	if (st.st_size < size)
		expected = st.st_size;

	errno = 0;
	got = read(fd, buf, size);
	if (got < 0 || (size_t)got != expected) {
		err = (errno ? -errno : -EIO);
		goto out;
	}

	memcpy(header, buf, sizeof(*header));
	be32_to_cpus(&header->magic);
	be32_to_cpus(&header->version);
	be64_to_cpus(&header->backing_file_offset);
	be32_to_cpus(&header->backing_file_size);
	be32_to_cpus(&header->mtime);
	be64_to_cpus(&header->size);
	be32_to_cpus(&header->crypt_method);
	be64_to_cpus(&header->l1_table_offset);

	err = 0;

out:
	free(buf);
	return err;
}
/*
 * Allocate s->l1_table and fill it from the image file.
 *
 * Derives s->l1_size from header->size and the cluster/L2 geometry already
 * stored in 's', then reads everything from file offset 0 up to the end of
 * the L1 table (rounded up to 4K) into a temporary buffer and copies the
 * table out of it.
 *
 * If the table starts on a 4K boundary and a Xen extended header (XEN_MAGIC)
 * follows the QCowHeader:
 *   - when EXTHDR_L1_BIG_ENDIAN is not set, the table is converted with
 *     cpu_to_be64s(), the flag is set and the block is written back to
 *     the file;
 *   - when the stored checksum matches gen_cksum() over the table,
 *     s->extended, s->sparse and s->min_cluster_alloc are taken from the
 *     extended header; otherwise only a diagnostic is printed.
 *
 * On return the table entries have been passed through be64_to_cpus().
 *
 * Returns 0 on success.  On failure returns a negative errno value, frees
 * the table and leaves s->l1_table == NULL.
 */
static int
tdqcow_load_l1_table(struct tdqcow_state *s, QCowHeader *header)
{
	char *buf;
	struct stat st;
	ssize_t got;
	size_t expected, copy_bytes;
	int i, err, shift;
	QCowHeader_ext *exthdr;
	uint32_t l1_table_bytes, l1_table_block, l1_table_size;

	buf = NULL;
	s->l1_table = NULL;

	shift = s->cluster_bits + s->l2_bits;

	s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
	s->l1_table_offset = header->l1_table_offset;

	s->min_cluster_alloc = 1; /* default */

	l1_table_bytes = s->l1_size * sizeof(uint64_t);
	l1_table_size = (l1_table_bytes + 4095) & ~4095;
	l1_table_block = (l1_table_bytes + s->l1_table_offset + 4095) & ~4095;

	DPRINTF("L1 Table offset detected: %"PRIu64", size %d (%d)\n",
		(uint64_t)s->l1_table_offset,
		(int) (s->l1_size * sizeof(uint64_t)),
		l1_table_size);

	/*
	 * l1_table_block is only 32 bits wide; if the offset from the header
	 * does not fit inside it, buf + l1_table_offset would point outside
	 * the buffer allocated below.
	 */
	if (s->l1_table_offset > l1_table_block) {
		err = -EINVAL;
		goto out;
	}

	err = fstat(s->fd, &st);
	if (err) {
		err = -errno;
		goto out;
	}

	if (lseek(s->fd, 0, SEEK_SET) == (off_t)-1) {
		err = -errno;
		goto out;
	}

	err = posix_memalign((void **)&buf, 512, l1_table_block);
	if (err) {
		err = -err;	/* posix_memalign reports a positive errno */
		buf = NULL;
		goto out;
	}

	err = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
	if (err) {
		err = -err;
		s->l1_table = NULL;
		goto out;
	}

	memset(buf, 0, l1_table_block);
	memset(s->l1_table, 0, l1_table_size);

	expected = l1_table_block;
	if (st.st_size < l1_table_block)
		expected = st.st_size;

	errno = 0;
	got = read(s->fd, buf, l1_table_block);
	if (got < 0 || (size_t)got != expected) {
		err = (errno ? -errno : -EIO);
		goto out;
	}

	/*
	 * Copy the 4K-padded table, but never past the end of buf: when the
	 * table does not start on a 4K boundary, fewer than l1_table_size
	 * bytes follow l1_table_offset inside the block that was read.
	 */
	copy_bytes = l1_table_size;
	if (copy_bytes > l1_table_block - s->l1_table_offset)
		copy_bytes = l1_table_block - s->l1_table_offset;
	memcpy(s->l1_table, buf + s->l1_table_offset, copy_bytes);

	exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));

	/* check for xen extended header */
	if (s->l1_table_offset % 4096 == 0 &&
	    be32_to_cpu(exthdr->xmagic) == XEN_MAGIC) {
		uint32_t flags = be32_to_cpu(exthdr->flags);
		uint32_t cksum = be32_to_cpu(exthdr->cksum);

		/*
		 * Images without EXTHDR_L1_BIG_ENDIAN are treated as having
		 * a native-endian L1 table on disk.  Convert it, record the
		 * flag and persist the result, so that after this block the
		 * table is big endian both in memory and in the file.
		 */
		if (!(flags & EXTHDR_L1_BIG_ENDIAN)) {
			DPRINTF("qcow: converting to big endian L1 table\n");

			/* convert to big endian */
			for (i = 0; i < s->l1_size; i++)
				cpu_to_be64s(&s->l1_table[i]);

			flags |= EXTHDR_L1_BIG_ENDIAN;
			exthdr->flags = cpu_to_be32(flags);

			/* offset is 4K-aligned here, so this fits exactly */
			memcpy(buf + s->l1_table_offset,
			       s->l1_table, l1_table_size);

			if (lseek(s->fd, 0, SEEK_SET) == (off_t)-1) {
				err = -errno;
				goto out;
			}

			errno = 0;
			err = atomicio(vwrite, s->fd, buf, l1_table_block);
			if (err != l1_table_block) {
				/* never report a short write as success */
				err = (errno ? -errno : -EIO);
				goto out;
			}
		}

		/* check the L1 table checksum */
		if (cksum != gen_cksum((char *)s->l1_table,
				       s->l1_size * sizeof(uint64_t)))
			DPRINTF("qcow: bad L1 checksum\n");
		else {
			s->extended = 1;
			s->sparse = (be32_to_cpu(exthdr->flags) & SPARSE_FILE);
			s->min_cluster_alloc =
				be32_to_cpu(exthdr->min_cluster_alloc);
		}
	}

	/* convert L1 table to native endian for operation */
	for (i = 0; i < s->l1_size; i++)
		be64_to_cpus(&s->l1_table[i]);

	err = 0;

out:
	/* the bounce buffer is never needed after this function */
	free(buf);
	if (err) {
		free(s->l1_table);
		s->l1_table = NULL;
	}
	return err;
}
/*
 * Open the disk file and initialize qcow state.
 *
 * Opens 'name' with O_DIRECT | O_LARGEFILE (read-only when flags equals
 * TD_OPEN_RDONLY, read-write otherwise), validates the qcow header, loads
 * the L1 table, allocates the L2 cache and the two cluster-sized scratch
 * buffers, initialises AIO state and computes s->fd_end.
 *
 * driver->data must point to the struct tdqcow_state to fill in;
 * driver->info receives the disk size in 512-byte sectors and sector size.
 *
 * Returns 0 on success, -1 on any failure.  On failure every buffer
 * allocated here (including s->name) is released and the file is closed.
 */
int tdqcow_open (td_driver_t *driver, const char *name, td_flag_t flags)
{
	int fd, i, ret, size, o_flags;
	td_disk_info_t *bs = &(driver->info);
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	QCowHeader header;
	uint64_t final_cluster = 0;

	DPRINTF("QCOW: Opening %s\n", name);

	o_flags = O_DIRECT | O_LARGEFILE |
		((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
	fd = open(name, o_flags);
	if (fd < 0) {
		DPRINTF("Unable to open %s (%d)\n", name, -errno);
		return -1;
	}

	s->fd = fd;

	/*
	 * The fail path frees all of these unconditionally, so give them a
	 * defined value before the first 'goto fail' can be taken.
	 */
	s->l1_table = NULL;
	s->l2_cache = NULL;
	s->cluster_cache = NULL;
	s->cluster_data = NULL;

	s->name = strdup(name);
	if (!s->name)
		goto fail;

	if (tdqcow_read_header(fd, &header))
		goto fail;

	if (header.magic != QCOW_MAGIC)
		goto fail;

	switch (header.version) {
	case QCOW_VERSION:
		break;
	case 2:
		/*
		 * TODO: Port qcow2 to new blktap framework; once ported this
		 * case should hand the open over to the qcow2 driver.
		 */
		goto fail;
	default:
		goto fail;
	}

	if (header.size <= 1 || header.cluster_bits < 9)
		goto fail;
	/*
	 * Both values come from the image file and are used as shift counts
	 * on plain ints below; anything above 30 would be undefined behaviour.
	 */
	if (header.cluster_bits > 30 || header.l2_bits > 30)
		goto fail;
	if (header.crypt_method > QCOW_CRYPT_AES)
		goto fail;

	s->crypt_method_header = header.crypt_method;
	if (s->crypt_method_header)
		s->encrypted = 1;

	s->cluster_bits = header.cluster_bits;
	s->cluster_size = 1 << s->cluster_bits;
	s->cluster_sectors = 1 << (s->cluster_bits - 9);
	s->l2_bits = header.l2_bits;
	s->l2_size = 1 << s->l2_bits;
	s->cluster_alloc = s->l2_size;
	bs->size = header.size / 512;
	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
	s->backing_file_offset = header.backing_file_offset;
	s->backing_file_size = header.backing_file_size;

	/* allocate and load l1 table */
	if (tdqcow_load_l1_table(s, &header))
		goto fail;

	/* alloc L2 cache */
	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
	if (ret != 0)
		goto fail;

	/* two cluster-sized scratch buffers */
	size = s->cluster_size;
	ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
	if (ret != 0)
		goto fail;

	ret = posix_memalign((void **)&s->cluster_data, 4096, size);
	if (ret != 0)
		goto fail;

	s->cluster_cache_offset = -1;

	if (s->backing_file_offset != 0)
		s->cluster_alloc = 1; /*Cannot use pre-alloc*/

	bs->sector_size = 512;
	bs->info = 0;

	/* largest L1 entry; non-zero means at least one L2 table exists */
	for (i = 0; i < s->l1_size; i++)
		if (s->l1_table[i] > final_cluster)
			final_cluster = s->l1_table[i];

	if (init_aio_state(driver) != 0) {
		DPRINTF("Unable to initialise AIO state\n");
		/*
		 * The fail path calls free_aio_state(); calling it here as
		 * well would release the same state twice.
		 */
		goto fail;
	}

	if (!final_cluster)
		/* empty image: data starts right after the 4K-padded L1 table */
		s->fd_end = s->l1_table_offset +
			((s->l1_size * sizeof(uint64_t) + 4095) & ~4095);
	else {
		s->fd_end = lseek64(fd, 0, SEEK_END);
		if (s->fd_end == (off64_t)-1)
			goto fail;
	}

	return 0;

fail:
	DPRINTF("QCOW Open failed\n");

	free_aio_state(s);
	free(s->l1_table);
	free(s->l2_cache);
	free(s->cluster_cache);
	free(s->cluster_data);
	free(s->name);

	/* leave no dangling pointers behind in the driver state */
	s->l1_table = NULL;
	s->l2_cache = NULL;
	s->cluster_cache = NULL;
	s->cluster_data = NULL;
	s->name = NULL;

	close(fd);
	return -1;
}
void tdqcow_queue_read(td_driver_t *driver, td_request_t treq)
{
struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
int ret = 0, index_in_cluster, n, i;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -