/* block-qcow.c
*
* Asynchronous Qemu copy-on-write disk implementation.
* Code based on the Qemu implementation
* (see copyright notice below)
*
* (c) 2006 Andrew Warfield and Julian Chesterfield
*
*/
/*
* Block driver for the QCOW format
*
* Copyright (c) 2004 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*/
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/statvfs.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <string.h>
#include <zlib.h>
#include <inttypes.h>
#include <libaio.h>
#include <openssl/md5.h>
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"
#include "tapdisk-driver.h"
#include "tapdisk-interface.h"
#include "qcow.h"
#include "blk.h"
#include "atomicio.h"
/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif
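/*
* Fatal assertion: log the failed expression and its location, then
* deliberately dereference NULL so the process dies immediately
* instead of continuing with inconsistent state.
*/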
#if 1
#define ASSERT(_p) \
if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
__LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p) ((void)0)
#endif
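/*
* Book-keeping for one in-flight asynchronous request: the completion
* callback and its arguments, plus the buffer and sector range of the
* original request.
*/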
struct pending_aio {
td_callback_t cb;
int id;
void *private;
int nb_sectors;
char *buf;
uint64_t sector;
};
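/*
* IOCB_IDX recovers the slot index of an iocb within the state's
* preallocated iocb_list array by pointer arithmetic.
*/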
#undef IOCB_IDX
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
#define ZERO_TEST(_b) (_b | 0x00)
struct qcow_request {
td_request_t treq;
struct tiocb tiocb;
struct tdqcow_state *state;
};
static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
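/*
* Both gen_cksum() variants below hash the buffer with MD5 and return
* the first 32 bits of the 128-bit digest as the checksum.
*/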
#ifdef USE_GCRYPT
#include <gcrypt.h>
uint32_t gen_cksum(char *ptr, int len)
{
uint32_t md[4];
/* Generate checksum */
gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);
return md[0];
}
#else /* use libcrypto */
#include <openssl/md5.h>
uint32_t gen_cksum(char *ptr, int len)
{
unsigned char *md;
uint32_t ret;
md = malloc(MD5_DIGEST_LENGTH);
if(!md) return 0;
/* Generate checksum */
if (MD5((unsigned char *)ptr, len, md) != md)
ret = 0;
else
memcpy(&ret, md, sizeof(uint32_t));
free(md);
return ret;
}
#endif
static void free_aio_state(struct tdqcow_state* s)
{
free(s->aio_requests);
free(s->aio_free_list);
}
static int init_aio_state(td_driver_t *driver)
{
int i;
struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
// A segment (i.e. a page) can span multiple clusters
s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
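/*
* Worked example (hypothetical sizes): with 4 KiB pages and 4 KiB
* clusters, a page can straddle a cluster boundary, so each segment
* may need (4096/4096 + 1) = 2 cluster-sized I/Os, giving
* 2 * MAX_SEGMENTS_PER_REQ * MAX_REQUESTS outstanding requests.
*/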
s->aio_free_count = s->max_aio_reqs;
if (!(s->aio_requests = calloc(s->max_aio_reqs, sizeof(struct qcow_request))) ||
!(s->aio_free_list = calloc(s->max_aio_reqs, sizeof(struct qcow_request *)))) {
DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
s->max_aio_reqs);
goto fail;
}
for (i = 0; i < s->max_aio_reqs; i++)
s->aio_free_list[i] = &s->aio_requests[i];
DPRINTF("AIO state initialised\n");
return 0;
fail:
return -1;
}
int get_filesize(char *filename, uint64_t *size, struct stat *st)
{
int fd;
QCowHeader header;
/*Set to the backing file size*/
fd = open(filename, O_RDONLY);
if (fd < 0)
return -1;
if (read(fd, &header, sizeof(header)) != sizeof(header)) {
close(fd);
return -1;
}
close(fd);
be32_to_cpus(&header.magic);
be64_to_cpus(&header.size);
if (header.magic == QCOW_MAGIC) {
*size = header.size >> SECTOR_SHIFT;
return 0;
}
if(S_ISBLK(st->st_mode)) {
fd = open(filename, O_RDONLY);
if (fd < 0)
return -1;
if (blk_getimagesize(fd, size) != 0) {
printf("Unable to get Block device size\n");
close(fd);
return -1;
}
close(fd);
} else *size = (st->st_size >> SECTOR_SHIFT);
return 0;
}
static int qcow_set_key(struct tdqcow_state *s, const char *key)
{
uint8_t keybuf[16];
int len, i;
memset(keybuf, 0, 16);
len = strlen(key);
if (len > 16)
len = 16;
/* XXX: we could compress the chars to 7 bits to increase
entropy */
for (i = 0; i < len; i++) {
keybuf[i] = key[i];
}
s->crypt_method = s->crypt_method_header;
if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
return -1;
if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
return -1;
#if 0
/* test */
{
uint8_t in[16];
uint8_t out[16];
uint8_t tmp[16];
for (i=0; i<16; i++)
in[i] = i;
AES_encrypt(in, tmp, &s->aes_encrypt_key);
AES_decrypt(tmp, out, &s->aes_decrypt_key);
for (i = 0; i < 16; i++)
DPRINTF(" %02x", tmp[i]);
DPRINTF("\n");
for (i = 0; i < 16; i++)
DPRINTF(" %02x", out[i]);
DPRINTF("\n");
}
#endif
return 0;
}
void tdqcow_complete(void *arg, struct tiocb *tiocb, int err)
{
struct qcow_request *aio = (struct qcow_request *)arg;
struct tdqcow_state *s = aio->state;
td_complete_request(aio->treq, err);
s->aio_free_list[s->aio_free_count++] = aio;
}
static void async_read(td_driver_t *driver, td_request_t treq)
{
int size;
uint64_t offset;
struct qcow_request *aio;
struct tdqcow_state *prv;
prv = (struct tdqcow_state *)driver->data;
size = treq.secs * driver->info.sector_size;
offset = treq.sec * (uint64_t)driver->info.sector_size;
if (prv->aio_free_count == 0)
goto fail;
aio = prv->aio_free_list[--prv->aio_free_count];
aio->treq = treq;
aio->state = prv;
td_prep_read(&aio->tiocb, prv->fd, treq.buf,
size, offset, tdqcow_complete, aio);
td_queue_tiocb(driver, &aio->tiocb);
return;
fail:
td_complete_request(treq, -EBUSY);
}
static void async_write(td_driver_t *driver, td_request_t treq)
{
int size;
uint64_t offset;
struct qcow_request *aio;
struct tdqcow_state *prv;
prv = (struct tdqcow_state *)driver->data;
size = treq.secs * driver->info.sector_size;
offset = treq.sec * (uint64_t)driver->info.sector_size;
if (prv->aio_free_count == 0)
goto fail;
aio = prv->aio_free_list[--prv->aio_free_count];
aio->treq = treq;
aio->state = prv;
td_prep_write(&aio->tiocb, prv->fd, treq.buf,
size, offset, tdqcow_complete, aio);
td_queue_tiocb(driver, &aio->tiocb);
return;
fail:
td_complete_request(treq, -EBUSY);
}
/*
* The crypt function is compatible with the Linux cryptoloop
* algorithm for < 4 GB images. NOTE: out_buf == in_buf is
* supported.
*/
static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, int enc,
const AES_KEY *key)
{
union {
uint64_t ll[2];
uint8_t b[16];
} ivec;
int i;
for (i = 0; i < nb_sectors; i++) {
ivec.ll[0] = cpu_to_le64(sector_num);
ivec.ll[1] = 0;
AES_cbc_encrypt(in_buf, out_buf, 512, key,
ivec.b, enc);
sector_num++;
in_buf += 512;
out_buf += 512;
}
}
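/*
* Illustrative call (hypothetical, not from this fragment): decrypting
* nb_sectors in place after reading an encrypted cluster would be
*
*     encrypt_sectors(s, sector_num, buf, buf, nb_sectors, 0,
*                     &s->aes_decrypt_key);
*
* with enc = 1 and &s->aes_encrypt_key on the write path.
*/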
int qtruncate(int fd, off_t length, int sparse)
{
int ret, i;
int current = 0, rem = 0;
uint64_t sectors;
struct stat st;
char *buf;
/* If length is greater than the current file len
* we synchronously write zeroes to the end of the
* file, otherwise we truncate the length down
*/
ret = fstat(fd, &st);
if (ret == -1)
return -1;
if (S_ISBLK(st.st_mode))
return 0;
sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
rem = st.st_size % DEFAULT_SECTOR_SIZE;
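/*
* 'rem' is the length in bytes of a trailing partial sector, if any;
* it is padded out below so the extension loop can append whole
* sectors from 'current' up to 'sectors'.
*/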
/* If we are extending this file, we write zeros to the end --
* this tries to ensure that the extents allocated wind up being
* contiguous on disk.
*/
if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
/*We are extending the file*/
if ((ret = posix_memalign((void **)&buf,
512, DEFAULT_SECTOR_SIZE))) {
DPRINTF("posix_memalign failed: %d\n", ret);
return -1;
}
memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
if (lseek(fd, 0, SEEK_END)==-1) {
DPRINTF("Lseek EOF failed (%d), internal error\n",
errno);
free(buf);
return -1;
}
if (rem) {
/* pad the trailing partial sector out to a full sector boundary */
ret = write(fd, buf, DEFAULT_SECTOR_SIZE - rem);
if (ret != DEFAULT_SECTOR_SIZE - rem) {
DPRINTF("write failed: ret = %d, err = %s\n",
ret, strerror(errno));
free(buf);
return -1;
}
}
for (i = current; i < sectors; i++ ) {
ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
if (ret != DEFAULT_SECTOR_SIZE) {
DPRINTF("write failed: ret = %d, err = %s\n",
ret, strerror(errno));
free(buf);
return -1;
}
}
free(buf);
} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
return -1;
}
return 0;
}
/* 'allocate' is:
*
* 0 to not allocate.
*
* 1 to allocate a normal cluster (for sector indexes 'n_start' to
* 'n_end')
*
* 2 to allocate a compressed cluster of size
* 'compressed_size'. 'compressed_size' must be > 0 and <
* cluster_size
*
* return 0 if not allocated.
*/
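/*
* Illustrative calls (hypothetical, not from this fragment): a read
* path probes with get_cluster_offset(s, offset, 0, 0, 0, 0) and
* treats a zero return as an unallocated hole; a write path passes
* allocate=1 with the touched sector range [n_start, n_end) so the
* untouched sectors of a newly allocated cluster can be handled
* correctly.
*/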
static uint64_t get_cluster_offset(struct tdqcow_state *s,
uint64_t offset, int allocate,
int compressed_size,
int n_start, int n_end)
{
int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
char *tmp_ptr2, *l2_ptr, *l1_ptr;
uint64_t *tmp_ptr;
uint64_t l2_offset, *l2_table, cluster_offset, tmp;
uint32_t min_count;
int new_l2_table;
/*Check L1 table for the extent offset*/
l1_index = offset >> (s->l2_bits + s->cluster_bits);
l2_offset = s->l1_table[l1_index];
new_l2_table = 0;
if (!l2_offset) {
if (!allocate)
return 0;
/*
* allocating a new l2 entry + extent
* at the end of the file, we must also
* update the L1 entry safely.
*/
l2_offset = s->fd_end;
/* round to cluster size */
l2_offset = (l2_offset + s->cluster_size - 1)
& ~(s->cluster_size - 1);
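/*
* cluster_size is a power of two, so the mask above rounds l2_offset
* up to the next cluster boundary (e.g. ~0xfff for 4 KiB clusters).
*/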
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
/*Truncate file for L2 table
*(initialised to zero in case we crash)*/
if (qtruncate(s->fd,
l2_offset + (s->l2_size * sizeof(uint64_t)),
s->sparse) != 0) {
DPRINTF("ERROR truncating file\n");
return 0;
}
s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
/*Update the L1 table entry on disk
* (for O_DIRECT we write 4KByte blocks)*/
l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
l1_ptr = (char *)s->l1_table + (l1_sector << 12);
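/*
* l1_sector selects the 4 KiB-aligned block of the in-memory L1 table
* that contains the updated entry; O_DIRECT writes must be issued in
* aligned 4 KiB units.
*/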
if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
DPRINTF("ERROR allocating memory for L1 table\n");
return 0;
}
memcpy(tmp_ptr, l1_ptr, 4096);
/* Convert block to write to big endian */
for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
cpu_to_be64s(&tmp_ptr[i]);
}
/*
* Issue non-asynchronous L1 write.
* For safety, we must ensure that
* entry is written before blocks.
*/
if (lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET) == -1 ||
    write(s->fd, tmp_ptr, 4096) != 4096) {
free(tmp_ptr);
return 0;
}
free(tmp_ptr);
new_l2_table = 1;
goto cache_miss;
} else if (s->min_cluster_alloc == s->l2_size) {
/*Fast-track the request*/
cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
return cluster_offset + (l2_index * s->cluster_size);
}
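/*
* The fast path above assumes that when min_cluster_alloc equals
* l2_size, a full L2 table's worth of data clusters was preallocated
* contiguously right after the L2 table itself, so the data offset
* can be computed without reading the table.
*/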
/*Check to see if L2 entry is already cached*/
for (i = 0; i < L2_CACHE_SIZE; i++) {
if (l2_offset == s->l2_cache_offsets[i]) {
/* increment the hit count */
if (++s->l2_cache_counts[i] == 0xffffffff) {
for (j = 0; j < L2_CACHE_SIZE; j++) {
s->l2_cache_counts[j] >>= 1;
}
}
l2_table = s->l2_cache + (i << s->l2_bits);
goto found;
}
}
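/*
* The scan above ages hit counters by halving them all once one
* saturates, so long-resident entries cannot permanently starve new
* ones in the least-used eviction that follows.
*/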
cache_miss:
/* not found: load a new entry in the least used one */
min_index = 0;
min_count = 0xffffffff;
for (i = 0; i < L2_CACHE_SIZE; i++) {
if (s->l2_cache_counts[i] < min_count) {
min_count = s->l2_cache_counts[i];
min_index = i;
}
}