xen-mapcache
# HG changeset patch
# User kfraser@localhost.localdomain
# Node ID 67a06a9b7b1dca707e1cd3b08ae0a341d6e97b3d
# Parent  3f0ca90351e268084fbdb733d70fc596cb46537d
[HVM] qemu: Add guest address-space mapping cache.

On an IA32 or IA32 PAE host we currently cannot create an HVM guest
with more than 2G of memory: Qemu can almost never find a contiguous
virtual address range large enough to map the guest's entire physical
address space in one go. This patch fixes the issue by mapping guest
memory dynamically, in small blocks.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
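The scheme the patch implements: guest memory is mapped in fixed-size buckets (64KB on i386, 1MB on x86_64), and qemu_map_cache() resolves a guest-physical address to a host pointer by splitting it into a bucket index and an offset within that bucket. A minimal standalone sketch of the split, reusing the i386 constants from the patch (the example address is arbitrary, and this is an illustration rather than code from the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define MCACHE_BUCKET_SHIFT 16                   /* i386 value in the patch */
    #define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)

    int main(void)
    {
        uint64_t phys_addr = 0x12345678;             /* arbitrary guest address */

        /* High bits select the bucket, low bits the byte within it. */
        unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
        unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

        printf("bucket %#lx, offset %#lx\n", address_index, address_offset);
        return 0;
    }

Only the buckets a guest actually touches ever get mapped, so the virtual address space Qemu needs is bounded by MAX_MCACHE_SIZE rather than by the guest's memory size.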
Index: ioemu/vl.c
===================================================================
--- ioemu.orig/vl.c	2007-05-11 10:04:51.000000000 +0100
+++ ioemu/vl.c	2007-05-11 10:04:52.000000000 +0100
@@ -275,7 +275,7 @@
     for(i = start; i < start + length; i += size) {
         ioport_read_table[bsize][i] = func;
         if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
-            hw_error("register_ioport_read: invalid opaque");
+            hw_error("register_ioport_write: invalid opaque");
         ioport_opaque[i] = opaque;
     }
     return 0;
@@ -6791,6 +6791,157 @@
     suspend_requested = 1;
 }
 
+#if defined(MAPCACHE)
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE     0x40000000   /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE     0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>PAGE_SHIFT);
+};
+
+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+
+/* For most cases (>99.9%), the page address is the same. */
+static unsigned long last_address_index = ~0UL;
+static uint8_t      *last_address_vaddr;
+
+static int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    nr_buckets = (((MAX_MCACHE_SIZE >> PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - PAGE_SHIFT));
+    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = nr_buckets * sizeof(struct map_cache);
+    size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+    mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANONYMOUS, 0, 0);
+    if (mapcache_entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(struct map_cache *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
+
+    vaddr_base = xc_map_foreign_batch(xc_handle, domid, PROT_READ|PROT_WRITE,
+                                      pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base  = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0)
+            word = (word << 1) | !(pfns[i + --j] & 0xF0000000UL);
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
+{
+    struct map_cache *entry;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
+
+    if (address_index == last_address_index)
+        return last_address_vaddr + address_offset;
+
+    entry = &mapcache_entry[address_index % nr_buckets];
+
+    if (entry->vaddr_base == NULL || entry->paddr_index != address_index ||
+        !test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        qemu_remap_bucket(entry, address_index);
+
+    if (!test_bit(address_offset>>PAGE_SHIFT, entry->valid_mapping))
+        return NULL;
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+
+    return last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+
+    mapcache_lock();
+
+    for (i = 0; i < nr_buckets; i++) {
+        struct map_cache *entry = &mapcache_entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(logfile, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base  = NULL;
+    }
+
+    last_address_index = ~0UL;
+    last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+
+#endif /* defined(MAPCACHE) */
+
 int main(int argc, char **argv)
 {
 #ifdef CONFIG_GDBSTUB
@@ -6827,8 +6978,11 @@
     unsigned long ioreq_pfn;
     extern void *shared_page;
     extern void *buffered_io_page;
-    extern void *buffered_pio_page;
+#ifdef __ia64__
    unsigned long nr_pages;
+    xen_pfn_t *page_array;
+    extern void *buffered_pio_page;
+#endif
 
     char qemu_dm_logfilename[64];
 
@@ -7119,6 +7273,7 @@
                 break;
             case QEMU_OPTION_m:
                 ram_size = atol(optarg) * 1024 * 1024;
+                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
                 if (ram_size <= 0)
                     help();
 #ifndef CONFIG_DM
@@ -7472,30 +7627,15 @@
 
 #if defined(__i386__) || defined(__x86_64__)
 
-    nr_pages = ram_size/PAGE_SIZE;
-
-    page_array = (xen_pfn_t *)malloc(nr_pages * sizeof(xen_pfn_t));
-    if (page_array == NULL) {
-        fprintf(logfile, "malloc returned error %d\n", errno);
-        exit(-1);
-    }
-
-    for ( i = 0; i < nr_pages; i++)
-        page_array[i] = i;
-
-    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
-                                         PROT_READ|PROT_WRITE, page_array,
-                                         nr_pages);
-    if (phys_ram_base == NULL) {
-        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
+    if (qemu_map_cache_init()) {
+        fprintf(logfile, "qemu_map_cache_init returned: error %d\n", errno);
         exit(-1);
     }
 
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "shared page at pfn %lx\n", ioreq_pfn);
     shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                       PROT_READ|PROT_WRITE,
-                                       page_array[ioreq_pfn]);
+                                       PROT_READ|PROT_WRITE, ioreq_pfn);
     if (shared_page == NULL) {
         fprintf(logfile, "map shared IO page returned error %d\n", errno);
         exit(-1);
@@ -7504,15 +7644,12 @@
     xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &ioreq_pfn);
     fprintf(logfile, "buffered io page at pfn %lx\n", ioreq_pfn);
     buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
-                                            PROT_READ|PROT_WRITE,
-                                            page_array[ioreq_pfn]);
+                                            PROT_READ|PROT_WRITE, ioreq_pfn);
     if (buffered_io_page == NULL) {
         fprintf(logfile, "map buffered IO page returned error %d\n", errno);
         exit(-1);
     }
 
-    free(page_array);
-
 #elif defined(__ia64__)
 
     nr_pages = ram_size/PAGE_SIZE;
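A detail worth noting in qemu_remap_bucket() above: xc_map_foreign_batch() flags any PFN it fails to map by setting bits in the top nibble of the corresponding array slot, which is what the !(pfns[i] & 0xF0000000UL) test picks up, and the per-page results are packed into the bucket's valid_mapping bitmap. A standalone sketch of that packing for a hypothetical 4-page bucket (slot 2 carries the failure mark), not code from the patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned long pfns[4] = { 0x100, 0x101, 0xF0000102UL, 0x103 };
        unsigned long word = 0;
        int j = 4;

        /* Same loop shape as qemu_remap_bucket(): after the loop,
         * bit i holds the validity of page i of the bucket. */
        while (j > 0)
            word = (word << 1) | !(pfns[--j] & 0xF0000000UL);

        for (j = 0; j < 4; j++)
            printf("page %d valid: %lu\n", j, (word >> j) & 1UL);
        return 0;
    }

qemu_map_cache() returns NULL for any page whose bit is clear, which is how MMIO holes inside an otherwise RAM-backed bucket are kept off the memcpy() path.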
Index: ioemu/target-i386-dm/exec-dm.c
===================================================================
--- ioemu.orig/target-i386-dm/exec-dm.c	2007-05-11 10:04:46.000000000 +0100
+++ ioemu/target-i386-dm/exec-dm.c	2007-05-11 10:04:52.000000000 +0100
@@ -36,6 +36,7 @@
 
 #include "cpu.h"
 #include "exec-all.h"
+#include "vl.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -127,10 +128,17 @@
 FILE *logfile;
 int loglevel;
 
+#ifdef MAPCACHE
+pthread_mutex_t mapcache_mutex;
+#endif
+
 void cpu_exec_init(CPUState *env)
 {
     CPUState **penv;
     int cpu_index;
+#ifdef MAPCACHE
+    pthread_mutexattr_t mxattr;
+#endif
 
     env->next_cpu = NULL;
     penv = &first_cpu;
@@ -144,6 +152,14 @@
 
     /* alloc dirty bits array */
     phys_ram_dirty = qemu_malloc(phys_ram_size >> TARGET_PAGE_BITS);
+
+#ifdef MAPCACHE
+    /* setup memory access mutex to protect mapcache */
+    pthread_mutexattr_init(&mxattr);
+    pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE);
+    pthread_mutex_init(&mapcache_mutex, &mxattr);
+    pthread_mutexattr_destroy(&mxattr);
+#endif
 }
 
 /* enable or disable low levels log */
@@ -414,16 +430,11 @@
     return 0;
 }
 
-static inline int paddr_is_ram(target_phys_addr_t addr)
-{
-    /* Is this guest physical address RAM-backed? */
-#if defined(CONFIG_DM) && (defined(__i386__) || defined(__x86_64__))
-    return ((addr < HVM_BELOW_4G_MMIO_START) ||
-            (addr >= HVM_BELOW_4G_MMIO_START + HVM_BELOW_4G_MMIO_LENGTH));
-#else
-    return (addr < ram_size);
+#if defined(__i386__) || defined(__x86_64__)
+#define phys_ram_addr(x) (qemu_map_cache(x))
+#elif defined(__ia64__)
+#define phys_ram_addr(x) ((addr < ram_size) ? (phys_ram_base + (x)) : NULL)
 #endif
-}
 
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
@@ -431,13 +442,15 @@
     int l, io_index;
     uint8_t *ptr;
     uint32_t val;
-    
+
+    mapcache_lock();
+
     while (len > 0) {
         /* How much can we copy before the next page boundary? */
         l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK);
         if (l > len)
             l = len;
-    
+
         io_index = iomem_index(addr);
         if (is_write) {
             if (io_index) {
@@ -457,11 +470,11 @@
                     io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(phys_ram_base + addr, buf, l);
+                memcpy(ptr, buf, l);
 #ifdef __ia64__
-                sync_icache((unsigned long)(phys_ram_base + addr), l);
+                sync_icache(ptr, l);
 #endif
             }
         } else {
@@ -482,9 +495,9 @@
                     stb_raw(buf, val);
                     l = 1;
                 }
-            } else if (paddr_is_ram(addr)) {
+            } else if ((ptr = phys_ram_addr(addr)) != NULL) {
                 /* Reading from RAM */
-                memcpy(buf, phys_ram_base + addr, l);
+                memcpy(buf, ptr, l);
             } else {
                 /* Neither RAM nor known MMIO space */
                 memset(buf, 0xff, len);
@@ -494,6 +507,8 @@
         buf += l;
         addr += l;
     }
+
+    mapcache_unlock();
 }
 #endif
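The mutex protecting the cache is created PTHREAD_MUTEX_RECURSIVE rather than left at the default. The likely reason (the patch does not spell it out) is that cpu_physical_memory_rw() now holds mapcache_lock() across the whole copy loop, and the same thread can reach code that takes the lock again, such as qemu_invalidate_map_cache(); a default mutex would self-deadlock there. A standalone sketch of the behaviour, under that assumed nesting scenario:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t mapcache_mutex;

    static void nested(void)
    {
        pthread_mutex_lock(&mapcache_mutex);   /* same thread: lock nests */
        puts("nested critical section");
        pthread_mutex_unlock(&mapcache_mutex);
    }

    int main(void)
    {
        pthread_mutexattr_t mxattr;

        /* Same initialisation sequence as cpu_exec_init() above. */
        pthread_mutexattr_init(&mxattr);
        pthread_mutexattr_settype(&mxattr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&mapcache_mutex, &mxattr);
        pthread_mutexattr_destroy(&mxattr);

        pthread_mutex_lock(&mapcache_mutex);
        nested();                              /* no deadlock */
        pthread_mutex_unlock(&mapcache_mutex);
        return 0;
    }

(Compile with -lpthread.)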
Index: ioemu/vl.h
===================================================================
--- ioemu.orig/vl.h	2007-05-11 10:04:51.000000000 +0100
+++ ioemu/vl.h	2007-05-11 10:04:52.000000000 +0100
@@ -159,6 +159,28 @@
 
 extern FILE *logfile;
 
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#define MAPCACHE
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
+void qemu_invalidate_map_cache(void);
+
+#include <pthread.h>
+extern pthread_mutex_t mapcache_mutex;
+#define mapcache_lock()   pthread_mutex_lock(&mapcache_mutex)
+#define mapcache_unlock() pthread_mutex_unlock(&mapcache_mutex)
+
+#else
+
+#define qemu_invalidate_map_cache() ((void)0)
+
+#define mapcache_lock()   ((void)0)
+#define mapcache_unlock() ((void)0)
+
+#endif
+
 extern int xc_handle;
 extern int domid;
 
Index: ioemu/target-i386-dm/cpu.h
===================================================================
--- ioemu.orig/target-i386-dm/cpu.h	2007-05-11 10:04:47.000000000 +0100
+++ ioemu/target-i386-dm/cpu.h	2007-05-11 10:04:52.000000000 +0100
@@ -25,7 +25,8 @@
 #ifdef TARGET_X86_64
 #define TARGET_LONG_BITS 64
 #else
-#define TARGET_LONG_BITS 32
+/* #define TARGET_LONG_BITS 32 */
+#define TARGET_LONG_BITS 64 /* for Qemu map cache */
 #endif
 
 /* target supports implicit self modifying code */
Index: ioemu/target-i386-dm/helper2.c
===================================================================
--- ioemu.orig/target-i386-dm/helper2.c	2007-05-11 10:04:50.000000000 +0100
+++ ioemu/target-i386-dm/helper2.c	2007-05-11 10:04:52.000000000 +0100
@@ -526,6 +526,9 @@
     case IOREQ_TYPE_TIMEOFFSET:
         cpu_ioreq_timeoffset(env, req);
         break;
+    case IOREQ_TYPE_INVALIDATE:
+        qemu_invalidate_map_cache();
+        break;
     default:
         hw_error("Invalid ioreq type 0x%x\n", req->type);
     }
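With the patch applied, code that previously indexed phys_ram_base directly has to go through qemu_map_cache() and cope with a NULL result, and any cached pointer becomes stale once the hypervisor sends an IOREQ_TYPE_INVALIDATE request (handled above by dropping every bucket). A hypothetical caller mirroring the post-patch RAM write path; write_guest_ram() and the target_phys_addr_t stand-in are illustrations, not part of the patch:

    #include <stdint.h>
    #include <string.h>

    typedef uint64_t target_phys_addr_t;       /* stand-in for the qemu type */
    uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);   /* from vl.h */

    /* Returns 0 on success, -1 if addr is not RAM-backed. The pointer is
     * only valid until the next qemu_invalidate_map_cache(), so it must
     * not be cached across ioreq processing. */
    int write_guest_ram(target_phys_addr_t addr, const uint8_t *buf, size_t len)
    {
        uint8_t *ptr = qemu_map_cache(addr);   /* maps the bucket on demand */
        if (ptr == NULL)
            return -1;                         /* unmapped page or MMIO hole */
        memcpy(ptr, buf, len);                 /* caller keeps len within a page */
        return 0;
    }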