hvm.c
From the Xen 3.2.2 source tree · C · 2,070 lines · page 1 of 4
    set_bit(_VPF_down, &v->pause_flags);
    clear_bit(_VPF_blocked, &v->pause_flags);
    v->fpu_initialised = 0;
    v->fpu_dirtied     = 0;
    v->is_initialised  = 0;

    vcpu_unpause(v);
}

static void hvm_vcpu_down(void)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    int online_count = 0;

    gdprintk(XENLOG_INFO, "VCPU%d: going offline.\n", v->vcpu_id);

    /* Doesn't halt us immediately, but we'll never return to guest context. */
    set_bit(_VPF_down, &v->pause_flags);
    vcpu_sleep_nosync(v);

    /* Any other VCPUs online? ... */
    LOCK_BIGLOCK(d);
    for_each_vcpu ( d, v )
        if ( !test_bit(_VPF_down, &v->pause_flags) )
            online_count++;
    UNLOCK_BIGLOCK(d);

    /* ... Shut down the domain if not. */
    if ( online_count == 0 )
    {
        gdprintk(XENLOG_INFO, "all CPUs offline -- powering off.\n");
        domain_shutdown(d, SHUTDOWN_poweroff);
    }
}

void hvm_send_assist_req(struct vcpu *v)
{
    ioreq_t *p;

    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
        return; /* implicitly bins the i/o operation */

    p = &get_ioreq(v)->vp_ioreq;
    if ( unlikely(p->state != STATE_IOREQ_NONE) )
    {
        /* This indicates a bug in the device model. Crash the domain. */
        gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
        domain_crash_synchronous();
    }

    prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);

    /*
     * Following happens /after/ blocking and setting up ioreq contents.
     * prepare_wait_on_xen_event_channel() is an implicit barrier.
     */
    p->state = STATE_IOREQ_READY;
    notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
}

void hvm_hlt(unsigned long rflags)
{
    /*
     * If we halt with interrupts disabled, that's a pretty sure sign that we
     * want to shut down. In a real processor, NMIs are the only way to break
     * out of this.
     */
    if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
        return hvm_vcpu_down();

    do_sched_op_compat(SCHEDOP_block, 0);
}

void hvm_triple_fault(void)
{
    struct vcpu *v = current;

    gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
             "invoking HVM system reset.\n", v->vcpu_id);
    domain_shutdown(v->domain, SHUTDOWN_reboot);
}

int hvm_set_efer(uint64_t value)
{
    struct vcpu *v = current;

    value &= ~EFER_LMA;

    if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
         ((sizeof(long) != 8) && (value & EFER_LME)) ||
         (!cpu_has_nx && (value & EFER_NX)) ||
         (!cpu_has_syscall && (value & EFER_SCE)) ||
         (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
    {
        gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
                 "EFER: %"PRIx64"\n", value);
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
         hvm_paging_enabled(v) )
    {
        gdprintk(XENLOG_WARNING,
                 "Trying to change EFER.LME with paging enabled\n");
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
    v->arch.hvm_vcpu.guest_efer = value;
    hvm_update_guest_efer(v);

    return 1;
}

extern void shadow_blow_tables_per_domain(struct domain *d);
extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);

/* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
static bool_t domain_exit_uc_mode(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct vcpu *vs;

    for_each_vcpu ( d, vs )
    {
        if ( (vs == v) || !vs->is_initialised )
            continue;
        if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
             mtrr_pat_not_equal(vs, v) )
            return 0;
    }

    return 1;
}

static void local_flush_cache(void *info)
{
    wbinvd();
}

int hvm_set_cr0(unsigned long value)
{
    struct vcpu *v = current;
    p2m_type_t p2mt;
    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];

    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);

    if ( (u32)value != value )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set upper 32 bits in CR0: %lx",
                    value);
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    value &= ~HVM_CR0_GUEST_RESERVED_BITS;

    /* ET is reserved and should be always be 1. */
    value |= X86_CR0_ET;

    if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
    {
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
    {
        if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
        {
            if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
            {
                HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
                hvm_inject_exception(TRAP_gp_fault, 0, 0);
                return 0;
            }
            HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
            v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
            hvm_update_guest_efer(v);
        }

        if ( !paging_mode_hap(v->domain) )
        {
            /* The guest CR3 must be pointing to the guest physical. */
            gfn = v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT;
            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
                 !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                         v->arch.hvm_vcpu.guest_cr[3], mfn);
                domain_crash(v->domain);
                return 0;
            }

            /* Now arch.guest_table points to machine physical. */
            v->arch.guest_table = pagetable_from_pfn(mfn);

            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
                        v->arch.hvm_vcpu.guest_cr[3], mfn);
        }
    }
    else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
    {
        /* When CR0.PG is cleared, LMA is cleared immediately. */
        if ( hvm_long_mode_enabled(v) )
        {
            v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
            hvm_update_guest_efer(v);
        }

        if ( !paging_mode_hap(v->domain) )
        {
            put_page(pagetable_get_page(v->arch.guest_table));
            v->arch.guest_table = pagetable_null();
        }
    }

    if ( !list_empty(&domain_hvm_iommu(v->domain)->pdev_list) )
    {
        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
        {
            /* Entering no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;

            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
            {
                /* Flush physical caches. */
                on_each_cpu(local_flush_cache, NULL, 1, 1);
                /* Shadow pagetables must recognise UC mode. */
                v->domain->arch.hvm_domain.is_in_uc_mode = 1;
                shadow_blow_tables_per_domain(v->domain);
            }
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
        {
            /* Exit from no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;

            if ( domain_exit_uc_mode(v) )
            {
                /* Shadow pagetables must recognise normal caching mode. */
                v->domain->arch.hvm_domain.is_in_uc_mode = 0;
                shadow_blow_tables_per_domain(v->domain);
            }
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
    }

    v->arch.hvm_vcpu.guest_cr[0] = value;
    hvm_update_guest_cr(v, 0);

    if ( (value ^ old_value) & X86_CR0_PG )
        paging_update_paging_modes(v);

    return 1;
}

int hvm_set_cr3(unsigned long value)
{
    unsigned long mfn;
    p2m_type_t p2mt;
    struct vcpu *v = current;

    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
         (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
             !get_page(mfn_to_page(mfn), v->domain) )
            goto bad_cr3;

        put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_from_pfn(mfn);

        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
    }

    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    return 1;

 bad_cr3:
    gdprintk(XENLOG_ERR, "Invalid CR3\n");
    domain_crash(v->domain);
    return 0;
}

int hvm_set_cr4(unsigned long value)
{
    struct vcpu *v = current;
    unsigned long old_cr;

    if ( value & HVM_CR4_GUEST_RESERVED_BITS )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set reserved bit in CR4: %lx",
                    value);
        goto gpf;
    }

    if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
    {
        HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
                    "EFER.LMA is set");
        goto gpf;
    }

    old_cr = v->arch.hvm_vcpu.guest_cr[4];
    v->arch.hvm_vcpu.guest_cr[4] = value;
    hvm_update_guest_cr(v, 4);

    /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
        paging_update_paging_modes(v);

    return 1;

 gpf:
    hvm_inject_exception(TRAP_gp_fault, 0, 0);
    return 0;
}

int hvm_virtual_to_linear_addr(
    enum x86_segment seg,
    struct segment_register *reg,
    unsigned long offset,
    unsigned int bytes,
    enum hvm_access_type access_type,
    unsigned int addr_size,
    unsigned long *linear_addr)
{
    unsigned long addr = offset;
    uint32_t last_byte;

    if ( addr_size != 64 )
    {
        /*
         * COMPATIBILITY MODE: Apply segment checks and add base.
         */

        switch ( access_type )
        {
        case hvm_access_read:
            if ( (reg->attr.fields.type & 0xa) == 0x8 )
                goto gpf; /* execute-only code segment */
            break;
        case hvm_access_write:
            if ( (reg->attr.fields.type & 0xa) != 0x2 )
                goto gpf; /* not a writable data segment */
            break;
        default:
            break;
        }

        last_byte = offset + bytes - 1;

        /* Is this a grows-down data segment? Special limit check if so. */
        if ( (reg->attr.fields.type & 0xc) == 0x4 )
        {
            /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
            if ( !reg->attr.fields.db )
                last_byte = (uint16_t)last_byte;

            /* Check first byte and last byte against respective bounds. */
            if ( (offset <= reg->limit) || (last_byte < offset) )
                goto gpf;
        }
        else if ( (last_byte > reg->limit) || (last_byte < offset) )
            goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */

        /*
         * Hardware truncates to 32 bits in compatibility mode.
         * It does not truncate to 16 bits in 16-bit address-size mode.
         */
        addr = (uint32_t)(addr + reg->base);
    }
    else
    {
        /*
         * LONG MODE: FS and GS add segment base. Addresses must be canonical.
         */

        if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
            addr += reg->base;

        if ( !is_canonical_address(addr) )
            goto gpf;
    }

    *linear_addr = addr;
    return 1;

 gpf:
    return 0;
}

static void *hvm_map_entry(unsigned long va)
{
    unsigned long gfn, mfn;
    p2m_type_t p2mt;
    uint32_t pfec;

    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
    {
        gdprintk(XENLOG_ERR, "Descriptor table entry "
                 "straddles page boundary\n");
        domain_crash(current->domain);
        return NULL;
    }

    /* We're mapping on behalf of the segment-load logic, which might
     * write the accessed flags in the descriptors (in 32-bit mode), but
     * we still treat it as a kernel-mode read (i.e. no access checks). */
    pfec = PFEC_page_present;
    gfn = paging_gva_to_gfn(current, va, &pfec);
    mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
    {
        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
        domain_crash(current->domain);
        return NULL;
    }

    ASSERT(mfn_valid(mfn));

    paging_mark_dirty(current->domain, mfn);

    return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
}

static void hvm_unmap_entry(void *p)
{
    if ( p )
        unmap_domain_page(p);
}

static int hvm_load_segment_selector(
    struct vcpu *v, enum x86_segment seg, uint16_t sel)
{
    struct segment_register desctab, cs, segr;
    struct desc_struct *pdesc, desc;
    u8 dpl, rpl, cpl;
    int fault_type = TRAP_invalid_tss;

    /* NULL selector? */
    if ( (sel & 0xfffc) == 0 )
    {
        if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
            goto fail;
        memset(&segr, 0, sizeof(segr));
        hvm_set_segment_register(v, seg, &segr);
        return 0;
    }

    /* LDT descriptor must be in the GDT. */
    if ( (seg == x86_seg_ldtr) && (sel & 4) )
        goto fail;

    hvm_get_segment_register(v, x86_seg_cs, &cs);
    hvm_get_segment_register(
        v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);

    /* Check against descriptor table limit. */
    if ( ((sel & 0xfff8) + 7) > desctab.limit )
        goto fail;

    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
    if ( pdesc == NULL )
        goto hvm_map_fail;

    do {
        desc = *pdesc;

        /* Segment present in memory? */
        if ( !(desc.b & (1u<<15)) )
        {
            fault_type = TRAP_no_segment;
            goto unmap_and_fail;
        }

        /* LDT descriptor is a system segment. All others are code/data. */
        if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
            goto unmap_and_fail;

        dpl = (desc.b >> 13) & 3;
        rpl = sel & 3;
        cpl = cs.sel & 3;

        switch ( seg )
        {
        case x86_seg_cs:
            /* Code segment? */
            if ( !(desc.b & (1u<<11)) )
                goto unmap_and_fail;
            /* Non-conforming segment: check DPL against RPL. */
            if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ss:
            /* Writable data segment? */
            if ( (desc.b & (5u<<9)) != (1u<<9) )
                goto unmap_and_fail;
            if ( (dpl != cpl) || (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ldtr: