hvm.c

From the Xen 3.2.2 source · C code · 2,070 lines total · Page 1 of 4

    set_bit(_VPF_down, &v->pause_flags);
    clear_bit(_VPF_blocked, &v->pause_flags);
    v->fpu_initialised = 0;
    v->fpu_dirtied     = 0;
    v->is_initialised  = 0;
    vcpu_unpause(v);
}

static void hvm_vcpu_down(void)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    int online_count = 0;

    gdprintk(XENLOG_INFO, "VCPU%d: going offline.\n", v->vcpu_id);

    /* Doesn't halt us immediately, but we'll never return to guest context. */
    set_bit(_VPF_down, &v->pause_flags);
    vcpu_sleep_nosync(v);

    /* Any other VCPUs online? ... */
    LOCK_BIGLOCK(d);
    for_each_vcpu ( d, v )
        if ( !test_bit(_VPF_down, &v->pause_flags) )
            online_count++;
    UNLOCK_BIGLOCK(d);

    /* ... Shut down the domain if not. */
    if ( online_count == 0 )
    {
        gdprintk(XENLOG_INFO, "all CPUs offline -- powering off.\n");
        domain_shutdown(d, SHUTDOWN_poweroff);
    }
}

void hvm_send_assist_req(struct vcpu *v)
{
    ioreq_t *p;

    if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
        return; /* implicitly bins the i/o operation */

    p = &get_ioreq(v)->vp_ioreq;
    if ( unlikely(p->state != STATE_IOREQ_NONE) )
    {
        /* This indicates a bug in the device model.  Crash the domain. */
        gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
        domain_crash_synchronous();
    }

    prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);

    /*
     * Following happens /after/ blocking and setting up ioreq contents.
     * prepare_wait_on_xen_event_channel() is an implicit barrier.
     */
    p->state = STATE_IOREQ_READY;
    notify_via_xen_event_channel(v->arch.hvm_vcpu.xen_port);
}

void hvm_hlt(unsigned long rflags)
{
    /*
     * If we halt with interrupts disabled, that's a pretty sure sign that we
     * want to shut down. In a real processor, NMIs are the only way to break
     * out of this.
     */
    if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
        return hvm_vcpu_down();

    do_sched_op_compat(SCHEDOP_block, 0);
}

void hvm_triple_fault(void)
{
    struct vcpu *v = current;

    gdprintk(XENLOG_INFO, "Triple fault on VCPU%d - "
             "invoking HVM system reset.\n", v->vcpu_id);

    domain_shutdown(v->domain, SHUTDOWN_reboot);
}

int hvm_set_efer(uint64_t value)
{
    struct vcpu *v = current;

    value &= ~EFER_LMA;

    if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
         ((sizeof(long) != 8) && (value & EFER_LME)) ||
         (!cpu_has_nx && (value & EFER_NX)) ||
         (!cpu_has_syscall && (value & EFER_SCE)) ||
         (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
    {
        gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
                 "EFER: %"PRIx64"\n", value);
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    if ( ((value ^ v->arch.hvm_vcpu.guest_efer) & EFER_LME) &&
         hvm_paging_enabled(v) )
    {
        gdprintk(XENLOG_WARNING,
                 "Trying to change EFER.LME with paging enabled\n");
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    value |= v->arch.hvm_vcpu.guest_efer & EFER_LMA;
    v->arch.hvm_vcpu.guest_efer = value;
    hvm_update_guest_efer(v);

    return 1;
}

extern void shadow_blow_tables_per_domain(struct domain *d);
extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);

/* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
static bool_t domain_exit_uc_mode(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct vcpu *vs;

    for_each_vcpu ( d, vs )
    {
        if ( (vs == v) || !vs->is_initialised )
            continue;
        if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
             mtrr_pat_not_equal(vs, v) )
            return 0;
    }

    return 1;
}

static void local_flush_cache(void *info)
{
    wbinvd();
}

int hvm_set_cr0(unsigned long value)
{
    struct vcpu *v = current;
    p2m_type_t p2mt;
    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];

    HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);

    if ( (u32)value != value )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set upper 32 bits in CR0: %lx",
                    value);
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    value &= ~HVM_CR0_GUEST_RESERVED_BITS;

    /* ET is reserved and should always be 1. */
    value |= X86_CR0_ET;

    if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
    {
        hvm_inject_exception(TRAP_gp_fault, 0, 0);
        return 0;
    }

    if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) )
    {
        if ( v->arch.hvm_vcpu.guest_efer & EFER_LME )
        {
            if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) )
            {
                HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable");
                hvm_inject_exception(TRAP_gp_fault, 0, 0);
                return 0;
            }
            HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode");
            v->arch.hvm_vcpu.guest_efer |= EFER_LMA;
            hvm_update_guest_efer(v);
        }

        if ( !paging_mode_hap(v->domain) )
        {
            /* The guest CR3 must be pointing to the guest physical. */
            gfn = v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT;
            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
                 !get_page(mfn_to_page(mfn), v->domain) )
            {
                gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                         v->arch.hvm_vcpu.guest_cr[3], mfn);
                domain_crash(v->domain);
                return 0;
            }

            /* Now arch.guest_table points to machine physical. */
            v->arch.guest_table = pagetable_from_pfn(mfn);

            HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
                        v->arch.hvm_vcpu.guest_cr[3], mfn);
        }
    }
    else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) )
    {
        /* When CR0.PG is cleared, LMA is cleared immediately. */
        if ( hvm_long_mode_enabled(v) )
        {
            v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA;
            hvm_update_guest_efer(v);
        }

        if ( !paging_mode_hap(v->domain) )
        {
            put_page(pagetable_get_page(v->arch.guest_table));
            v->arch.guest_table = pagetable_null();
        }
    }

    if ( !list_empty(&domain_hvm_iommu(v->domain)->pdev_list) )
    {
        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
        {
            /* Entering no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;

            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
            {
                /* Flush physical caches. */
                on_each_cpu(local_flush_cache, NULL, 1, 1);
                /* Shadow pagetables must recognise UC mode. */
                v->domain->arch.hvm_domain.is_in_uc_mode = 1;
                shadow_blow_tables_per_domain(v->domain);
            }
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
        {
            /* Exit from no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;

            if ( domain_exit_uc_mode(v) )
            {
                /* Shadow pagetables must recognise normal caching mode. */
                v->domain->arch.hvm_domain.is_in_uc_mode = 0;
                shadow_blow_tables_per_domain(v->domain);
            }
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
    }

    v->arch.hvm_vcpu.guest_cr[0] = value;
    hvm_update_guest_cr(v, 0);

    if ( (value ^ old_value) & X86_CR0_PG )
        paging_update_paging_modes(v);

    return 1;
}

int hvm_set_cr3(unsigned long value)
{
    unsigned long mfn;
    p2m_type_t p2mt;
    struct vcpu *v = current;

    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
         (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
             !get_page(mfn_to_page(mfn), v->domain) )
            goto bad_cr3;

        put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_from_pfn(mfn);

        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
    }

    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    return 1;

 bad_cr3:
    gdprintk(XENLOG_ERR, "Invalid CR3\n");
    domain_crash(v->domain);
    return 0;
}

int hvm_set_cr4(unsigned long value)
{
    struct vcpu *v = current;
    unsigned long old_cr;

    if ( value & HVM_CR4_GUEST_RESERVED_BITS )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set reserved bit in CR4: %lx",
                    value);
        goto gpf;
    }

    if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
    {
        HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
                    "EFER.LMA is set");
        goto gpf;
    }

    old_cr = v->arch.hvm_vcpu.guest_cr[4];
    v->arch.hvm_vcpu.guest_cr[4] = value;
    hvm_update_guest_cr(v, 4);

    /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
        paging_update_paging_modes(v);

    return 1;

 gpf:
    hvm_inject_exception(TRAP_gp_fault, 0, 0);
    return 0;
}

int hvm_virtual_to_linear_addr(
    enum x86_segment seg,
    struct segment_register *reg,
    unsigned long offset,
    unsigned int bytes,
    enum hvm_access_type access_type,
    unsigned int addr_size,
    unsigned long *linear_addr)
{
    unsigned long addr = offset;
    uint32_t last_byte;

    if ( addr_size != 64 )
    {
        /*
         * COMPATIBILITY MODE: Apply segment checks and add base.
         */
        switch ( access_type )
        {
        case hvm_access_read:
            if ( (reg->attr.fields.type & 0xa) == 0x8 )
                goto gpf; /* execute-only code segment */
            break;
        case hvm_access_write:
            if ( (reg->attr.fields.type & 0xa) != 0x2 )
                goto gpf; /* not a writable data segment */
            break;
        default:
            break;
        }

        last_byte = offset + bytes - 1;

        /* Is this a grows-down data segment? Special limit check if so. */
        if ( (reg->attr.fields.type & 0xc) == 0x4 )
        {
            /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
            if ( !reg->attr.fields.db )
                last_byte = (uint16_t)last_byte;

            /* Check first byte and last byte against respective bounds. */
            if ( (offset <= reg->limit) || (last_byte < offset) )
                goto gpf;
        }
        else if ( (last_byte > reg->limit) || (last_byte < offset) )
            goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */

        /*
         * Hardware truncates to 32 bits in compatibility mode.
         * It does not truncate to 16 bits in 16-bit address-size mode.
         */
        addr = (uint32_t)(addr + reg->base);
    }
    else
    {
        /*
         * LONG MODE: FS and GS add segment base. Addresses must be canonical.
         */
        if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
            addr += reg->base;

        if ( !is_canonical_address(addr) )
            goto gpf;
    }

    *linear_addr = addr;
    return 1;

 gpf:
    return 0;
}

static void *hvm_map_entry(unsigned long va)
{
    unsigned long gfn, mfn;
    p2m_type_t p2mt;
    uint32_t pfec;

    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
    {
        gdprintk(XENLOG_ERR, "Descriptor table entry "
                 "straddles page boundary\n");
        domain_crash(current->domain);
        return NULL;
    }

    /* We're mapping on behalf of the segment-load logic, which might
     * write the accessed flags in the descriptors (in 32-bit mode), but
     * we still treat it as a kernel-mode read (i.e. no access checks). */
    pfec = PFEC_page_present;
    gfn = paging_gva_to_gfn(current, va, &pfec);
    mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
    {
        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
        domain_crash(current->domain);
        return NULL;
    }

    ASSERT(mfn_valid(mfn));

    paging_mark_dirty(current->domain, mfn);

    return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
}

static void hvm_unmap_entry(void *p)
{
    if ( p )
        unmap_domain_page(p);
}

static int hvm_load_segment_selector(
    struct vcpu *v, enum x86_segment seg, uint16_t sel)
{
    struct segment_register desctab, cs, segr;
    struct desc_struct *pdesc, desc;
    u8 dpl, rpl, cpl;
    int fault_type = TRAP_invalid_tss;

    /* NULL selector? */
    if ( (sel & 0xfffc) == 0 )
    {
        if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
            goto fail;
        memset(&segr, 0, sizeof(segr));
        hvm_set_segment_register(v, seg, &segr);
        return 0;
    }

    /* LDT descriptor must be in the GDT. */
    if ( (seg == x86_seg_ldtr) && (sel & 4) )
        goto fail;

    hvm_get_segment_register(v, x86_seg_cs, &cs);
    hvm_get_segment_register(
        v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);

    /* Check against descriptor table limit. */
    if ( ((sel & 0xfff8) + 7) > desctab.limit )
        goto fail;

    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
    if ( pdesc == NULL )
        goto hvm_map_fail;

    do {
        desc = *pdesc;

        /* Segment present in memory? */
        if ( !(desc.b & (1u<<15)) )
        {
            fault_type = TRAP_no_segment;
            goto unmap_and_fail;
        }

        /* LDT descriptor is a system segment. All others are code/data. */
        if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
            goto unmap_and_fail;

        dpl = (desc.b >> 13) & 3;
        rpl = sel & 3;
        cpl = cs.sel & 3;

        switch ( seg )
        {
        case x86_seg_cs:
            /* Code segment? */
            if ( !(desc.b & (1u<<11)) )
                goto unmap_and_fail;
            /* Non-conforming segment: check DPL against RPL. */
            if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ss:
            /* Writable data segment? */
            if ( (desc.b & (5u<<9)) != (1u<<9) )
                goto unmap_and_fail;
            if ( (dpl != cpl) || (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ldtr:
