hvm.c

From the "Xen virtual machine source code" package · C · 2,304 lines total · page 1/5

        {
            put_page(pagetable_get_page(v->arch.guest_table));
            v->arch.guest_table = pagetable_null();
        }
    }

    if ( has_arch_pdevs(v->domain) )
    {
        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
        {
            /* Entering no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;

            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
            {
                /* Flush physical caches. */
                on_each_cpu(local_flush_cache, NULL, 1, 1);
                hvm_set_uc_mode(v, 1);
            }
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
        {
            /* Exit from no fill cache mode. */
            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
            if ( domain_exit_uc_mode(v) )
                hvm_set_uc_mode(v, 0);
            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
        }
    }

    v->arch.hvm_vcpu.guest_cr[0] = value;
    hvm_update_guest_cr(v, 0);

    if ( (value ^ old_value) & X86_CR0_PG )
        paging_update_paging_modes(v);

    return X86EMUL_OKAY;

 gpf:
    hvm_inject_exception(TRAP_gp_fault, 0, 0);
    return X86EMUL_EXCEPTION;
}

int hvm_set_cr3(unsigned long value)
{
    unsigned long mfn;
    p2m_type_t p2mt;
    struct vcpu *v = current;

    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
         (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
             !get_page(mfn_to_page(mfn), v->domain) )
            goto bad_cr3;

        put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_from_pfn(mfn);

        HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value);
    }

    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    return X86EMUL_OKAY;

 bad_cr3:
    gdprintk(XENLOG_ERR, "Invalid CR3\n");
    domain_crash(v->domain);
    return X86EMUL_UNHANDLEABLE;
}

int hvm_set_cr4(unsigned long value)
{
    struct vcpu *v = current;
    unsigned long old_cr;

    if ( value & HVM_CR4_GUEST_RESERVED_BITS )
    {
        HVM_DBG_LOG(DBG_LEVEL_1,
                    "Guest attempts to set reserved bit in CR4: %lx",
                    value);
        goto gpf;
    }

    if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) )
    {
        HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while "
                    "EFER.LMA is set");
        goto gpf;
    }

    old_cr = v->arch.hvm_vcpu.guest_cr[4];
    v->arch.hvm_vcpu.guest_cr[4] = value;
    hvm_update_guest_cr(v, 4);

    /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
    if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
        paging_update_paging_modes(v);

    return X86EMUL_OKAY;

 gpf:
    hvm_inject_exception(TRAP_gp_fault, 0, 0);
    return X86EMUL_EXCEPTION;
}
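/*
 * Usage sketch, not part of the original file: a vendor-specific CR-access
 * exit handler is assumed to decode the target register from its exit
 * information and dispatch to the helpers above, roughly:
 *
 *     switch ( cr )
 *     {
 *     case 0: return hvm_set_cr0(value);
 *     case 3: return hvm_set_cr3(value);
 *     case 4: return hvm_set_cr4(value);
 *     }
 *
 * Each helper returns an X86EMUL_* code: X86EMUL_EXCEPTION means a #GP has
 * already been injected into the guest, and X86EMUL_UNHANDLEABLE means the
 * domain has been crashed.
 */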
int hvm_virtual_to_linear_addr(
    enum x86_segment seg,
    struct segment_register *reg,
    unsigned long offset,
    unsigned int bytes,
    enum hvm_access_type access_type,
    unsigned int addr_size,
    unsigned long *linear_addr)
{
    unsigned long addr = offset;
    uint32_t last_byte;

    if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
    {
        /*
         * REAL MODE: Don't bother with segment access checks.
         * Certain of them are not done in native real mode anyway.
         */
        addr = (uint32_t)(addr + reg->base);
    }
    else if ( addr_size != 64 )
    {
        /*
         * COMPATIBILITY MODE: Apply segment checks and add base.
         */
        switch ( access_type )
        {
        case hvm_access_read:
            if ( (reg->attr.fields.type & 0xa) == 0x8 )
                goto gpf; /* execute-only code segment */
            break;
        case hvm_access_write:
            if ( (reg->attr.fields.type & 0xa) != 0x2 )
                goto gpf; /* not a writable data segment */
            break;
        default:
            break;
        }

        last_byte = offset + bytes - 1;

        /* Is this a grows-down data segment? Special limit check if so. */
        if ( (reg->attr.fields.type & 0xc) == 0x4 )
        {
            /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
            if ( !reg->attr.fields.db )
                last_byte = (uint16_t)last_byte;

            /* Check first byte and last byte against respective bounds. */
            if ( (offset <= reg->limit) || (last_byte < offset) )
                goto gpf;
        }
        else if ( (last_byte > reg->limit) || (last_byte < offset) )
            goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */

        /*
         * Hardware truncates to 32 bits in compatibility mode.
         * It does not truncate to 16 bits in 16-bit address-size mode.
         */
        addr = (uint32_t)(addr + reg->base);
    }
    else
    {
        /*
         * LONG MODE: FS and GS add segment base. Addresses must be canonical.
         */
        if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
            addr += reg->base;

        if ( !is_canonical_address(addr) )
            goto gpf;
    }

    *linear_addr = addr;
    return 1;

 gpf:
    return 0;
}
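/*
 * Worked example for the expand-down case above, editor's illustration:
 * take a 16-bit stack segment (attr.fields.db == 0) with limit == 0x1fff.
 * Valid offsets lie *above* the limit, i.e. 0x2000..0xffff.  A 4-byte
 * access at offset 0x1ffe trips the "offset <= reg->limit" test and
 * faults; one at 0x2000 passes (last_byte == 0x2003).  At offset 0xfffe,
 * last_byte wraps and is truncated to 16 bits (0x0001), so the
 * "last_byte < offset" test faults it.
 */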
static void *hvm_map_entry(unsigned long va)
{
    unsigned long gfn, mfn;
    p2m_type_t p2mt;
    uint32_t pfec;

    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
    {
        gdprintk(XENLOG_ERR, "Descriptor table entry "
                 "straddles page boundary\n");
        domain_crash(current->domain);
        return NULL;
    }

    /*
     * We're mapping on behalf of the segment-load logic, which might
     * write the accessed flags in the descriptors (in 32-bit mode), but
     * we still treat it as a kernel-mode read (i.e. no access checks).
     */
    pfec = PFEC_page_present;
    gfn = paging_gva_to_gfn(current, va, &pfec);
    mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    if ( !p2m_is_ram(p2mt) )
    {
        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
        domain_crash(current->domain);
        return NULL;
    }

    ASSERT(mfn_valid(mfn));

    paging_mark_dirty(current->domain, mfn);

    return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
}

static void hvm_unmap_entry(void *p)
{
    if ( p )
        unmap_domain_page(p);
}
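/*
 * Descriptor layout assumed by the decode below, editor's note: a GDT/LDT
 * entry is two 32-bit words.  desc.a holds limit[15:0] and base[15:0];
 * desc.b holds base[23:16] (bits 0-7), the type/S/DPL/P attributes
 * (bits 8-15), limit[19:16] (bits 16-19), the AVL/L/DB/G attributes
 * (bits 20-23), and base[31:24] (bits 24-31).  For example, the flat 4GiB
 * accessed code descriptor desc.a = 0x0000ffff, desc.b = 0x00cf9b00
 * decodes to base 0 and attr 0x0c9b, and with the granularity bit set
 * yields an effective limit of 0xffffffff.
 */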
static int hvm_load_segment_selector(
    struct vcpu *v, enum x86_segment seg, uint16_t sel)
{
    struct segment_register desctab, cs, segr;
    struct desc_struct *pdesc, desc;
    u8 dpl, rpl, cpl;
    int fault_type = TRAP_invalid_tss;

    /* NULL selector? */
    if ( (sel & 0xfffc) == 0 )
    {
        if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
            goto fail;
        memset(&segr, 0, sizeof(segr));
        hvm_set_segment_register(v, seg, &segr);
        return 0;
    }

    /* LDT descriptor must be in the GDT. */
    if ( (seg == x86_seg_ldtr) && (sel & 4) )
        goto fail;

    hvm_get_segment_register(v, x86_seg_cs, &cs);
    hvm_get_segment_register(
        v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);

    /* Check against descriptor table limit. */
    if ( ((sel & 0xfff8) + 7) > desctab.limit )
        goto fail;

    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
    if ( pdesc == NULL )
        goto hvm_map_fail;

    do {
        desc = *pdesc;

        /* Segment present in memory? */
        if ( !(desc.b & (1u<<15)) )
        {
            fault_type = TRAP_no_segment;
            goto unmap_and_fail;
        }

        /* LDT descriptor is a system segment. All others are code/data. */
        if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
            goto unmap_and_fail;

        dpl = (desc.b >> 13) & 3;
        rpl = sel & 3;
        cpl = cs.sel & 3;

        switch ( seg )
        {
        case x86_seg_cs:
            /* Code segment? */
            if ( !(desc.b & (1u<<11)) )
                goto unmap_and_fail;
            /* Non-conforming segment: check DPL against RPL. */
            if ( ((desc.b & (6u<<9)) != (6u<<9)) && (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ss:
            /* Writable data segment? */
            if ( (desc.b & (5u<<9)) != (1u<<9) )
                goto unmap_and_fail;
            if ( (dpl != cpl) || (dpl != rpl) )
                goto unmap_and_fail;
            break;
        case x86_seg_ldtr:
            /* LDT system segment? */
            if ( (desc.b & (15u<<8)) != (2u<<8) )
                goto unmap_and_fail;
            goto skip_accessed_flag;
        default:
            /* Readable code or data segment? */
            if ( (desc.b & (5u<<9)) == (4u<<9) )
                goto unmap_and_fail;
            /* Non-conforming segment: check DPL against RPL and CPL. */
            if ( ((desc.b & (6u<<9)) != (6u<<9)) &&
                 ((dpl < cpl) || (dpl < rpl)) )
                goto unmap_and_fail;
            break;
        }
    } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
              (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );

    /* Force the Accessed flag in our local copy. */
    desc.b |= 0x100;

 skip_accessed_flag:
    hvm_unmap_entry(pdesc);

    segr.base = (((desc.b <<  0) & 0xff000000u) |
                 ((desc.b << 16) & 0x00ff0000u) |
                 ((desc.a >> 16) & 0x0000ffffu));
    segr.attr.bytes = (((desc.b >>  8) & 0x00ffu) |
                       ((desc.b >> 12) & 0x0f00u));
    segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
    if ( segr.attr.fields.g )
        segr.limit = (segr.limit << 12) | 0xfffu;
    segr.sel = sel;
    hvm_set_segment_register(v, seg, &segr);

    return 0;

 unmap_and_fail:
    hvm_unmap_entry(pdesc);
 fail:
    hvm_inject_exception(fault_type, sel & 0xfffc, 0);
 hvm_map_fail:
    return 1;
}
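/*
 * Editor's note on the function below: hvm_task_switch() emulates a legacy
 * 32-bit hardware task switch for the guest.  It validates the new TSS
 * descriptor in the GDT (present, correct type, large enough), saves the
 * current registers and segment selectors into the outgoing TSS (the
 * anonymous struct mirrors the 104-byte 32-bit TSS layout), then loads the
 * incoming TSS; the load continues beyond this page of the listing.  Note
 * the type check: a switch caused by IRET expects the target TSS to be
 * busy (type 0xb), any other reason expects an available one (type 0x9).
 */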
void hvm_task_switch(
    uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
    int32_t errcode)
{
    struct vcpu *v = current;
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    struct segment_register gdt, tr, prev_tr, segr;
    struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
    unsigned long eflags;
    int exn_raised, rc;
    struct {
        u16 back_link, __blh;
        u32 esp0;
        u16 ss0, _0;
        u32 esp1;
        u16 ss1, _1;
        u32 esp2;
        u16 ss2, _2;
        u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
        u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
        u16 trace, iomap;
    } tss = { 0 };

    hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
    hvm_get_segment_register(v, x86_seg_tr, &prev_tr);

    if ( ((tss_sel & 0xfff8) + 7) > gdt.limit )
    {
        hvm_inject_exception((taskswitch_reason == TSW_iret) ?
                             TRAP_invalid_tss : TRAP_gp_fault,
                             tss_sel & 0xfff8, 0);
        goto out;
    }

    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
    if ( optss_desc == NULL )
        goto out;

    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
    if ( nptss_desc == NULL )
        goto out;

    tss_desc = *nptss_desc;
    tr.sel = tss_sel;
    tr.base = (((tss_desc.b <<  0) & 0xff000000u) |
               ((tss_desc.b << 16) & 0x00ff0000u) |
               ((tss_desc.a >> 16) & 0x0000ffffu));
    tr.attr.bytes = (((tss_desc.b >>  8) & 0x00ffu) |
                     ((tss_desc.b >> 12) & 0x0f00u));
    tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu);
    if ( tr.attr.fields.g )
        tr.limit = (tr.limit << 12) | 0xfffu;

    if ( !tr.attr.fields.p )
    {
        hvm_inject_exception(TRAP_no_segment, tss_sel & 0xfff8, 0);
        goto out;
    }

    if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
    {
        hvm_inject_exception(
            (taskswitch_reason == TSW_iret) ?
            TRAP_invalid_tss : TRAP_gp_fault,
            tss_sel & 0xfff8, 0);
        goto out;
    }

    if ( tr.limit < (sizeof(tss)-1) )
    {
        hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
        goto out;
    }

    rc = hvm_copy_from_guest_virt(
        &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
    if ( rc == HVMCOPY_bad_gva_to_gfn )
        goto out;

    eflags = regs->eflags;
    if ( taskswitch_reason == TSW_iret )
        eflags &= ~X86_EFLAGS_NT;

    tss.cr3    = v->arch.hvm_vcpu.guest_cr[3];
    tss.eip    = regs->eip;
    tss.eflags = eflags;
    tss.eax    = regs->eax;
    tss.ecx    = regs->ecx;
    tss.edx    = regs->edx;
    tss.ebx    = regs->ebx;
    tss.esp    = regs->esp;
    tss.ebp    = regs->ebp;
    tss.esi    = regs->esi;
    tss.edi    = regs->edi;

    hvm_get_segment_register(v, x86_seg_es, &segr);
    tss.es = segr.sel;
    hvm_get_segment_register(v, x86_seg_cs, &segr);
    tss.cs = segr.sel;
    hvm_get_segment_register(v, x86_seg_ss, &segr);
    tss.ss = segr.sel;
    hvm_get_segment_register(v, x86_seg_ds, &segr);
    tss.ds = segr.sel;
    hvm_get_segment_register(v, x86_seg_fs, &segr);
    tss.fs = segr.sel;
    hvm_get_segment_register(v, x86_seg_gs, &segr);
    tss.gs = segr.sel;
    hvm_get_segment_register(v, x86_seg_ldtr, &segr);
    tss.ldt = segr.sel;

    rc = hvm_copy_to_guest_virt(