📄 domain.c
        rc = -EFAULT;
        if ( copy_to_guest(arg, &cpu_id, 1) )
            break;

        rc = 0;
        break;
    }

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}

#ifdef __x86_64__

#define loadsegment(seg,value) ({               \
    int __r = 1;                                \
    asm volatile (                              \
        "1: movl %k1,%%" #seg "\n2:\n"          \
        ".section .fixup,\"ax\"\n"              \
        "3: xorl %k0,%k0\n"                     \
        "   movl %k0,%%" #seg "\n"              \
        "   jmp 2b\n"                           \
        ".previous\n"                           \
        ".section __ex_table,\"a\"\n"           \
        "   .align 8\n"                         \
        "   .quad 1b,3b\n"                      \
        ".previous"                             \
        : "=r" (__r) : "r" (value), "0" (__r) );\
    __r; })

/*
 * save_segments() writes a mask of segments which are dirty (non-zero),
 * allowing load_segments() to avoid some expensive segment loads and
 * MSR writes.
 */
static DEFINE_PER_CPU(unsigned int, dirty_segment_mask);
#define DIRTY_DS           0x01
#define DIRTY_ES           0x02
#define DIRTY_FS           0x04
#define DIRTY_GS           0x08
#define DIRTY_FS_BASE      0x10
#define DIRTY_GS_BASE_USER 0x20

static void load_segments(struct vcpu *n)
{
    struct vcpu_guest_context *nctxt = &n->arch.guest_context;
    int all_segs_okay = 1;
    unsigned int dirty_segment_mask, cpu = smp_processor_id();

    /* Load and clear the dirty segment mask. */
    dirty_segment_mask = per_cpu(dirty_segment_mask, cpu);
    per_cpu(dirty_segment_mask, cpu) = 0;

    /* Either selector != 0 ==> reload. */
    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
        all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);

    /* Either selector != 0 ==> reload. */
    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
        all_segs_okay &= loadsegment(es, nctxt->user_regs.es);

    /*
     * Either selector != 0 ==> reload.
     * Also reload to reset FS_BASE if it was non-zero.
     */
    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                  nctxt->user_regs.fs) )
        all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);

    /*
     * Either selector != 0 ==> reload.
     * Also reload to reset GS_BASE if it was non-zero.
     */
    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                  nctxt->user_regs.gs) )
    {
        /* Reset GS_BASE with user %gs? */
        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
            all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
    }

    if ( !is_pv_32on64_domain(n->domain) )
    {
        /* This can only be non-zero if selector is NULL. */
        if ( nctxt->fs_base )
            wrmsr(MSR_FS_BASE, nctxt->fs_base, nctxt->fs_base>>32);

        /* Most kernels have non-zero GS base, so don't bother testing. */
        /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
        wrmsr(MSR_SHADOW_GS_BASE,
              nctxt->gs_base_kernel, nctxt->gs_base_kernel>>32);

        /* This can only be non-zero if selector is NULL. */
        if ( nctxt->gs_base_user )
            wrmsr(MSR_GS_BASE, nctxt->gs_base_user, nctxt->gs_base_user>>32);

        /* If in kernel mode then switch the GS bases around. */
        if ( (n->arch.flags & TF_kernel_mode) )
            asm volatile ( "swapgs" );
    }

    if ( unlikely(!all_segs_okay) )
    {
        struct cpu_user_regs *regs = guest_cpu_user_regs();
        unsigned long *rsp =
            (n->arch.flags & TF_kernel_mode) ?
            (unsigned long *)regs->rsp :
            (unsigned long *)nctxt->kernel_sp;
        unsigned long cs_and_mask, rflags;

        if ( is_pv_32on64_domain(n->domain) )
        {
            unsigned int *esp = ring_1(regs) ?
                                (unsigned int *)regs->rsp :
                                (unsigned int *)nctxt->kernel_sp;
            unsigned int cs_and_mask, eflags;
            int ret = 0;

            /* CS longword also contains full evtchn_upcall_mask. */
            cs_and_mask = (unsigned short)regs->cs |
                ((unsigned int)vcpu_info(n, evtchn_upcall_mask) << 16);
            /* Fold upcall mask into RFLAGS.IF. */
            eflags  = regs->_eflags & ~X86_EFLAGS_IF;
            eflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;

            if ( !ring_1(regs) )
            {
                ret  = put_user(regs->ss,       esp-1);
                ret |= put_user(regs->_esp,     esp-2);
                esp -= 2;
            }

            if ( ret |
                 put_user(eflags,              esp-1) |
                 put_user(cs_and_mask,         esp-2) |
                 put_user(regs->_eip,          esp-3) |
                 put_user(nctxt->user_regs.gs, esp-4) |
                 put_user(nctxt->user_regs.fs, esp-5) |
                 put_user(nctxt->user_regs.es, esp-6) |
                 put_user(nctxt->user_regs.ds, esp-7) )
            {
                gdprintk(XENLOG_ERR, "Error while creating compat "
                         "failsafe callback frame.\n");
                domain_crash(n->domain);
            }

            if ( test_bit(_VGCF_failsafe_disables_events,
                          &n->arch.guest_context.flags) )
                vcpu_info(n, evtchn_upcall_mask) = 1;

            regs->entry_vector  = TRAP_syscall;
            regs->_eflags      &= 0xFFFCBEFFUL;
            regs->ss            = FLAT_COMPAT_KERNEL_SS;
            regs->_esp          = (unsigned long)(esp-7);
            regs->cs            = FLAT_COMPAT_KERNEL_CS;
            regs->_eip          = nctxt->failsafe_callback_eip;
            return;
        }

        if ( !(n->arch.flags & TF_kernel_mode) )
            toggle_guest_mode(n);
        else
            regs->cs &= ~3;

        /* CS longword also contains full evtchn_upcall_mask. */
        cs_and_mask = (unsigned long)regs->cs |
            ((unsigned long)vcpu_info(n, evtchn_upcall_mask) << 32);

        /* Fold upcall mask into RFLAGS.IF. */
        rflags  = regs->rflags & ~X86_EFLAGS_IF;
        rflags |= !vcpu_info(n, evtchn_upcall_mask) << 9;

        if ( put_user(regs->ss,            rsp- 1) |
             put_user(regs->rsp,           rsp- 2) |
             put_user(rflags,              rsp- 3) |
             put_user(cs_and_mask,         rsp- 4) |
             put_user(regs->rip,           rsp- 5) |
             put_user(nctxt->user_regs.gs, rsp- 6) |
             put_user(nctxt->user_regs.fs, rsp- 7) |
             put_user(nctxt->user_regs.es, rsp- 8) |
             put_user(nctxt->user_regs.ds, rsp- 9) |
             put_user(regs->r11,           rsp-10) |
             put_user(regs->rcx,           rsp-11) )
        {
            gdprintk(XENLOG_ERR, "Error while creating failsafe "
                     "callback frame.\n");
            domain_crash(n->domain);
        }

        if ( test_bit(_VGCF_failsafe_disables_events,
                      &n->arch.guest_context.flags) )
            vcpu_info(n, evtchn_upcall_mask) = 1;

        regs->entry_vector  = TRAP_syscall;
        regs->rflags       &= ~(X86_EFLAGS_AC|X86_EFLAGS_VM|X86_EFLAGS_RF|
                                X86_EFLAGS_NT|X86_EFLAGS_TF);
        regs->ss            = FLAT_KERNEL_SS;
        regs->rsp           = (unsigned long)(rsp-11);
        regs->cs            = FLAT_KERNEL_CS;
        regs->rip           = nctxt->failsafe_callback_eip;
    }
}

static void save_segments(struct vcpu *v)
{
    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
    struct cpu_user_regs      *regs = &ctxt->user_regs;
    unsigned int dirty_segment_mask = 0;

    regs->ds = read_segment_register(ds);
    regs->es = read_segment_register(es);
    regs->fs = read_segment_register(fs);
    regs->gs = read_segment_register(gs);

    if ( regs->ds )
        dirty_segment_mask |= DIRTY_DS;

    if ( regs->es )
        dirty_segment_mask |= DIRTY_ES;

    if ( regs->fs || is_pv_32on64_domain(v->domain) )
    {
        dirty_segment_mask |= DIRTY_FS;
        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
    }
    else if ( ctxt->fs_base )
    {
        dirty_segment_mask |= DIRTY_FS_BASE;
    }

    if ( regs->gs || is_pv_32on64_domain(v->domain) )
    {
        dirty_segment_mask |= DIRTY_GS;
        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
    }
    else if ( ctxt->gs_base_user )
    {
        dirty_segment_mask |= DIRTY_GS_BASE_USER;
    }

    this_cpu(dirty_segment_mask) = dirty_segment_mask;
}

#define switch_kernel_stack(v) ((void)0)

#elif defined(__i386__)

#define load_segments(n) ((void)0)
#define save_segments(p) ((void)0)

static inline void switch_kernel_stack(struct vcpu *v)
{
    struct tss_struct *tss = &init_tss[smp_processor_id()];
    tss->esp1 = v->arch.guest_context.kernel_sp;
    tss->ss1  = v->arch.guest_context.kernel_ss;
}

#endif /* __i386__ */

static void paravirt_ctxt_switch_from(struct vcpu *v)
{
    save_segments(v);

    /*
     * Disable debug breakpoints. We do this aggressively because if we switch
     * to an HVM guest we may load DR0-DR3 with values that can cause #DE
     * inside Xen, before we get a chance to reload DR7, and this cannot always
     * safely be handled.
     */
    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
        write_debugreg(7, 0);
}

static void paravirt_ctxt_switch_to(struct vcpu *v)
{
    unsigned long cr4;

    set_int80_direct_trap(v);
    switch_kernel_stack(v);

    cr4 = pv_guest_cr4_to_real_cr4(v->arch.guest_context.ctrlreg[4]);
    if ( unlikely(cr4 != read_cr4()) )
        write_cr4(cr4);

    if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
    {
        write_debugreg(0, v->arch.guest_context.debugreg[0]);
        write_debugreg(1, v->arch.guest_context.debugreg[1]);
        write_debugreg(2, v->arch.guest_context.debugreg[2]);
        write_debugreg(3, v->arch.guest_context.debugreg[3]);
        write_debugreg(6, v->arch.guest_context.debugreg[6]);
        write_debugreg(7, v->arch.guest_context.debugreg[7]);
    }
}

static void __context_switch(void)
{
    struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
    unsigned int          cpu = smp_processor_id();
    struct vcpu          *p = per_cpu(curr_vcpu, cpu);
    struct vcpu          *n = current;

    ASSERT(p != n);
    ASSERT(cpus_empty(n->vcpu_dirty_cpumask));

    if ( !is_idle_vcpu(p) )
    {
        memcpy(&p->arch.guest_context.user_regs,
               stack_regs,
               CTXT_SWITCH_STACK_BYTES);
        unlazy_fpu(p);
        p->arch.ctxt_switch_from(p);
    }

    if ( !is_idle_vcpu(n) )
    {
        memcpy(stack_regs,
               &n->arch.guest_context.user_regs,
               CTXT_SWITCH_STACK_BYTES);
        n->arch.ctxt_switch_to(n);
    }

    if ( p->domain != n->domain )
        cpu_set(cpu, n->domain->domain_dirty_cpumask);
    cpu_set(cpu, n->vcpu_dirty_cpumask);

    write_ptbase(n);

    if ( p->vcpu_id != n->vcpu_id )
    {
        char gdt_load[10];
        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(n);
        asm volatile ( "lgdt %0" : "=m" (gdt_load) );
    }

    if ( p->domain != n->domain )
        cpu_clear(cpu, p->domain->domain_dirty_cpumask);
    cpu_clear(cpu, p->vcpu_dirty_cpumask);

    per_cpu(curr_vcpu, cpu) = n;
}

void context_switch(struct vcpu *prev, struct vcpu *next)
{
    unsigned int cpu = smp_processor_id();
    cpumask_t dirty_mask = next->vcpu_dirty_cpumask;

    ASSERT(local_irq_is_enabled());

    /* Allow at most one CPU at a time to be dirty. */
    ASSERT(cpus_weight(dirty_mask) <= 1);
    if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) )
    {
        /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
        if ( !cpus_empty(next->vcpu_dirty_cpumask) )
            flush_tlb_mask(next->vcpu_dirty_cpumask);
    }

    local_irq_disable();

    if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
        pt_save_timer(prev);

    set_current(next);

    if ( (per_cpu(curr_vcpu, cpu) == next) || is_idle_vcpu(next) )
    {
        local_irq_enable();
    }
    else
    {
        __context_switch();

#ifdef CONFIG_COMPAT
        if ( !is_hvm_vcpu(next) &&
             (is_idle_vcpu(prev) ||
              is_hvm_vcpu(prev) ||
              is_pv_32on64_vcpu(prev) != is_pv_32on64_vcpu(next)) )
        {
            uint64_t efer = read_efer();
            if ( !(efer & EFER_SCE) )
                write_efer(efer | EFER_SCE);
            flush_tlb_one_local(GDT_VIRT_START(next) +
                                FIRST_RESERVED_GDT_BYTE);
        }
#endif

        /* Re-enable interrupts before restoring state which may fault. */
        local_irq_enable();

        if ( !is_hvm_vcpu(next) )
        {
            load_LDT(next);
            load_segments(next);
        }
    }

    context_saved(prev);

    /* Update per-VCPU guest runstate shared memory area (if registered). */
    if ( !guest_handle_is_null(runstate_guest(next)) )
    {
        if ( !is_pv_32on64_domain(next->domain) )
            __copy_to_guest(runstate_guest(next), &next->runstate, 1);
#ifdef CONFIG_COMPAT
        else
        {
            struct compat_vcpu_runstate_info info;

            XLAT_vcpu_runstate_info(&info, &next->runstate);
            __copy_to_guest(next->runstate_guest.compat, &info, 1);
        }
#endif
    }

    schedule_tail(next);
    BUG();
}

void continue_running(struct vcpu *same)
{
    schedule_tail(same);
    BUG();
}

int __sync_lazy_execstate(void)
{
    unsigned long flags;
    int switch_required;

    local_irq_save(flags);

    switch_required = (this_cpu(curr_vcpu) != current);

    if ( switch_required )
    {
        ASSERT(current == idle_vcpu[smp_processor_id()]);
        __context_switch();
    }

    local_irq_restore(flags);

    return switch_required;
}

void sync_vcpu_execstate(struct vcpu *v)
{
    if ( cpu_isset(smp_processor_id(), v->vcpu_dirty_cpumask) )
        (void)__sync_lazy_execstate();

    /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
    flush_tlb_mask(v->vcpu_dirty_cpumask);
}

struct migrate_info {
    long (*func)(void *data);
    void *data;
    void (*saved_schedule_tail)(struct vcpu *);
    cpumask_t saved_affinity;
};

static void continue_hypercall_on_cpu_helper(struct vcpu *v)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    struct migrate_info *info = v->arch.continue_info;
    cpumask_t mask = info->saved_affinity;
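/*
 * Illustrative aside (not part of domain.c): a minimal, self-contained
 * sketch of the dirty-segment-mask handshake that save_segments() and
 * load_segments() above rely on.  save_segments() records which selector
 * registers held non-null values when the outgoing vCPU was descheduled;
 * load_segments() then reloads a selector only if the register is dirty
 * or the incoming context wants a non-null value, skipping needless
 * segment loads.  All names below (toy_*) are hypothetical, chosen only
 * for this sketch; it compiles as a stand-alone program.
 */
#include <stdio.h>

#define DIRTY_DS 0x01
#define DIRTY_ES 0x02

struct toy_ctx { unsigned short ds, es; };

/* Stands in for the per-CPU dirty_segment_mask in the real code. */
static unsigned int toy_dirty_mask;

static void toy_save_segments(const struct toy_ctx *prev)
{
    unsigned int mask = 0;

    if ( prev->ds ) mask |= DIRTY_DS;
    if ( prev->es ) mask |= DIRTY_ES;

    toy_dirty_mask = mask;
}

static void toy_load_segments(const struct toy_ctx *next)
{
    /* Load and clear the dirty mask, as load_segments() does per CPU. */
    unsigned int mask = toy_dirty_mask;
    toy_dirty_mask = 0;

    /* Register dirty, or new selector != 0 ==> reload. */
    if ( (mask & DIRTY_DS) | next->ds )
        printf("reload %%ds with %#x\n", (unsigned int)next->ds);
    if ( (mask & DIRTY_ES) | next->es )
        printf("reload %%es with %#x\n", (unsigned int)next->es);
}

int main(void)
{
    struct toy_ctx prev = { .ds = 0x2b, .es = 0 };
    struct toy_ctx next = { .ds = 0,    .es = 0 };

    toy_save_segments(&prev);
    toy_load_segments(&next);   /* reloads %ds (dirty); skips %es (clean, null) */
    return 0;
}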