📄 domain.c
    if (unlikely(rbs_size > sizeof(c.nat->regs.rbs)))
        gdprintk(XENLOG_INFO,
                 "rbs_size is too large 0x%x > 0x%lx\n",
                 rbs_size, sizeof(c.nat->regs.rbs));
    else
        memcpy(c.nat->regs.rbs, rbs_bottom, rbs_size);

    rbs_top = (unsigned long*)((char *)rbs_bottom + rbs_size) - 1;
    rbs_rnat_addr = ia64_rse_rnat_addr(rbs_top);
    if ((unsigned long)rbs_rnat_addr >= sw->ar_bspstore)
        rbs_rnat_addr = &sw->ar_rnat;

    top_slot = ia64_rse_slot_num(rbs_top);

    c.nat->regs.rbs_rnat = (*rbs_rnat_addr) & ((1UL << top_slot) - 1);
    if (ia64_rse_rnat_addr(rbs_bottom) == ia64_rse_rnat_addr(rbs_top)) {
        unsigned int bottom_slot = ia64_rse_slot_num(rbs_bottom);
        c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1);
    }
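    /*
     * Worked example of the masking above (illustration only, not part
     * of the original logic): the RSE keeps one NaT collection word in
     * every 64th 8-byte slot, and ia64_rse_slot_num() is just bits 8:3
     * of a slot address.  Suppose rbs_bottom sits at slot 2 and rbs_top
     * at slot 5 of the same collection word:
     */
#if 0 /* for illustration only */
    {
        unsigned long rnat = *rbs_rnat_addr;
        /* Keep only NaT bits below rbs_top's slot ... */
        rnat &= (1UL << 5) - 1;          /* bits 0..4 survive */
        /* ... and, since bottom and top share one collection word,
         * drop the bits below rbs_bottom's slot: only bits 2..4
         * describe this rbs range. */
        rnat &= ~((1UL << 2) - 1);
    }
#endif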
    c.nat->regs.num_phys_stacked = num_phys_stacked;

    if (VMX_DOMAIN(v))
        c.nat->privregs_pfn = VGC_PRIVREGS_HVM;
    else
        c.nat->privregs_pfn = get_gpfn_from_mfn(
            virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT);

    for (i = 0; i < IA64_NUM_DBG_REGS; i++) {
        if (VMX_DOMAIN(v)) {
            vmx_vcpu_get_dbr(v, i, &c.nat->regs.dbr[i]);
            vmx_vcpu_get_ibr(v, i, &c.nat->regs.ibr[i]);
        } else {
            vcpu_get_dbr(v, i, &c.nat->regs.dbr[i]);
            vcpu_get_ibr(v, i, &c.nat->regs.ibr[i]);
        }
    }

    for (i = 0; i < 8; i++)
        vcpu_get_rr(v, (unsigned long)i << 61, &c.nat->regs.rr[i]);

    /* Fill extra regs. */
    for (i = 0;
         (i < sizeof(tr->itrs) / sizeof(tr->itrs[0])) && i < NITRS;
         i++) {
        tr->itrs[i].pte = v->arch.itrs[i].pte.val;
        tr->itrs[i].itir = v->arch.itrs[i].itir;
        tr->itrs[i].vadr = v->arch.itrs[i].vadr;
        tr->itrs[i].rid = v->arch.itrs[i].rid;
    }
    for (i = 0;
         (i < sizeof(tr->dtrs) / sizeof(tr->dtrs[0])) && i < NDTRS;
         i++) {
        tr->dtrs[i].pte = v->arch.dtrs[i].pte.val;
        tr->dtrs[i].itir = v->arch.dtrs[i].itir;
        tr->dtrs[i].vadr = v->arch.dtrs[i].vadr;
        tr->dtrs[i].rid = v->arch.dtrs[i].rid;
    }
    c.nat->event_callback_ip = v->arch.event_callback_ip;

    /* If PV and privregs is not set, we can't read mapped registers. */
    if (!is_hvm_vcpu(v) && v->arch.privregs == NULL)
        return;

    vcpu_get_dcr(v, &c.nat->regs.cr.dcr);

    c.nat->regs.cr.itm = is_hvm_vcpu(v) ?
        vmx_vcpu_get_itm(v) : PSCBX(v, domain_itm);
    vcpu_get_iva(v, &c.nat->regs.cr.iva);
    vcpu_get_pta(v, &c.nat->regs.cr.pta);

    vcpu_get_ipsr(v, &c.nat->regs.cr.ipsr);
    vcpu_get_isr(v, &c.nat->regs.cr.isr);
    vcpu_get_iip(v, &c.nat->regs.cr.iip);
    vcpu_get_ifa(v, &c.nat->regs.cr.ifa);
    vcpu_get_itir(v, &c.nat->regs.cr.itir);
    vcpu_get_iha(v, &c.nat->regs.cr.iha);

    //XXX change irr[] and arch.insvc[]
    if (is_hvm_vcpu(v))
        /* c.nat->regs.cr.ivr = vmx_vcpu_get_ivr(v) */; //XXX not SMP-safe
    else
        vcpu_get_ivr(v, &c.nat->regs.cr.ivr);
    vcpu_get_iim(v, &c.nat->regs.cr.iim);

    vcpu_get_tpr(v, &c.nat->regs.cr.tpr);
    vcpu_get_irr0(v, &c.nat->regs.cr.irr[0]);
    vcpu_get_irr1(v, &c.nat->regs.cr.irr[1]);
    vcpu_get_irr2(v, &c.nat->regs.cr.irr[2]);
    vcpu_get_irr3(v, &c.nat->regs.cr.irr[3]);
    vcpu_get_itv(v, &c.nat->regs.cr.itv); //XXX vlsapic
    vcpu_get_pmv(v, &c.nat->regs.cr.pmv);
    vcpu_get_cmcv(v, &c.nat->regs.cr.cmcv);

    if (is_hvm)
        vmx_arch_get_info_guest(v, c);
}

#if 0
// for debug
static void
__rbs_print(const char* func, int line, const char* name,
            const unsigned long* rbs, unsigned int rbs_size)
{
    unsigned int i;
    printk("%s:%d %s rbs %p\n", func, line, name, rbs);
    printk("   rbs_size 0x%016x no 0x%lx\n",
           rbs_size, rbs_size / sizeof(unsigned long));

    for (i = 0; i < rbs_size / sizeof(unsigned long); i++) {
        const char* zero_or_n = "0x";
        if (ia64_rse_is_rnat_slot((unsigned long*)&rbs[i]))
            zero_or_n = "Nx";

        if ((i % 3) == 0)
            printk("0x%02x:", i);
        printk(" %s%016lx", zero_or_n, rbs[i]);
        if ((i % 3) == 2)
            printk("\n");
    }
    printk("\n");
}

#define rbs_print(rbs, rbs_size)                \
    __rbs_print(__func__, __LINE__, (#rbs), (rbs), (rbs_size))
#endif
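/*
 * Reference sketch of the RSE helpers used throughout this file
 * (illustration only; written from memory of Linux's <asm/rse.h>, so
 * double-check against the real header).  Every 64th 8-byte slot of
 * the backing store holds a NaT collection, which is why register
 * counts and byte sizes don't convert by a plain multiply or divide
 * by 8.
 */
#if 0 /* for illustration only */
static inline unsigned long
ia64_rse_slot_num(unsigned long *addr)
{
    /* Slot index within the current collection group: bits 8:3. */
    return (((unsigned long)addr) >> 3) & 0x3f;
}

static inline unsigned long *
ia64_rse_rnat_addr(unsigned long *slot_addr)
{
    /* Address of the NaT collection slot covering slot_addr. */
    return (unsigned long *)((unsigned long)slot_addr | (0x3f << 3));
}

static inline unsigned long
ia64_rse_num_regs(unsigned long *bspstore, unsigned long *bsp)
{
    unsigned long slots = bsp - bspstore;
    /* Total slots minus the NaT collection slots in the range. */
    return slots - (ia64_rse_slot_num(bspstore) + slots) / 0x40;
}
#endif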
static int
copy_rbs(struct vcpu* v, unsigned long* dst_rbs_size,
         const unsigned long* rbs, unsigned long rbs_size,
         unsigned long src_rnat, unsigned long rbs_voff)
{
    int rc = -EINVAL;
    struct page_info* page;
    unsigned char* vaddr;
    unsigned long* src_bsp;
    unsigned long* src_bspstore;

    struct switch_stack* sw = vcpu_to_switch_stack(v);
    unsigned long num_regs;
    unsigned long* dst_bsp;
    unsigned long* dst_bspstore;
    unsigned long* dst_rnat;
    unsigned long dst_rnat_tmp;
    unsigned long dst_rnat_mask;
    unsigned long flags;
    extern void ia64_copy_rbs(unsigned long* dst_bspstore,
                              unsigned long* dst_rbs_size,
                              unsigned long* dst_rnat_p,
                              unsigned long* src_bsp,
                              unsigned long src_rbs_size,
                              unsigned long src_rnat);

    dst_bspstore = vcpu_to_rbs_bottom(v);
    *dst_rbs_size = rbs_size;
    if (rbs_size == 0)
        return 0;

    // The rbs offset depends on sizeof(struct vcpu), so it is too
    // unstable for the hypercall ABI.
    // We need to take the rbs offset into account.
    //memcpy(dst_bspstore, c.nat->regs.rbs, rbs_size);

    // It is assumed that rbs_size is small enough compared
    // to KERNEL_STACK_SIZE.
    page = alloc_domheap_pages(NULL, KERNEL_STACK_SIZE_ORDER, 0);
    if (page == NULL)
        return -ENOMEM;
    vaddr = page_to_virt(page);

    src_bspstore = (unsigned long*)(vaddr + rbs_voff * 8);
    src_bsp = (unsigned long*)((unsigned char*)src_bspstore + rbs_size);
    if ((unsigned long)src_bsp >= (unsigned long)vaddr + PAGE_SIZE)
        goto out;
    memcpy(src_bspstore, rbs, rbs_size);

    num_regs = ia64_rse_num_regs(src_bspstore, src_bsp);
    dst_bsp = ia64_rse_skip_regs(dst_bspstore, num_regs);
    *dst_rbs_size = (unsigned long)dst_bsp - (unsigned long)dst_bspstore;

    // Rough check.
    if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2)
        goto out;

    // ia64_copy_rbs() uses the real cpu's stacked registers.
    // So it may fault with an Illegal Operation fault, resulting in a
    // panic, if rbs_size is too large to load compared to the number
    // of physical stacked registers, RSE.N_STACKED_PHYS, which is cpu
    // implementation specific.
    // See SDM vol. 2, Register Stack Engine 6, especially 6.5.5.
    //
    // For safe operation and cpu model independence, we would need to
    // copy the registers by hand without loadrs and flushrs.  However,
    // even if we implemented that, a similar issue would still occur
    // when running the guest: the CPU context restore routine issues
    // loadrs, resulting in an Illegal Operation fault.  And what if
    // the vRSE is in enforced lazy mode?  We can't store any dirty
    // stacked registers into the RBS without cover or br.call.
    if (num_regs > num_phys_stacked) {
        rc = -ENOSYS;
        gdprintk(XENLOG_WARNING,
                 "%s:%d domain %d: can't load stacked registers\n"
                 "requested size 0x%lx => 0x%lx, num regs %ld "
                 "RSE.N_STACKED_PHYS %ld\n",
                 __func__, __LINE__, v->domain->domain_id,
                 rbs_size, *dst_rbs_size, num_regs, num_phys_stacked);
        goto out;
    }

    // We mask interrupts to avoid using the register backing store.
    local_irq_save(flags);
    ia64_copy_rbs(dst_bspstore, dst_rbs_size, &dst_rnat_tmp,
                  src_bsp, rbs_size, src_rnat);
    local_irq_restore(flags);

    dst_rnat_mask = (1UL << ia64_rse_slot_num(dst_bsp)) - 1;
    dst_rnat = ia64_rse_rnat_addr(dst_bsp);
    if ((unsigned long)dst_rnat > sw->ar_bspstore)
        dst_rnat = &sw->ar_rnat;
    // If ia64_rse_rnat_addr(dst_bsp) ==
    // ia64_rse_rnat_addr(vcpu_to_rbs_bottom(v)), the lsb bit of rnat
    // is just ignored, so we don't have to mask it out.
    *dst_rnat =
        (*dst_rnat & ~dst_rnat_mask) | (dst_rnat_tmp & dst_rnat_mask);

    rc = 0;
out:
    free_domheap_pages(page, KERNEL_STACK_SIZE_ORDER);
    return rc;
}

static void
unat_update(unsigned long *unat_eml, unsigned long *spill_addr, char nat)
{
    unsigned int pos = ia64_unat_pos(spill_addr);
    if (nat)
        *unat_eml |= (1UL << pos);
    else
        *unat_eml &= ~(1UL << pos);
}
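/*
 * Illustrative note (not part of the original code): st8.spill records
 * the spilled register's NaT bit in AR.UNAT at bit position
 * (spill_addr >> 3) & 0x3f, which is presumably what ia64_unat_pos()
 * computes.  A hypothetical use of unat_update() when emulating a
 * spill of a NaTed register to an 8-byte slot:
 */
#if 0 /* for illustration only */
    unsigned long unat_eml = 0;
    unsigned long slot[1];                /* hypothetical spill slot */
    unat_update(&unat_eml, slot, 1);      /* set the matching UNAT bit */
    unat_update(&unat_eml, slot, 0);      /* clear it again */
#endif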
int
arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
    struct cpu_user_regs *uregs = vcpu_regs(v);
    struct domain *d = v->domain;
    struct switch_stack *sw = vcpu_to_switch_stack(v);
    int was_initialised = v->is_initialised;
    struct unw_frame_info info;
    unsigned int rbs_size;
    unsigned int num_regs;
    unsigned long * const rbs_bottom = vcpu_to_rbs_bottom(v);
    int rc = 0;
    int i;

    /* Finish vcpu initialization. */
    if (!was_initialised) {
        if (is_hvm_domain(d))
            rc = vmx_final_setup_guest(v);
        else
            rc = vcpu_late_initialise(v);
        if (rc != 0)
            return rc;

        vcpu_init_regs(v);

        v->is_initialised = 1;
        /* Auto-online VCPU0 when it is initialised. */
        if (v->vcpu_id == 0 ||
            (c.nat != NULL && c.nat->flags & VGCF_online))
            clear_bit(_VPF_down, &v->pause_flags);
    }

    if (c.nat == NULL)
        return 0;

    uregs->b6 = c.nat->regs.b[6];
    uregs->b7 = c.nat->regs.b[7];

    memset(&info, 0, sizeof(info));
    unw_init_from_blocked_task(&info, v);
    if (vcpu_has_not_run(v)) {
        sw->ar_lc = c.nat->regs.ar.lc;
        sw->ar_pfs =
            (sw->ar_pfs & ~AR_PFS_PEC_MASK) |
            ((c.nat->regs.ar.ec << AR_PFS_PEC_SHIFT) & AR_PFS_PEC_MASK);
    } else if (unw_unwind_to_user(&info) < 0) {
        /* warn: should panic? */
        gdprintk(XENLOG_ERR,
                 "vcpu=%d unw_unwind_to_user() failed.\n", v->vcpu_id);
        show_stack(v, NULL);
        //return -ENOSYS;
    } else {
        unw_set_ar(&info, UNW_AR_LC, c.nat->regs.ar.lc);
        unw_set_ar(&info, UNW_AR_EC, c.nat->regs.ar.ec);
    }

    uregs->ar_csd = c.nat->regs.ar.csd;
    uregs->ar_ssd = c.nat->regs.ar.ssd;

    uregs->r8 = c.nat->regs.r[8];
    uregs->r9 = c.nat->regs.r[9];
    uregs->r10 = c.nat->regs.r[10];
    uregs->r11 = c.nat->regs.r[11];

    if (!is_hvm_domain(d))
        vcpu_set_psr(v, c.nat->regs.psr);
    else
        vmx_vcpu_set_psr(v, c.nat->regs.psr);
    uregs->cr_iip = c.nat->regs.ip;
    uregs->cr_ifs = c.nat->regs.cfm;

    uregs->ar_unat = c.nat->regs.ar.unat;
    uregs->ar_pfs = c.nat->regs.ar.pfs;
    uregs->ar_rsc = c.nat->regs.ar.rsc;
    uregs->ar_rnat = c.nat->regs.ar.rnat;
    uregs->ar_bspstore = c.nat->regs.ar.bspstore;

    uregs->pr = c.nat->regs.pr;
    uregs->b0 = c.nat->regs.b[0];
    num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore,
                                 (unsigned long*)c.nat->regs.ar.bsp);
    rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) -
        (unsigned long)rbs_bottom;
    if (rbs_size > sizeof(c.nat->regs.rbs)) {
        gdprintk(XENLOG_INFO,
                 "rbs size is too large %x > %lx\n",
                 rbs_size, sizeof(c.nat->regs.rbs));
        return -EINVAL;
    }
    if (rbs_size > 0 &&
        ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
        gdprintk(XENLOG_INFO,
                 "rbs stack offset is different! xen 0x%x given 0x%x\n",
                 (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);

    /* Protection against crazy user code. */
    if (!was_initialised)
        uregs->loadrs = (rbs_size << 16);
    if (rbs_size == (uregs->loadrs >> 16)) {
        unsigned long dst_rbs_size = 0;
        if (vcpu_has_not_run(v))
            sw->ar_bspstore = (unsigned long)rbs_bottom;
        rc = copy_rbs(v, &dst_rbs_size, c.nat->regs.rbs, rbs_size,
                      c.nat->regs.rbs_rnat, c.nat->regs.rbs_voff);
        if (rc < 0)
            return rc;

        /* In the case of a newly created vcpu, ar_bspstore points to
         * the bottom of the register stack. Move it up.
         * See also init_switch_stack(). */
        if (vcpu_has_not_run(v)) {
            uregs->loadrs = (dst_rbs_size << 16);
            sw->ar_bspstore =
                (unsigned long)((char*)rbs_bottom + dst_rbs_size);
        }
    }
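    /*
     * Note on the loadrs encoding above (illustration only): as in
     * Linux's struct pt_regs, uregs->loadrs holds "size of the dirty
     * RBS partition << 16", i.e. the byte count destined for the
     * ar.rsc.loadrs field before the "loadrs" instruction reloads that
     * many bytes from the backing store.  E.g. 24 dirty registers with
     * no intervening NaT collection slot:
     *     rbs_size = 24 * 8 = 0xc0 bytes  =>  loadrs = 0xc0 << 16
     */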
    // Inhibit save/restore between cpus of different RSE.N_STACKED_PHYS
    // to avoid nasty issues.
    //
    // The number of physical stacked general registers
    // (RSE.N_STACKED_PHYS) isn't virtualized. A guest OS sees it via
    // the PAL_RSE_INFO call, and the value might be exported to user
    // processes (Linux does so via /proc/cpuinfo).
    // The SDM says only that the number is cpu implementation specific.
    //
    // If the number on the restoring cpu differs from that of the
    // saving cpu, the following, or something worse, might happen.
    // - Xen VMM itself may panic with an illegal operation fault when
    //   issuing loadrs to run the guest,
    //   when RSE.N_STACKED_PHYS of the saving CPU > RSE.N_STACKED_PHYS
    //   of the restoring CPU.
    //   This case is detected by copy_rbs(), which refuses the restore.
    // - The guest kernel may panic with an illegal operation fault,
    //   when RSE.N_STACKED_PHYS of the saving CPU > RSE.N_STACKED_PHYS
    //   of the restoring CPU.
    // - Information may leak from the guest kernel to a user process,
    //   when RSE.N_STACKED_PHYS of the saving CPU < RSE.N_STACKED_PHYS
    //   of the restoring CPU.
    //   Before returning to a user process, the kernel should zero all
    //   physical stacked registers to prevent kernel bits leaking; it
    //   would do so based on RSE.N_STACKED_PHYS (Linux does). In the
    //   restored environment the kernel clears only a part of the
    //   physical stacked registers.
    // - User processes or human operators would be confused.
    //   RSE.N_STACKED_PHYS might be exported to user processes or
    //   human operators; on Linux it is actually exported via
    //   /proc/cpuinfo, so user processes might use it.
    //   I don't know any concrete example, but it's possible in
    //   theory: e.g. a thread library may size RBS areas based on the
    //   value. (Fortunately glibc nptl doesn't.)
    if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */
        c.nat->regs.num_phys_stacked != num_phys_stacked) {
        gdprintk(XENLOG_WARNING,
                 "num phys stacked is different! "
                 "xen 0x%lx given 0x%lx\n",
                 num_phys_stacked, c.nat->regs.num_phys_stacked);
        return -EINVAL;
    }

    uregs->r1 = c.nat->regs.r[1];
    uregs->r12 = c.nat->regs.r[12];
    uregs->r13 = c.nat->regs.r[13];
    uregs->ar_fpsr = c.nat->regs.ar.fpsr;
    uregs->r15 = c.nat->regs.r[15];

    uregs->r14 = c.nat->regs.r[14];
    uregs->r2 = c.nat->regs.r[2];
    uregs->r3 = c.nat->regs.r[3];
    uregs->r16 = c.nat->regs.r[16];
    uregs->r17 = c.nat->regs.r[17];
    uregs->r18 = c.nat->regs.r[18];
    uregs->r19 = c.nat->regs.r[19];
    uregs->r20 = c.nat->regs.r[20];
    uregs->r21 = c.nat->regs.r[21];
    uregs->r22 = c.nat->regs.r[22];
    uregs->r23 = c.nat->regs.r[23];
    uregs->r24 = c.nat->regs.r[24];
    uregs->r25 = c.nat->regs.r[25];
    uregs->r26 = c.nat->regs.r[26];
    uregs->r27 = c.nat->regs.r[27];
    uregs->r28 = c.nat->regs.r[28];
    uregs->r29 = c.nat->regs.r[29];
    uregs->r30 = c.nat->regs.r[30];
    uregs->r31 = c.nat->regs.r[31];

    uregs->ar_ccv = c.nat->regs.ar.ccv;

    COPY_FPREG(&sw->f2, &c.nat->regs.f[2]);
    COPY_FPREG(&sw->f3, &c.nat->regs.f[3]);
    COPY_FPREG(&sw->f4, &c.nat->regs.f[4]);
    COPY_FPREG(&sw->f5, &c.nat->regs.f[5]);

    COPY_FPREG(&uregs->f6, &c.nat->regs.f[6]);
    COPY_FPREG(&uregs->f7, &c.nat->regs.f[7]);
    COPY_FPREG(&uregs->f8, &c.nat->regs.f[8]);
    COPY_FPREG(&uregs->f9, &c.nat->regs.f[9]);
    COPY_FPREG(&uregs->f10, &c.nat->regs.f[10]);
    COPY_FPREG(&uregs->f11, &c.nat->regs.f[11]);

    COPY_FPREG(&sw->f12, &c.nat->regs.f[12]);
    COPY_FPREG(&sw->f13, &c.nat->regs.f[13]);
    COPY_FPREG(&sw->f14, &c.nat->regs.f[14]);
    COPY_FPREG(&sw->f15, &c.nat->regs.f[15]);
    COPY_FPREG(&sw->f16, &c.nat->regs.f[16]);
    COPY_FPREG(&sw->f17, &c.nat->regs.f[17]);
    COPY_FPREG(&sw->f18, &c.nat->regs.f[18]);
    COPY_FPREG(&sw->f19, &c.nat->regs.f[19]);
    COPY_FPREG(&sw->f20, &c.nat->regs.f[20]);
    COPY_FPREG(&sw->f21, &c.nat->regs.f[21]);
    COPY_FPREG(&sw->f22, &c.nat->regs.f[22]);
    COPY_FPREG(&sw->f23, &c.nat->regs.f[23]);
    COPY_FPREG(&sw->f24, &c.nat->regs.f[24]);
    COPY_FPREG(&sw->f25, &c.nat->regs.f[25]);
    COPY_FPREG(&sw->f26, &c.nat->regs.f[26]);
    COPY_FPREG(&sw->f27, &c.nat->regs.f[27]);
    COPY_FPREG(&sw->f28, &c.nat->regs.f[28]);
    COPY_FPREG(&sw->f29, &c.nat->regs.f[29]);
    COPY_FPREG(&sw->f30, &c.nat->regs.f[30]);
    COPY_FPREG(&sw->f31, &c.nat->regs.f[31]);