📄 hyperprivop.s
(p7)    br.spnt.few dispatch_break_fault ;;
        cmp.eq p7,p0=r22,r17
(p7)    br.spnt.few dispatch_break_fault ;;
#endif
        movl r20=0x2c00
        // save iim in shared_info
        adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;
        // fall through
END(fast_break_reflect)

// reflect to domain ivt+r20
// sets up isr,iip,ipsr,ifs (FIXME: do iipa too)
//      r16 == cr.isr
//      r18 == XSI_PSR_IC
//      r20 == offset into ivt
//      r29 == iip
//      r30 == ipsr
//      r31 == pr
ENTRY(fast_reflect)
#ifdef FAST_REFLECT_CNT
        movl r22=PERFC(fast_reflect)
        shr r23=r20,8-2;;
        add r22=r22,r23;;
        ld4 r21=[r22];;
        adds r21=1,r21;;
        st4 [r22]=r21;;
#endif
        // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!)
        adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r29,XSI_ISR_OFS-XSI_IIP_OFS;;
        // set shared_mem isr
        st8 [r21]=r16 ;;
        // set cr.ipsr
        movl r21=THIS_CPU(current_psr_i_addr)
        mov r29=r30 ;;
        ld8 r21=[r21]
        movl r28=DELIVER_PSR_SET | (CONFIG_CPL0_EMUL << IA64_PSR_CPL0_BIT)
        movl r27=~DELIVER_PSR_CLR;;
        and r29=r29,r27;;
        or r29=r29,r28;;
        // set hpsr_dfh to ipsr
        adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
        ld1 r28=[r28];;
        dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
        mov cr.ipsr=r29;;
        // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
        extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
        cmp.eq p7,p0=CONFIG_CPL0_EMUL,r29;;
(p7)    dep r30=0,r30,IA64_PSR_CPL0_BIT,2
        movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT)
        movl r27=~(IA64_PSR_PP|IA64_PSR_BN);;
        or r30=r30,r28;;
        and r30=r30,r27
        // also set shared_mem ipsr.i and ipsr.ic appropriately
        ld1 r22=[r21]
        ld4 r24=[r18];;
        cmp4.eq p6,p7=r24,r0;;
(p6)    dep r30=0,r30,IA64_PSR_IC_BIT,1
(p7)    dep r30=-1,r30,IA64_PSR_IC_BIT,1
        mov r24=r21
        cmp.ne p6,p7=r22,r0;;
(p6)    dep r30=0,r30,IA64_PSR_I_BIT,1
(p7)    dep r30=-1,r30,IA64_PSR_I_BIT,1
        mov r22=1
        adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
        adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
        ld1 r28=[r27];;
        st1 [r27]=r0
        dep r30=r28,r30,IA64_PSR_DFH_BIT,1 ;;
        st8 [r21]=r30
        // set shared_mem interrupt_delivery_enabled to 0
        // set shared_mem interrupt_collection_enabled to 0
        st1 [r24]=r22
        st4 [r18]=r0;;
        // cover and set shared_mem precover_ifs to cr.ifs
        // set shared_mem ifs to 0
        cover ;;
        mov r24=cr.ifs
        adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r0 ;;
        adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r24
        // FIXME: need to save iipa and isr to be arch-compliant
        // set iip to go to domain IVA break instruction vector
        movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r22=[r22];;
        adds r22=IA64_VCPU_IVA_OFFSET,r22;;
        ld8 r23=[r22];;
        add r20=r20,r23;;
        mov cr.iip=r20
        // OK, now all set to go except for switch to virtual bank0
        mov r30=r2
        mov r29=r3
#ifdef HANDLE_AR_UNAT
        mov r28=ar.unat
#endif
        ;;
        adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
        adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18
        ;;
        bsw.1;;
        .mem.offset 0,0; st8.spill [r2]=r16,16
        .mem.offset 8,0; st8.spill [r3]=r17,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r18,16
        .mem.offset 8,0; st8.spill [r3]=r19,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r20,16
        .mem.offset 8,0; st8.spill [r3]=r21,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r22,16
        .mem.offset 8,0; st8.spill [r3]=r23,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r24,16
        .mem.offset 8,0; st8.spill [r3]=r25,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r26,16
        .mem.offset 8,0; st8.spill [r3]=r27,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r28,16
        .mem.offset 8,0; st8.spill [r3]=r29,16 ;;
        .mem.offset 0,0; st8.spill [r2]=r30,16
        .mem.offset 8,0; st8.spill [r3]=r31,16 ;;
#ifdef HANDLE_AR_UNAT
        // r16~r23 are preserved regs in bank0 regs, we need to restore them,
        // r24~r31 are scratch regs, we don't need to handle NaT bit,
        // because OS handler must assign it before access it
        ld8 r16=[r2],16
        ld8 r17=[r3],16;;
        ld8 r18=[r2],16
        ld8 r19=[r3],16;;
        ld8 r20=[r2],16
        ld8 r21=[r3],16;;
        ld8 r22=[r2],16
        ld8 r23=[r3],16;;
#endif
        ;;
        bsw.0 ;;
        mov r24=ar.unat
        mov r2=r30
        mov r3=r29
#ifdef HANDLE_AR_UNAT
        mov ar.unat=r28
#endif
        ;;
        adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
        adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r25]=r24
        st4 [r20]=r0
        mov pr=r31,-1 ;;
        rfi ;;
END(fast_reflect)
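
// The reflection sequence above, restated as a rough C-like sketch (purely
// illustrative; "shared" stands for the XSI_* fields of the mapped
// shared_info page and vcpu->iva for the value at IA64_VCPU_IVA_OFFSET):
//
//      shared.iip = iip;  shared.isr = isr;            // faulting context
//      cr.ipsr = (ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
//      shared.ipsr = ipsr adjusted for the guest (cpl squashed, i/ic/dfh
//                    taken from the virtual psr, pp/bn cleared, dt/it/rt set);
//      shared.interrupt_delivery_enabled = 0;
//      shared.interrupt_collection_enabled = 0;
//      cover;  shared.precover_ifs = cr.ifs;  shared.ifs = 0;
//      cr.iip = vcpu->iva + r20;                       // guest vector entry
//      /* spill bank1 r16-r31 to the shared page, leave the guest on bank 0 */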

// reflect access faults (0x2400,0x2800,0x5300) directly to domain
//      r16 == isr
//      r17 == ifa
//      r19 == reflect number (only pass-thru to dispatch_reflection)
//      r20 == offset into ivt
//      r31 == pr
GLOBAL_ENTRY(fast_access_reflect)
#ifndef FAST_ACCESS_REFLECT // see beginning of file
        br.spnt.few dispatch_reflection ;;
#endif
        mov r30=cr.ipsr
        mov r29=cr.iip;;
        tbit.nz p7,p0=r30,IA64_PSR_PP_BIT
(p7)    br.spnt.few dispatch_reflection ;;
        extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
        cmp.eq p7,p0=r21,r0
(p7)    br.spnt.few dispatch_reflection ;;
        movl r18=THIS_CPU(current_psr_ic_addr);;
        ld8 r18=[r18];;
        ld4 r21=[r18];;
        cmp.eq p7,p0=r0,r21
(p7)    br.spnt.few dispatch_reflection ;;
        // set shared_mem ifa, FIXME: should we validate it?
        mov r17=cr.ifa
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17 ;;
        // get rr[ifa] and save to itir in shared memory (extra bits ignored)
        shr.u r22=r17,61
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18
        adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
        shladd r22=r22,3,r21;;
        ld8 r22=[r22];;
        and r22=~3,r22;;
        st8 [r23]=r22;;
        br.cond.sptk.many fast_reflect;;
END(fast_access_reflect)
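
// A note on the itir handed to the guest above (hedged sketch; rr[] is the
// guest's region register file as saved at XSI_RR0_OFS):
//
//      shared.ifa  = cr.ifa;
//      shared.itir = rr[cr.ifa >> 61] & ~3UL;  // keep ps (and rid above it),
//                                              // drop the ve/reserved bits
//
// isr, iip, ipsr and ifs are then filled in by fast_reflect, which this
// path branches to.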

// when we get to here, VHPT_CCHAIN_LOOKUP has failed and everything
// is as it was at the time of original miss.  We want to preserve that
// so if we get a nested fault, we can just branch to page_fault
GLOBAL_ENTRY(fast_tlb_miss_reflect)
#ifndef FAST_TLB_MISS_REFLECT // see beginning of file
        br.spnt.few page_fault ;;
#else
        mov r31=pr
        mov r30=cr.ipsr
        mov r29=cr.iip
        mov r16=cr.isr
        mov r17=cr.ifa;;
        // for now, always take slow path for region 0 (e.g. metaphys mode)
        extr.u r21=r17,61,3;;
        cmp.eq p7,p0=r0,r21
(p7)    br.spnt.few page_fault ;;
        // always take slow path for PL0 (e.g. __copy_from_user)
        extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;;
        cmp.eq p7,p0=r21,r0
(p7)    br.spnt.few page_fault ;;
        // slow path if strange ipsr or isr bits set
        tbit.nz p7,p0=r30,IA64_PSR_PP_BIT,1
(p7)    br.spnt.few page_fault ;;
        movl r21=IA64_ISR_IR|IA64_ISR_SP|IA64_ISR_NA ;;
        and r21=r16,r21;;
        cmp.ne p7,p0=r0,r21
(p7)    br.spnt.few page_fault ;;
        // also take slow path if virtual psr.ic=0
        movl r18=XSI_PSR_IC;;
        ld4 r21=[r18];;
        cmp.eq p7,p0=r0,r21
(p7)    br.spnt.few page_fault ;;
        // OK, if we get to here, we are doing a fast vcpu_translate.  Need to:
        //  1) look in the virtual TR's (pinned), if not there
        //  2) look in the 1-entry TLB (pinned), if not there
        //  3) check the domain VHPT (NOT pinned, accesses domain memory!)
        // If we find it in any of these places, we need to effectively do
        // a hyper_itc_i/d

        // short-term hack for now, if in region 5-7, take slow path
        // since all Linux TRs are in region 5 or 7, we need not check TRs
        extr.u r21=r17,61,3;;
        cmp.le p7,p0=5,r21
(p7)    br.spnt.few page_fault ;;
fast_tlb_no_tr_match:
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27]
        tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)    adds r25=IA64_VCPU_ITLB_OFFSET,r27
(p7)    adds r25=IA64_VCPU_DTLB_OFFSET,r27;;
        ld8 r20=[r25],8;;
        tbit.z p7,p0=r20,VTLB_PTE_P_BIT         // present?
(p7)    br.cond.spnt.few 1f;;
        // if ifa is in range of tlb, don't bother to check rid, go slow path
        ld8 r21=[r25],8;;
        mov r23=1
        extr.u r21=r21,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;
        shl r22=r23,r21
        ld8 r21=[r25],8;;
        cmp.ltu p7,p0=r17,r21
(p7)    br.cond.sptk.many 1f;
        add r21=r22,r21;;
        cmp.ltu p7,p0=r17,r21
(p7)    br.cond.spnt.few page_fault;;

1:      // check the guest VHPT
        adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
        ld8 r19=[r19]
        // if (!rr.ve || !(pta & IA64_PTA_VE)) take slow way for now
        // FIXME: later, we deliver an alt_d/i vector after thash and itir
        extr.u r25=r17,61,3
        adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;;
        shl r25=r25,3;;
        add r21=r21,r25;;
        ld8 r22=[r21];;
        tbit.z p7,p0=r22,0
(p7)    br.cond.spnt.few page_fault;;
        tbit.z p7,p0=r19,IA64_PTA_VE_BIT
(p7)    br.cond.spnt.few page_fault;;
        tbit.nz p7,p0=r19,IA64_PTA_VF_BIT       // long format VHPT
(p7)    br.cond.spnt.few page_fault;;

        // compute and save away itir (r22 & RR_PS_MASK)
        movl r21=IA64_ITIR_PS_MASK;;
        and r22=r22,r21;;
        adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r22;;

        // save away ifa
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r21]=r17;;

        // see vcpu_thash to save away iha
        shr.u r20 = r17, 61
        addl r25 = 1, r0
        movl r30 = 0xe000000000000000
        ;;
        and r21 = r30, r17              // VHPT_Addr1
        ;;
        shladd r28 = r20, 3, r18
        adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
        ;;
        adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
        addl r28 = 32767, r0
        ld8 r24 = [r19]                 // pta
        ;;
        ld8 r23 = [r27]                 // rrs[vadr>>61]
        extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
        ;;
        extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
        shl r30 = r25, r26              // pt size
        ;;
        shr.u r19 = r17, r22            // ifa pg number
        shr.u r29 = r24, IA64_PTA_BASE_BIT
        adds r30 = -1, r30              // pt size mask
        ;;
        shladd r27 = r19, 3, r0         // vhpt offset
        extr.u r26 = r30, 15, 46
        ;;
        andcm r24 = r29, r26
        and r19 = r28, r27
        shr.u r25 = r27, 15
        ;;
        and r23 = r26, r25
        ;;
        or r22 = r24, r23
        ;;
        dep.z r20 = r22, 15, 46
        ;;
        or r30 = r20, r21
        ;;
        //or r8 = r19, r30
        or r19 = r19, r30
        ;;
        adds r23=XSI_IHA_OFS-XSI_PSR_IC_OFS,r18
        ;;
        st8 [r23]=r19

        // done with thash, check guest VHPT
        adds r20 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18;;
        ld8 r24 = [r20];;               // pta
        // avoid recursively walking the VHPT
        // if (((r17=address ^ r24=pta) & ((itir_mask(pta) << 3) >> 3)) != 0) {
        mov r20=-8
        xor r21=r17,r24
        extr.u r24=r24,IA64_PTA_SIZE_BIT,IA64_PTA_SIZE_LEN;;
        shl r20=r20,r24;;
        shr.u r20=r20,3;;
        and r21=r20,r21;;
        cmp.eq p7,p0=r21,r0
(p7)    br.cond.spnt.few 1f;;
        // __copy_from_user(&pte, r19=(void *)(*iha), sizeof(pte)=8)
        // prepare for possible nested dtlb fault
        mov r29=b0
        movl r30=guest_vhpt_miss
        // now go fetch the entry from the guest VHPT
        ld8 r20=[r19];;
        // if we wind up here, we successfully loaded the VHPT entry
        // this VHPT walker aborts on non-present pages instead
        // of inserting a not-present translation, this allows
        // vectoring directly to the miss handler
        tbit.z p7,p0=r20,0
(p7)    br.cond.spnt.few page_not_present;;
#ifdef FAST_REFLECT_CNT
        movl r21=PERFC(fast_vhpt_translate);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
#endif
        // prepare for fast_insert(PSCB(ifa),PSCB(itir),r16=pte)
        //      r16 == pte
        //      r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
        //      r18 == XSI_PSR_IC_OFS
        //      r24 == ps
        //      r29 == saved value of b0 in case of recovery
        //      r30 == recovery ip if failure occurs
        //      r31 == pr
        tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)    mov r17=1
(p7)    mov r17=0
        mov r16=r20
        mov r29=b0
        movl r30=recover_and_page_fault
        adds r21=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r24=[r21];;
        extr.u r24=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
        // IFA already in PSCB
        br.cond.sptk.many fast_insert;;
END(fast_tlb_miss_reflect)
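
// The iha value computed above mirrors vcpu_thash for a short-format VHPT.
// As a hedged C-like sketch (illustrative only; it assumes pta.size >= 15,
// which is what the 32767/15-bit split above relies on):
//
//      mask = (1UL << pta.size) - 1;
//      off  = (vadr >> rr[vadr >> 61].ps) << 3;        // hash offset for vadr
//      iha  = (vadr & 0xe000000000000000UL)            // region bits from vadr
//           | (pta_base & ~mask)                       // VHPT base above the table
//           | (off & mask);                            // offset inside the table
//
// where pta_base is cr.pta with its size/ve fields (bits 14:0) cleared.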

// we get here if fast_insert fails (e.g. due to metaphysical lookup)
ENTRY(recover_and_page_fault)
#ifdef PERF_COUNTERS
        movl r21=PERFC(recover_to_page_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
#endif
        mov b0=r29
        br.cond.sptk.many page_fault;;

// if we wind up here, we missed in guest VHPT so recover
// from nested dtlb fault and reflect a tlb fault to the guest
guest_vhpt_miss:
        mov b0=r29
        // fault = IA64_VHPT_FAULT
        mov r20=r0
        br.cond.sptk.many 1f;

        // if we get to here, we are ready to reflect
        // need to set up virtual ifa, iha, itir (fast_reflect handles
        // virtual isr, iip, ipsr, ifs)
        // see vcpu_get_itir_on_fault: get ps,rid,(FIXME: key) from rr[ifa]
page_not_present:
        tbit.nz p6,p7=r16,IA64_ISR_X_BIT;;
(p6)    movl r20=0x400
(p7)    movl r20=0x800

1:      extr.u r25=r17,61,3;;
        adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        shl r25=r25,3;;
        add r21=r21,r25;;
        ld8 r22=[r21];;
        extr.u r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN;;
        dep.z r22=r22,IA64_RR_PS,IA64_RR_PS_LEN+IA64_RR_RID_LEN
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        st8 [r23]=r22

        // fast_reflect expects
        //      r16 == cr.isr
        //      r18 == XSI_PSR_IC
        //      r20 == offset into ivt
        //      r29 == iip
        //      r30 == ipsr
        //      r31 == pr
        //mov r16=cr.isr
        mov r29=cr.iip
        mov r30=cr.ipsr
        br.sptk.many fast_reflect;;
#endif
END(fast_tlb_miss_reflect)

ENTRY(slow_vcpu_rfi)
        adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18;;
        ld8 r22=[r22];;
        tbit.z p6,p0=r22,63
(p6)    br.spnt.few dispatch_break_fault ;;

        // If vifs.v is set, we have two IFS values to consider:
        //  * the guest IFS
        //  * the hypervisor IFS (validated by cover)
        // Because IFS is copied to CFM and is used to adjust AR.BSP,
        // virtualization of rfi is not easy.
        // Previously there was a two-step method (a first rfi jumped to
        // a stub which performed a second rfi).
        // The new method discards the RS before executing the hypervisor
        // cover.  After cover, IFS.IFM will be zero, so this IFS simply
        // clears CFM without modifying AR.BSP.  Therefore the guest IFS can
        // be used instead and there is no need for a second rfi.
        // Discarding the RS with the alloc instruction below just clears
        // CFM, which is safe because rfi will overwrite it.
        // There is a drawback: because the RS must be discarded before
        // executing C code, emulation of rfi must go through a hyperprivop
        // and not through normal instruction decoding.
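        // Restating the mechanism (a hedged summary, for orientation only):
        // the "alloc ...,0,0,0,0" below shrinks the current register-stack
        // frame to zero locals/outputs/rotating registers, so the cover later
        // executed on the hypervisor path produces a CR.IFS whose ifm field
        // is 0.  When rfi eventually loads that IFS into CFM, the frame
        // simply becomes empty and AR.BSP is left untouched, which is why the
        // guest's own IFS (held in the shared page) can be handed back to the
        // guest directly.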
        alloc r22=ar.pfs,0,0,0,0
        br.spnt.few dispatch_break_fault ;;
END(slow_vcpu_rfi)

// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged
ENTRY(hyper_rfi)
#ifndef FAST_RFI
        br.spnt.few slow_vcpu_rfi ;;
#endif
        // if interrupts pending and vcr.ipsr.i=1, do it the slow way
        adds r19=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18
        adds r23=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18
        cmp.ne p8,p0=r20,r0;;                   // evtchn_upcall_pending != 0
        // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way
        ld8 r21=[r19],XSI_IIP_OFS-XSI_IPSR_OFS  // r21=vcr.ipsr
        movl r20=~(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);;
        or r20=r20,r21
        // p8 determines whether we might deliver an immediate extint
(p8)    tbit.nz p8,p0=r21,IA64_PSR_I_BIT;;
        cmp.ne p7,p0=-1,r20
        ld4 r23=[r23]                           // r23=metaphysical_mode
#ifndef RFI_TO_INTERRUPT // see beginning of file
(p8)    br.cond.spnt.few slow_vcpu_rfi
#endif
(p7)    br.spnt.few slow_vcpu_rfi;;
        // if we were in metaphys mode, do it the slow way (FIXME later?)
        cmp.ne p7,p0=r23,r0
        ld8 r22=[r19]                           // r22=vcr.iip
(p7)    br.spnt.few slow_vcpu_rfi;;
        // OK now, let's do an rfi.
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_RFI);;
        ld4 r23=[r20];;
        adds r23=1,r23;;
        st4 [r20]=r23;;
#endif
#ifdef RFI_TO_INTERRUPT
        // maybe do an immediate interrupt delivery?
(p8)    br.cond.spnt.few rfi_check_extint;;
#endif

just_do_rfi:
        // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip
        mov cr.iip=r22
        extr.u r19=r21,IA64_PSR_CPL0_BIT,2
        adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;;
        cmp.gtu p7,p0=CONFIG_CPL0_EMUL,r19
        ld8 r20=[r20];;
(p7)    mov r19=CONFIG_CPL0_EMUL
        dep r20=0,r20,38,25;;           // ensure ifs has no reserved bits set
        mov cr.ifs=r20 ;;
        // ipsr.cpl = max(vcr.ipsr.cpl, IA64_PSR_CPL0_BIT);
        movl r20=THIS_CPU(current_psr_i_addr)
        dep r21=r19,r21,IA64_PSR_CPL0_BIT,2;;
        // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic
        ld8 r20=[r20]
        mov r19=1
        tbit.nz p7,p6=r21,IA64_PSR_I_BIT
        tbit.nz p9,p8=r21,IA64_PSR_IC_BIT;;     // not done yet
(p7)    st1 [r20]=r0
(p6)    st1 [r20]=r19
(p9)    st4 [r18]=r19
(p8)    st4 [r18]=r0
        // force on psr.ic, i, dt, rt, it, bn
        movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT| \
                  IA64_PSR_IT|IA64_PSR_BN)
        // keep cr.ipsr.pp and set vPSR.pp = vIPSR.pp
        mov r22=cr.ipsr ;;
        or r21=r21,r20
        tbit.z p10,p11 = r22, IA64_PSR_PP_BIT ;;
        adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
        tbit.z p8,p9 = r21, IA64_PSR_DFH_BIT
        adds r23=XSI_VPSR_PP_OFS-XSI_PSR_IC_OFS,r18 ;;
(p9)    mov r27=1;;
(p9)    st1 [r20]=r27
        dep r21=r22,r21,IA64_PSR_PP_BIT,1
(p10)   st1 [r23]=r0
(p11)   st1 [r23]=r27 ;;
(p8)    st1 [r20]=r0
(p8)    adds r20=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
(p8)    ld1 r27=[r20] ;;
(p8)    dep r21=r27,r21, IA64_PSR_DFH_BIT, 1 ;;
        mov cr.ipsr=r21
        adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld4 r21=[r20];;
        cmp.ne p7,p0=r21,r0             // domain already did "bank 1 switch"?
(p7)    br.cond.spnt.few 1f;
        // OK, now all set to go except for switch to virtual bank1
        mov r22=1;;
        st4 [r20]=r22
        mov r30=r2
        mov r29=r3
        mov r17=ar.unat;;
        adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18
        adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18
        adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;;
        ld8 r16=[r16];;
        mov ar.unat=r16;;