hyperprivop.s
        st8 [r24]=r25                   // save for metaphysical
        mov rr[r26]=r25
        dv_serialize_data

        // rr1
        st8 [r22]=r9, 8                 // current->rrs[1] = r9
        add r26=r26,r30                 // r26 = 0x2000000000000000
        extr.u r27=r17,0,8
        extr.u r28=r17,8,8
        extr.u r29=r17,16,8;;
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data

        // rr2
        st8 [r22]=r10, 8                // current->rrs[2] = r10
        add r26=r26,r30                 // r26 = 0x4000000000000000
        extr.u r27=r19,0,8
        extr.u r28=r19,8,8
        extr.u r29=r19,16,8;;
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data

        // rr3
        st8 [r22]=r11, 8                // current->rrs[3] = r11
        add r26=r26,r30                 // r26 = 0x6000000000000000
        extr.u r27=r20,0,8
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8;;
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data

        // rr4
        st8 [r22]=r14                   // current->rrs[4] = r14
        add r26=r26,r30                 // r26 = 0x8000000000000000
        extr.u r27=r21,0,8
        extr.u r28=r21,8,8
        extr.u r29=r21,16,8;;
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data
#else
        // shuffled version
        // rr0
        // uses r27, r28, r29 for mangling
        // r25 for mangled value
        st8 [r22]=r8, 8                 // current->rrs[0] = r8
        mov r26=0                       // r26=0x0000000000000000
        extr.u r27=r16,0,8
        extr.u r28=r16,8,8
        extr.u r29=r16,16,8;;
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        st8 [r24]=r25                   // save for metaphysical
        mov rr[r26]=r25
        dv_serialize_data

        // r16, r24, r25 is usable.
        // rr1
        // uses r25, r28, r29 for mangling
        // r25 for mangled value
        extr.u r25=r17,0,8
        extr.u r28=r17,8,8
        st8 [r22]=r9, 8                 // current->rrs[1] = r9
        extr.u r29=r17,16,8 ;;
        add r26=r26,r30                 // r26 = 0x2000000000000000
        extr.u r24=r19,8,8
        extr.u r16=r19,0,8
        dep r25=r25,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data

        // r16, r17, r24, r25 is usable
        // rr2
        // uses r16, r24, r29 for mangling
        // r17 for mangled value
        extr.u r29=r19,16,8
        extr.u r27=r20,0,8
        st8 [r22]=r10, 8                // current->rrs[2] = r10
        add r26=r26,r30                 // r26 = 0x4000000000000000
        dep r17=r16,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r17=r24,r17,16,8;;
        dep r17=r29,r17,8,8;;
        mov rr[r26]=r17
        dv_serialize_data

        // r16, r17, r19, r24, r25 is usable
        // rr3
        // uses r27, r28, r29 for mangling
        // r25 for mangled value
        extr.u r28=r20,8,8
        extr.u r29=r20,16,8
        st8 [r22]=r11, 8                // current->rrs[3] = r11
        extr.u r16=r21,0,8
        add r26=r26,r30                 // r26 = 0x6000000000000000
        dep r25=r27,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r28,r25,16,8;;
        dep r25=r29,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data

        // r16, r17, r19, r20, r24, r25
        // rr4
        // uses r16, r17, r24 for mangling
        // r25 for mangled value
        extr.u r17=r21,8,8
        extr.u r24=r21,16,8
        st8 [r22]=r14                   // current->rrs[4] = r14
        add r26=r26,r30                 // r26 = 0x8000000000000000
        dep r25=r16,r23,24,8;;          // mangling is swapping bytes 1 & 3
        dep r25=r17,r25,16,8;;
        dep r25=r24,r25,8,8;;
        mov rr[r26]=r25
        dv_serialize_data
#endif

        // done, mosey on back
1:      mov r24=cr.ipsr
        mov r25=cr.iip;;
        extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r25=16,r25
(p7)    adds r26=1,r26 ;;
        dep r24=r26,r24,IA64_PSR_RI_BIT,2 ;;
        mov cr.ipsr=r24
        mov cr.iip=r25
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_set_rr0_to_rr4)

ENTRY(hyper_set_kr)
        extr.u r25=r8,3,61;;
        cmp.ne p7,p0=r0,r25             // if kr# > 7, go slow way
(p7)    br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_SET_KR);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        adds r21=XSI_KR0_OFS-XSI_PSR_IC_OFS,r18
        shl r20=r8,3;;
        add r22=r20,r21;;
        st8 [r22]=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar0=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar1=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar2=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar3=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar4=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar5=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar6=r9;;
        cmp.eq p7,p0=r8,r0
        adds r8=-1,r8;;
(p7)    mov ar7=r9;;
        // done, mosey on back
1:      mov r24=cr.ipsr
        mov r25=cr.iip;;
        extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r25=16,r25
(p7)    adds r26=1,r26 ;;
        dep r24=r26,r24,IA64_PSR_RI_BIT,2 ;;
        mov cr.ipsr=r24
        mov cr.iip=r25
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_set_kr)

// this routine was derived from optimized assembly output from
// vcpu_thash so it is dense and difficult to read but it works
// On entry:
//      r18 == XSI_PSR_IC
//      r31 == pr
ENTRY(hyper_thash)
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_THASH);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        shr.u r20 = r8, 61
        addl r25 = 1, r0
        movl r17 = 0xe000000000000000
        ;;
        and r21 = r17, r8               // VHPT_Addr1
        ;;
        shladd r28 = r20, 3, r18
        adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18
        ;;
        adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28
        addl r28 = 32767, r0
        ld8 r24 = [r19]                 // pta
        ;;
        ld8 r23 = [r27]                 // rrs[vadr>>61]
        extr.u r26 = r24, IA64_PTA_SIZE_BIT, IA64_PTA_SIZE_LEN
        ;;
        extr.u r22 = r23, IA64_RR_PS, IA64_RR_PS_LEN
        shl r30 = r25, r26
        ;;
        shr.u r19 = r8, r22
        shr.u r29 = r24, 15
        ;;
        adds r17 = -1, r30
        ;;
        shladd r27 = r19, 3, r0
        extr.u r26 = r17, 15, 46
        ;;
        andcm r24 = r29, r26
        and r19 = r28, r27
        shr.u r25 = r27, 15
        ;;
        and r23 = r26, r25
        ;;
        or r22 = r24, r23
        ;;
        dep.z r20 = r22, 15, 46
        ;;
        or r16 = r20, r21
        ;;
        or r8 = r19, r16
        // done, update iip/ipsr to next instruction
        mov r24=cr.ipsr
        mov r25=cr.iip;;
        extr.u r26=r24,IA64_PSR_RI_BIT,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r25=16,r25
(p7)    adds r26=1,r26 ;;
        dep r24=r26,r24,IA64_PSR_RI_BIT,2 ;;
        mov cr.ipsr=r24
        mov cr.iip=r25
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_thash)

ENTRY(hyper_ptc_ga)
#ifndef FAST_PTC_GA
        br.spnt.few dispatch_break_fault ;;
#endif
        // FIXME: validate not flushing Xen addresses
#ifdef FAST_HYPERPRIVOP_CNT
        movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_PTC_GA);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
        movl r21=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r21=[r21];;
        adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r21
        mov r28=r8
        extr.u r19=r9,2,6               // addr_range=1<<((r9&0xfc)>>2)
        mov r20=1
        shr.u r24=r8,61
        movl r26=0x8000000000000000     // INVALID_TI_TAG
        mov r30=ar.lc
        ;;
        ld1 r22=[r22]                   // current->arch.vhpt_pg_shift
        shl r19=r20,r19
        cmp.eq p7,p0=7,r24
(p7)    br.spnt.many dispatch_break_fault ;;    // slow way for rr7
        ;;
        shl r27=r22,2                   // vhpt_pg_shift<<2 (for ptc.ga)
        shr.u r23=r19,r22               // repeat loop for n pages
        cmp.le p7,p0=r19,r0             // skip flush if size<=0
(p7)    br.cond.dpnt 2f ;;
        shl r24=r23,r22;;
        cmp.ne p7,p0=r24,r23 ;;
(p7)    adds r23=1,r23 ;;               // n_pages<size<n_pages+1? extra iter
        mov ar.lc=r23
        shl r29=r20,r22;;               // page_size
1:
        thash r25=r28 ;;
        adds r25=16,r25 ;;
        ld8 r24=[r25] ;;
        // FIXME: should check if tag matches, not just blow it away
        or r24=r26,r24 ;;               // vhpt_entry->ti_tag = 1
        st8 [r25]=r24
        ptc.ga r28,r27 ;;
        srlz.i ;;
        add r28=r29,r28
        br.cloop.sptk.few 1b
        ;;
2:
        mov ar.lc=r30 ;;
        mov r29=cr.ipsr
        mov r30=cr.iip;;
        adds r25=IA64_VCPU_DTLB_OFFSET,r21
        adds r26=IA64_VCPU_ITLB_OFFSET,r21;;
        ld8 r24=[r25]
        ld8 r27=[r26] ;;
        and r24=-2,r24
        and r27=-2,r27 ;;
        st8 [r25]=r24                   // set 1-entry i/dtlb as not present
        st8 [r26]=r27 ;;
        // increment to point to next instruction
        extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r30=16,r30
(p7)    adds r26=1,r26 ;;
        dep r29=r26,r29,IA64_PSR_RI_BIT,2 ;;
        mov cr.ipsr=r29
        mov cr.iip=r30
        mov pr=r31,-1 ;;
        rfi
        ;;
END(hyper_ptc_ga)

// recovery block for hyper_itc metaphysical memory lookup
ENTRY(recover_and_dispatch_break_fault)
#ifdef PERF_COUNTERS
        movl r21=PERFC(recover_to_break_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
#endif
        mov b0=r29 ;;
        br.sptk.many dispatch_break_fault;;
END(recover_and_dispatch_break_fault)

// Registers at entry
//      r17 = break immediate (HYPERPRIVOP_ITC_D or I)
//      r18 == XSI_PSR_IC_OFS
//      r31 == pr
ENTRY(hyper_itc)
hyper_itc_i:
        // fall through, hyper_itc_d handles both i and d
hyper_itc_d:
#ifndef FAST_ITC
        br.sptk.many dispatch_break_fault ;;
#else
        // ensure itir.ps >= xen's pagesize
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        adds r22=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld1 r22=[r22]
        ld8 r23=[r23];;
        extr.u r24=r23,IA64_ITIR_PS,IA64_ITIR_PS_LEN;;  // r24==logps
        cmp.gt p7,p0=r22,r24
(p7)    br.spnt.many dispatch_break_fault ;;
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        // for now, punt on region0 inserts
        extr.u r21=r21,61,3;;
        cmp.eq p7,p0=r21,r0
(p7)    br.spnt.many dispatch_break_fault ;;
        adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;;
        ld8 r27=[r27]
// FIXME: is the global var dom0 always pinned? assume so for now
        movl r28=dom0;;
        ld8 r28=[r28];;
// FIXME: for now, only handle dom0 (see lookup_domain_mpa below)
        cmp.ne p7,p0=r27,r28
(p7)    br.spnt.many dispatch_break_fault ;;
#ifdef FAST_HYPERPRIVOP_CNT
        cmp.eq p6,p7=HYPERPRIVOP_ITC_D,r17;;
(p6)    movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_D)
(p7)    movl r20=FAST_HYPERPRIVOP_PERFC(HYPERPRIVOP_ITC_I);;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
#endif
(p6)    mov r17=2;;
(p7)    mov r17=3;;
        mov r29=b0 ;;
        movl r30=recover_and_dispatch_break_fault ;;
        mov r16=r8;;
        // fall through
#endif
END(hyper_itc)

#if defined(FAST_ITC) || defined (FAST_TLB_MISS_REFLECT)

// fast_insert(PSCB(ifa),r24=ps,r16=pte)
// r16 == pte
// r17 == bit0: 1=inst, 0=data; bit1: 1=itc, 0=vcpu_translate
// r18 == XSI_PSR_IC_OFS
// r24 == ps
// r29 == saved value of b0 in case of recovery
// r30 == recovery ip if failure occurs
// r31 == pr
ENTRY(fast_insert)
        // translate_domain_pte(r16=pteval,PSCB(ifa)=address,r24=itir)
        mov r19=1
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        shl r20=r19,r24
        ld8 r27=[r27];;
        adds r23=IA64_VCPU_VHPT_PG_SHIFT_OFFSET,r27
        adds r20=-1,r20                 // r20 == mask
        movl r19=_PAGE_PPN_MASK;;
        ld1 r23=[r23]
        mov r25=-1
        and r22=r16,r19;;               // r22 == pteval & _PAGE_PPN_MASK
        andcm r19=r22,r20
        shl r25=r25,r23                 // -1 << current->arch.vhpt_pg_shift
        adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r21=[r21];;
        and r20=r21,r20;;
        or r19=r19,r20;;                // r19 == mpaddr
// FIXME: for now, just do domain0 and skip mpaddr range checks
        and r20=r25,r19
        movl r21=PAGE_PHYS ;;
        or r20=r20,r21 ;;               // r20==return value from lookup_domain_mpa
        // r16=pteval,r20=pteval2
        movl r19=_PAGE_PPN_MASK
        movl r21=_PAGE_PL_PRIV;;
        andcm r25=r16,r19               // r25==pteval & ~_PAGE_PPN_MASK
        and r22=r20,r19;;
        or r22=r22,r21;;
        or r22=r22,r25;;                // r22==return value from translate_domain_pte
        // done with translate_domain_pte
        // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r16=mppte,r24=logps)
// FIXME: for now, just domain0 and skip range check
        // psr.ic already cleared
        // NOTE: r24 still contains ps (from above)
        shladd r24=r24,2,r0;;
        mov cr.itir=r24
        adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;;
        ld8 r23=[r23];;
        mov cr.ifa=r23
        tbit.z p6,p7=r17,0;;
(p6)    itc.d r22
(p7)    itc.i r22;;
        dv_serialize_data
        // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2)
        thash r28=r23
        or r26=1,r22;;
        ttag r21=r23
        adds r25=8,r28
        mov r19=r28;;
        st8 [r25]=r24
        adds r20=16,r28;;
        st8 [r19]=r26
        st8 [r20]=r21;;
        // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa)
        // TR_ENTRY = {page_flags,itir,addr,rid}
        tbit.z p6,p7=r17,0
        adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27
(p6)    adds r27=IA64_VCPU_DTLB_OFFSET,r27
(p7)    adds r27=IA64_VCPU_ITLB_OFFSET,r27;;
        st8 [r27]=r22,8;;               // page_flags: already has pl >= 2 and p==1
        st8 [r27]=r24,8                 // itir
        mov r19=-4096;;
        and r23=r23,r19;;
        st8 [r27]=r23,8                 // ifa & ~0xfff
        adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18
        extr.u r25=r23,61,3;;
        shladd r29=r25,3,r29;;
        ld8 r29=[r29]
        movl r20=IA64_RR_RID_MASK;;
        and r29=r29,r20;;
        st8 [r27]=r29,-8;;              // rid
        //if ps > 12
        cmp.eq p7,p0=12<<IA64_ITIR_PS,r24
(p7)    br.cond.sptk.many 1f;;
        // if (ps > 12) {
        // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); }
        extr.u r29=r24,IA64_ITIR_PS,IA64_ITIR_PS_LEN
        mov r28=1;;
        shl r26=r28,r29;;
        adds r29=-12,r29;;
        shl r25=r28,r29;;
        mov r29=-1
        adds r26=-1,r26
        adds r25=-1,r25;;
        andcm r26=r29,r26               // ~((1UL<<ps)-1)
        andcm r25=r29,r25;;             // ~((1UL<<(ps-12))-1)
        ld8 r29=[r27];;
        and r29=r29,r26;;
        st8 [r27]=r29,-16;;
        ld8 r29=[r27];;
        extr.u r28=r29,12,38;;
        movl r26=0xfffc000000000fff;;
        and r29=r29,r26
        and r28=r28,r25;;
        shl r28=r28,12;;
        or r29=r29,r28;;
        st8 [r27]=r29;;
1:      // done with vcpu_set_tr_entry
        //PSCBX(vcpu,i/dtlb_pte) = mp_pte
        movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;;
        ld8 r27=[r27];;
        tbit.z p6,p7=r17,0;;
(p6)    adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27
(p7)    adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;;
        st8 [r27]=r16;;
        // done with vcpu_itc_no_srlz
        // if hyper_itc, increment to point to next instruction
        tbit.z p7,p0=r17,1
(p7)    br.cond.sptk.few no_inc_iip;;
        mov r29=cr.ipsr
        mov r30=cr.iip;;
        extr.u r26=r29,IA64_PSR_RI_BIT,2 ;;
        cmp.eq p6,p7=2,r26 ;;
(p6)    mov r26=0
(p6)    adds r30=16,r30
(p7)    adds r26=1,r26 ;;
        dep r29=r26,r29,IA64_PSR_RI_BIT,2 ;;
        mov cr.ipsr=r29
        mov cr.iip=r30;;
no_inc_iip:
        mov pr=r31,-1 ;;
        rfi
        ;;
END(fast_insert)
#endif
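
The extr.u/dep triples that fill each region register above implement the "mangling is swapping bytes 1 & 3" noted in the comments: the low three bytes of the requested rid are deposited back into the rr template in reverse order. A minimal C sketch of what one such triple computes; the names are illustrative (rid for the value in r16/r17/r19/r20/r21, base for the rr template held in r23), not taken from the Xen sources.

#include <stdint.h>

/* One extr.u/dep "mangling" triple: pull the low three bytes of the
 * requested rid and deposit them into the rr template in reverse order,
 * so bytes 1 and 3 of the resulting region-register value are swapped. */
static uint64_t mangle_rid(uint64_t rid, uint64_t base)
{
    uint64_t b0 = (rid >>  0) & 0xff;            /* extr.u r27=rX,0,8    */
    uint64_t b1 = (rid >>  8) & 0xff;            /* extr.u r28=rX,8,8    */
    uint64_t b2 = (rid >> 16) & 0xff;            /* extr.u r29=rX,16,8   */
    uint64_t rr = base;
    rr = (rr & ~(0xffULL << 24)) | (b0 << 24);   /* dep r25=r27,r23,24,8 */
    rr = (rr & ~(0xffULL << 16)) | (b1 << 16);   /* dep r25=r28,r25,16,8 */
    rr = (rr & ~(0xffULL <<  8)) | (b2 <<  8);   /* dep r25=r29,r25,8,8  */
    return rr;
}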
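
Every fast path above ends with the same cr.ipsr/cr.iip update that steps the guest past the break instruction that raised the hyperprivop. A C sketch of that shared epilogue, assuming psr.ri sits at bit 41 (the usual IA64_PSR_RI_BIT) and using an illustrative struct for the two control registers:

#include <stdint.h>

#define IA64_PSR_RI_BIT 41     /* psr.ri: 2-bit restart-instruction slot (assumed) */

/* Illustrative container for the two control registers the epilogue touches. */
struct trap_regs {
    uint64_t ipsr;
    uint64_t iip;
};

/* Advance iip/ipsr.ri past the current instruction slot.  An IA-64 bundle is
 * 16 bytes and holds three slots; slot 2 wraps to slot 0 of the next bundle. */
static void advance_guest_ip(struct trap_regs *r)
{
    uint64_t ri = (r->ipsr >> IA64_PSR_RI_BIT) & 0x3;
    if (ri == 2) {                      /* cmp.eq p6,p7=2,r26           */
        ri = 0;                         /* (p6) mov r26=0               */
        r->iip += 16;                   /* (p6) adds r25=16,r25         */
    } else {
        ri += 1;                        /* (p7) adds r26=1,r26          */
    }
    r->ipsr = (r->ipsr & ~(0x3ULL << IA64_PSR_RI_BIT))
            | (ri << IA64_PSR_RI_BIT);  /* dep r24=r26,r24,IA64_PSR_RI_BIT,2 */
}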
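
hyper_thash is, as its header comment warns, dense: it recomputes the short-format VHPT slot address from the guest's pta, the region register covering the faulting address, and the address itself. A C rendering of the same arithmetic, assuming the architectural field layouts (rr.ps and pta.size in bits 2..7, pta base above bit 15); the function and parameter names are illustrative, not the Xen ones.

#include <stdint.h>

/* What the hyper_thash arithmetic computes: the short-format VHPT slot
 * address for vadr, given the guest's pta and the region register rr
 * that covers vadr. */
static uint64_t vhpt_hash(uint64_t vadr, uint64_t pta, uint64_t rr)
{
    uint64_t region_bits = vadr & 0xe000000000000000ULL;  /* vadr{63:61}      */
    uint64_t ps      = (rr  >> 2) & 0x3f;                 /* rr.ps            */
    uint64_t size    = (pta >> 2) & 0x3f;                 /* pta.size         */
    uint64_t off     = (vadr >> ps) << 3;                 /* 8-byte entries   */
    uint64_t mask_hi = ((1ULL << size) - 1) >> 15;        /* VHPT size, >b15  */
    uint64_t base_hi = (pta >> 15) & ~mask_hi;            /* VHPT base        */
    uint64_t off_hi  = (off >> 15) & mask_hi;             /* offset, wrapped  */
    uint64_t hi = ((base_hi | off_hi) & ((1ULL << 46) - 1)) << 15;
    return region_bits | hi | (off & 0x7fff);
}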
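
hyper_ptc_ga decodes the purge size from r9 and then walks the range one VHPT page at a time, invalidating the hashed entry's tag and issuing ptc.ga for each page. A sketch of the sizing logic, following the assembly's own comments (including the round-up "extra iter" case); the function name and the flush_one_page() callout are illustrative placeholders, not Xen interfaces.

#include <stdint.h>

/* How the flush loop sizes itself: r9 encodes the range as a log2 in bits
 * 2..7; the loop then covers the range in Xen's VHPT page size, rounding
 * up when the range is not a whole number of pages. */
static void ptc_ga_range(uint64_t vadr, uint64_t r9, unsigned vhpt_pg_shift,
                         void (*flush_one_page)(uint64_t vadr, uint64_t ptc_ps))
{
    uint64_t addr_range = 1ULL << ((r9 >> 2) & 0x3f); /* extr.u r19=r9,2,6   */
    uint64_t ptc_ps    = (uint64_t)vhpt_pg_shift << 2;/* ps encoded for ptc.ga */
    uint64_t page_size = 1ULL << vhpt_pg_shift;
    uint64_t n_pages   = addr_range >> vhpt_pg_shift;

    if ((int64_t)addr_range <= 0)                     /* skip flush if size<=0 */
        return;
    if ((n_pages << vhpt_pg_shift) != addr_range)     /* not page-aligned:     */
        n_pages += 1;                                 /* one extra iteration   */

    /* one iteration per page: kill the VHPT entry's tag, then ptc.ga */
    for (uint64_t i = 0; i < n_pages; i++, vadr += page_size)
        flush_one_page(vadr, ptc_ps);
}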
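
fast_insert's translate_domain_pte step rewrites the guest pte into a machine pte for the dom0 identity case, exactly as traced by its register comments (mpaddr, "return value from lookup_domain_mpa", "return value from translate_domain_pte"). A sketch of that rewrite; rather than guessing their values, the Xen constants _PAGE_PPN_MASK, _PAGE_PL_PRIV and PAGE_PHYS are passed in as parameters here.

#include <stdint.h>

/* Guest pte -> machine pte for the dom0 fast path, per the comments in
 * fast_insert.  logps is the insert page size, vhpt_pg_shift is
 * current->arch.vhpt_pg_shift; the last three arguments stand in for the
 * real Xen macro values. */
static uint64_t fast_translate_pte(uint64_t pteval, uint64_t ifa,
                                   unsigned logps, unsigned vhpt_pg_shift,
                                   uint64_t ppn_mask,   /* _PAGE_PPN_MASK */
                                   uint64_t pl_priv,    /* _PAGE_PL_PRIV  */
                                   uint64_t page_phys)  /* PAGE_PHYS      */
{
    uint64_t mask   = (1ULL << logps) - 1;                       /* r20 */
    uint64_t mpaddr = (pteval & ppn_mask & ~mask) | (ifa & mask);/* r19 */
    uint64_t maddr  = (mpaddr & (~0ULL << vhpt_pg_shift))        /* lookup_ */
                    | page_phys;                                 /* domain_mpa */
    return (pteval & ~ppn_mask) | (maddr & ppn_mask) | pl_priv;  /* r22 */
}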