📄 em86real.s
Font size:
/* Tail of the preceding ModRM addressing-mode case (its start is not
   visible in this chunk): select ss as the base segment, then dispatch. */
	addi	base,ssb,0
	bctr
/* d32(,index,scale) */
17:	NEXTDWORD(offset)
	beqctr-	cr1			# no index: very unlikely
	lwbrx	r3,state,r3
	srwi	r6,r7,6
	slw	r3,r3,r6
	add	offset,offset,r3
	bctr
/* 8 or 32 bit displacement */
18:	xori	r4,one,0xcc01		# build 0x0000cc00
	rlwnm	r4,r4,offset,0,1	# 0 or 0xc0000000
	lwbrx	offset,state,offset
	cmpw	cr2,r4,opcode		# use ss ?
	bgt	cr0,20f			# 8 bit offset
/* 32 bit displacement */
	NEXTDWORD(r5)
	beq-	cr1,21f
/* d(base,index,scale) */
19:	lwbrx	r3,state,r3
	add	offset,offset,r5
	add	offset,offset,r3
	bgtctr	cr2
	addi	base,ssb,0
	bctr
/* 8 bit displacement */
20:	NEXTBYTE(r5)
	extsb	r5,r5
	bne+	cr1,19b
/* d(base), in practice base is %esp */
21:	add	offset,offset,r5
	bgtctr-	cr2
	addi	base,ssb,0
	bctr

/*
 * Flag evaluation subroutines: they have not been written for performance
 * since they are not often used in practice. The rule of the game was to
 * write them with as few branches as possible.
 * The first routines evaluate either one or 2 (ZF and SF simultaneously)
 * flags and do not use r0 and r7.
 * The more complex routines (_eval_above, _eval_signed and _eval_flags)
 * call the former ones, using r0 as a return address save register and
 * r7 as a safe temporary.
 */

/*
 * _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
 * and protected because it is possible, although it is exceptional, to have
 * SF and ZF set at the same time after a few instructions which may leave the
 * flags in this apparently inconsistent state: sahf, popf, iret and the few
 * (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
 * cmpxchg8b). This also solves the obscure case of ZF set and PF clear.
 * On return: SF=cr6[0], ZF=cr6[2].
 */
_eval_sf_zf:
	andis.	r5,flags,ZF_PROTECT>>16
	rlwinm	r3,flags,0,INCDEC_FIELD
	RES_SHIFT(r4)
	cntlzw	r3,r3
	slw	r4,result,r4
	srwi	r5,r3,5			# ? use result : use op1
	rlwinm	r3,r3,2,0x18
	oris	flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16
	neg	r5,r5			# mux result/op2
	slw	r3,op2,r3
	and	r4,r4,r5
	andc	r3,r3,r5
	xoris	flags,flags,(SIGNED_IN_CR)>>16
	bne-	1f			# 12 instructions between set
	or	r3,r3,r4		# and test, good for folding
	cmpwi	cr6,r3,0
	blr
1:	or.	r3,r3,r4
	crmove	SF,0
	blr

/*
 * _eval_cf may be called at any time, no other flag is affected.
 * On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8.
 */
_eval_cf:
	addc	r3,flags,flags		# CF_IN to xer[ca]
	RES2CF(r4)			# get 8 or 16 bit carry
	subfe	r3,result,op1		# generate PPC carry for
	CF_ROTCNT(r5)			# preceding operation
	addze	r3,r4			# put carry into LSB
	CF_POL(r4,23)			# polarity & 0x100
	oris	flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
	rlwnm	r3,r3,r5,23,23		# shift carry there
	xor	r3,r3,r4		# CF <<8
	xoris	flags,flags,(ABOVE_IN_CR)>>16
	cmplw	cr4,one,r3		# sets cr4[0]
	blr

/*
 * eval_of returns the overflow flag in OF_STATE field, which will be
 * either 001 (OF clear) or 101 (OF set), it is only called when the two
 * low order bits of OF_STATE are not 01 (otherwise it will work but
 * it is an elaborate variant of a nop with a few registers destroyed)
 * The code multiplexes several sources in a branchless way, was fun to write.
 */
_eval_of:
	GET_ADDSUB(r4)			# 0(add)/1(sub)
	rlwinm	r3,flags,0,INCDEC_FIELD
	neg	r4,r4			# 0(add)/-1(sub)
	eqv	r5,result,op1		# result[]==op1[] (bit by bit)
	cntlzw	r3,r3			# inc/dec
	xor	r4,r4,op2		# true sign of op2
	oris	r5,r5,0x0808		# bits to clear
	clrlwi	r6,r3,31		# 0(inc)/1(dec)
	eqv	r4,r4,op1		# op1[]==op2[] (bit by bit)
	add	r6,op2,r6		# add 1 if dec
	rlwinm	r3,r3,2,0x18		# incdec_shift
	andc	r4,r4,r5		# arithmetic overflow
	slw	r3,r6,r3		# shifted inc/dec result
	addis	r3,r3,0x8000		# compare with 0x80000000
	ori	r4,r4,0x0808		# bits to set
	cntlzw	r3,r3			# 32 if inc/dec overflow
	OF_ROTCNT(r6)
	rlwimi	r4,r3,18,0x00800000	# insert inc/dec overflow
	rlwimi	flags,one,24,OF_STATE_MASK
	rlwnm	r3,r4,r6,8,8		# get field
	rlwimi	flags,r3,3,OF_VALUE	# insert OF
	blr

/*
 * _eval_pf will always be called when needed (complex but infrequent),
 * there are a few quirks for a branchless solution.
 * On return: PF=cr0[0], PF=MSB(r3)
 */
_eval_pf:
	rlwinm	r3,flags,0,INCDEC_FIELD
	rotrwi	r4,op2,4		# from inc/dec
	rotrwi	r5,result,4		# from result
	cntlzw	r3,r3			# use result if 32
	xor	r4,r4,op2
	xor	r5,r5,result
	rlwinm	r3,r3,26,0,0		# 32 becomes 0x80000000
	clrlwi	r4,r4,28
	lis	r6,0x9669		# constant to shift
	clrlwi	r5,r5,28
	rlwnm	r4,r6,r4,0,0		# parity from inc/dec
	rlwnm	r5,r6,r5,0,0		# parity from result
	andc	r4,r4,r3		# select which one
	and	r5,r5,r3
	add.	r3,r4,r5		# and test to simplify
	blr				# returns in r3 and cr0 set.

/*
 * _eval_af will always be called when needed (complex but infrequent):
 * - if after inc, af is set when 4 low order bits of op1 are 0
 * - if after dec, af is set when 4 low order bits of op1 are 1
 *   (or 0 after adding 1 as implemented here)
 * - if after add/sub/adc/sbb/cmp af is set from sum of 4 LSB of op1
 *   and 4 LSB of op2 (eventually complemented) plus carry in.
 * - other instructions leave AF undefined so the returned value is irrelevant.
 * Returned value must be masked with 0x10, since all other bits are undefined.
 * The branchless code is perhaps not the most efficient, but quite parallel.
 */
_eval_af:
	rlwinm	r3,flags,0,INCDEC_FIELD
	clrlwi	r5,op2,28		# 4 LSB of op2
	addc	r4,flags,flags		# carry_in
	GET_ADDSUB(r6)
	cntlzw	r3,r3			# if inc/dec 16..23 else 32
	neg	r6,r6			# add/sub
	clrlwi	r4,r3,31		# if dec 1 else 0
	xor	r5,r5,r6		# conditionally complement
	clrlwi	r6,op1,28		# 4 LSB of op1
	add	r4,op2,r4		# op2+(dec ? 1 : 0)
	clrlwi	r4,r4,28		# 4 LSB of op2+(dec ? 1 : 0)
	adde	r5,r6,r5		# op1+cy_in+(op2/~op2)
	cntlzw	r4,r4			# 28..31 if not AF, 32 if set
	andc	r5,r5,r3		# masked AF from add/sub...
	andc	r4,r3,r4		# masked AF from inc/dec
	or	r3,r4,r5
	blr

/*
 * _eval_above will only be called if ABOVE_IN_CR is not set.
 * On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1]
 */
_eval_above:
	andis.	r3,flags,ZF_IN_CR>>16
	mflr	r0			# save return address across helper calls
	beql+	_eval_sf_zf
	andis.	r3,flags,CF_IN_CR>>16
	beql+	_eval_cf
	mtlr	r0
	oris	flags,flags,ABOVE_IN_CR>>16
	crnor	ABOVE,ZF,CF
	blr

/* _eval_signed may only be called when signed_in_cr is clear ! */
_eval_signed:
	andis.	r3,flags,SF_IN_CR>>16
	mflr	r0
	beql+	_eval_sf_zf
/* SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear */
	rlwinm.	r3,flags,5,0,1
	xoris	flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16
	bngl+	_eval_of
	andis.	r3,flags,OF_VALUE>>16
	mtlr	r0
	crxor	SLT,SF,OF
	crnor	SGT,SLT,ZF
	blr

/*
 * _eval_flags: calls the individual evaluators above (r0 holds the saved
 * return address) and merges the CF/PF/AF/ZF/SF/OF bits into the x86
 * flag image, combined with the masked eflags(state) word; result in r3.
 */
_eval_flags:
	mflr	r0
	bl	_eval_cf
	li	r7,2
	rlwimi	r7,r3,24,CF86,CF86	# 2 if CF clear, 3 if set
	bl	_eval_pf
	andis.	r4,flags,SF_IN_CR>>16
	rlwimi	r7,r3,32+PF-PF86,PF86,PF86
	bl	_eval_af
	rlwimi	r7,r3,0,AF86,AF86
	beql+	_eval_sf_zf
	mfcr	r3
	rlwinm.	r4,flags,5,0,1
	rlwimi	r7,r3,0,DF86,SF86
	ZF2ZF86(r3,r7)
	bngl+	_eval_of
	mtlr	r0
	lis	r4,0x0004
	lwz	r3,eflags(state)
	addi	r4,r4,0x7000
	rlwimi	r7,flags,17,OF86,OF86
	and	r3,r3,r4
	or	r3,r3,r7
	blr

/*
 * Quite simple for real mode, input in r4, returns in r3.
 */
_segment_load:
	lwz	r5,vbase(state)
	rlwinm	r3,r4,4,0xffff0		# segment selector * 16
	add	r3,r3,r5
	blr

/* To allow I/O port virtualization if necessary,
   code for exception in r3, port number in r4 */
_check_port:
	lwz	r5,ioperm(state)
	rlwinm	r6,r4,29,0x1fff		# 0 to 8kB
	lis	r0,0xffff
	lhbrx	r5,r5,r6
	clrlwi	r6,r4,29		# modulo 8
	rlwnm	r0,r0,r3,0x0f		# 1, 3, or 0xf
	slw	r0,r0,r6
	and.	r0,r0,r5
	bne-	complex			# permission bit set: take slow path
	blr

/*
 * Instructions are in approximate functional order:
 * 1) move, exchange, lea, push/pop, pusha/popa
 * 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out
 * 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg
 * 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr
 * 5) jump, call, ret
 * 6) string instructions and xlat
 * 7) rotate/shift/mul/div
 * 8) segment register, far jumps, calls and rets, interrupts
 * 9) miscellaneous (flags, bcd,...)
 */
#define MEM offset,base
#define REG opreg,state
#define SELECTORS 32
#define SELBASES 64

/* Immediate moves */
movb_imm_reg:	rlwinm opreg,opcode,2,28,29; lbz r3,1(eip)
	rlwimi opreg,opcode,30,31,31; lbzu opcode,2(eip)
	stbx r3,REG; GOTNEXT

movw_imm_reg:	lhz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,3(eip)
	sthx r3,REG; GOTNEXT

movl_imm_reg:	lwz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,5(eip)
	stwx r3,REG; GOTNEXT

movb_imm_mem:	lbz r0,1(eip); cmpwi opreg,0
	lbzu opcode,2(eip); bne- ud	# reg field must be 0, else undefined opcode
	stbx r0,MEM; GOTNEXT

movw_imm_mem:	lhz r0,1(eip); cmpwi opreg,0
	lbzu opcode,3(eip); bne- ud
	sthx r0,MEM; GOTNEXT

movl_imm_mem:	lwz r0,1(eip); cmpwi opreg,0
	lbzu opcode,5(eip); bne- ud
	stwx r0,MEM; GOTNEXT

/* The special short form moves between memory and al/ax/eax
   (byte-reversed loads fetch the little-endian address operand;
   `one` presumably holds the constant 1 — index past the opcode byte) */
movb_al_a32:	lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip)
	stbx r0,MEM; GOTNEXT

movb_al_a16:	lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip)
	stbx r0,MEM; GOTNEXT

movw_ax_a32:	lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip)
	sthx r0,MEM; GOTNEXT

movw_ax_a16:	lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip)
	sthx r0,MEM; GOTNEXT

movl_eax_a32:	lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip)
	stwx r0,MEM; GOTNEXT

movl_eax_a16:	lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip)
	stwx r0,MEM; GOTNEXT

movb_a32_al:	lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM
	stb r0,AL(state); GOTNEXT

movb_a16_al:	lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM
	stb r0,AL(state); GOTNEXT

movw_a32_ax:	lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM
	sth r0,AX(state); GOTNEXT

movw_a16_ax:	lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM
	sth r0,AX(state); GOTNEXT

movl_a32_eax:	lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM
	stw r0,EAX(state); GOTNEXT

movl_a16_eax:	lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM
	stw r0,EAX(state); GOTNEXT

/* General purpose move (all are exactly 4 instructions long) */
	.align	4
movb_reg_mem:	lbzx r0,REG
	NEXTBYTE(opcode)
	stbx r0,MEM
	GOTNEXT
movw_reg_mem:	lhzx r0,REG
	NEXTBYTE(opcode)
	sthx r0,MEM
	GOTNEXT
movl_reg_mem:	lwzx r0,REG
	NEXTBYTE(opcode)
	stwx r0,MEM
	GOTNEXT
movb_mem_reg:	lbzx r0,MEM
	NEXTBYTE(opcode)
	stbx r0,REG
	GOTNEXT
movw_mem_reg:	lhzx r0,MEM
	NEXTBYTE(opcode)
	sthx r0,REG
	GOTNEXT
movl_mem_reg:	lwzx r0,MEM
	NEXTBYTE(opcode)
	stwx r0,REG
	GOTNEXT

/* short form exchange ax/eax with register */
xchgw_ax_reg:	clrlslwi opreg,opcode,29,2
	lhz	r3,AX(state)
	lhzx	r4,REG
	sthx	r3,REG
	sth	r4,AX(state)
	NEXT
xchgl_eax_reg:	clrlslwi opreg,opcode,29,2
	lwz	r3,EAX(state)
	lwzx	r4,REG
	stwx	r3,REG
	stw	r4,EAX(state)
	NEXT

/* General exchange (unlocked!)
 */
xchgb_reg_mem:	lbzx r3,MEM
	lbzx	r4,REG
	NEXTBYTE(opcode)
	stbx	r3,REG
	stbx	r4,MEM
	GOTNEXT
xchgw_reg_mem:	lhzx r3,MEM
	lhzx	r4,REG
	sthx	r3,REG
	sthx	r4,MEM
	NEXT
xchgl_reg_mem:	lwzx r3,MEM
	lwzx	r4,REG
	stwx	r3,REG
	stwx	r4,MEM
	NEXT

/* lea, one of the simplest instructions */
leaw:	cmpw	base,state
	beq-	ud			# register operand: undefined opcode
	sthbrx	offset,REG
	NEXT
leal:	cmpw	base,state
	beq-	ud
	stwbrx	offset,REG
	NEXT

/* Short form pushes and pops */
pushw_sp_reg:	li r3,SP
	lhbrx	r4,state,r3
	clrlslwi opreg,opcode,29,2
	lhzx	r0,REG
	addi	r4,r4,-2
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	sthx	r0,ssb,r4
	NEXT
pushl_sp_reg:	li r3,SP
	lhbrx	r4,state,r3
	clrlslwi opreg,opcode,29,2
	lwzx	r0,REG
	addi	r4,r4,-4
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	stwx	r0,ssb,r4
	NEXT
popw_sp_reg:	li r3,SP
	lhbrx	r4,state,r3
	clrlslwi opreg,opcode,29,2
	lhzx	r0,ssb,r4
	addi	r4,r4,2		# order is important in case of pop sp
	sthbrx	r4,state,r3
	sthx	r0,REG
	NEXT
popl_sp_reg:	li r3,SP
	lhbrx	r4,state,r3
	clrlslwi opreg,opcode,29,2
	lwzx	r0,ssb,r4
	addi	r4,r4,4
	sthbrx	r4,state,r3
	stwx	r0,REG
	NEXT

/* Push immediate */
pushw_sp_imm:	li r3,SP
	lhbrx	r4,state,r3
	lhz	r0,1(eip)
	addi	r4,r4,-2
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	lbzu	opcode,3(eip)
	sthx	r0,ssb,r4
	GOTNEXT
pushl_sp_imm:	li r3,SP
	lhbrx	r4,state,r3
	lwz	r0,1(eip)
	addi	r4,r4,-4
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	lbzu	opcode,5(eip)
	stwx	r0,ssb,r4
	GOTNEXT
pushw_sp_imm8:	li r3,SP
	lhbrx	r4,state,r3
	lhz	r0,1(eip)
	addi	r4,r4,-2
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	lbzu	opcode,2(eip)
	extsb	r0,r0		# NOTE(review): extsb sign-extends the low-order
				# byte of the halfword loaded from eip+1 (i.e. the
				# byte at eip+2), while imm8 is at eip+1 — verify
				# the fetch/byte-order intent here
	sthx	r0,ssb,r4
	GOTNEXT
pushl_sp_imm8:	li r3,SP
	lhbrx	r4,state,r3
	lhz	r0,1(eip)
	addi	r4,r4,-4
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	lbzu	opcode,2(eip)
	extsb	r0,r0		# NOTE(review): same imm8 byte-selection question
				# as pushw_sp_imm8 above — confirm
	stwx	r0,ssb,r4
	GOTNEXT

/* General push/pop */
pushw_sp:	lhbrx r0,MEM
	li	r3,SP
	lhbrx	r4,state,r3
	addi	r4,r4,-2
	sthbrx	r4,state,r3
	clrlwi	r4,r4,16
	sthbrx	r0,r4,ssb
	NEXT
pushl_sp:	lwbrx r0,MEM
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -