📄 phelix86.asm
字号:
and Z4,MASK_TAB[4*t1] ;clear out extra bits
lea t1,tmpBuf
mov [t1],Z4
sub t1,t0
mov dstPtr,t1
mov srcPtr,t1
mov exitTab[t0],offset _ret_aad_2
mov tmpBuf[4],t0 ;save this
_pop Z4
jmp Encrypt_jmpTab[t0]
_ret_aad_2:
mov t0,tmpBuf[4]
xor t1,t1
mov exitTab[t0],t1
_endif
xor Z1,MAGIC_AAD_XOR ;mix the MAGIC_AAD_XOR constant into Z1
;;;;;;;;;;;;;;;;;
; process the user data
;
; Loads the source/destination pointers and byte count from callerParms, then
; runs the block code in up to two passes: whole trips around the unrolled
; loop, followed by one partial trip for any remaining whole words. Both
; passes dispatch through the table at jmpTabPtr and come back here via
; entries planted in exitTab (_ret_DataDone1 / _ret_DataDone2).
; retAddr_Z4 is loaded with _ret_MAC0 (presumably the same slot that the
; later "jmp retAddr" uses -- TODO confirm).
_push Z4 ;use Z4 as temp pointer
lea Z4,callerParms ; (to save code size in accessing caller parms below)
lea t0,_ret_MAC0
mov retAddr_Z4,t0
mov t0,src_Ptr_Z4
mov srcPtr,t0
mov t1,dst_Ptr_Z4
mov t0,src_ByteCnt_Z4
; enter here from EncryptBytes
; Expected on entry at this label (derived from the code that follows):
;   t1 = destination pointer, t0 = source byte count
;   one copy of Z4 already pushed (it is popped below)
;   Z4 presumably = address of callerParms, since the *_Z4 names look
;     Z4-relative -- TODO confirm
;   srcPtr already loaded (it is only adjusted, never loaded, after this label)
processUserData:
mov dstPtr,t1
mov dstPtr0_Z4,t1 ;keep the unadjusted dst pointer and length for later use
mov msgLen0_Z4,t0
_pop Z4 ;restore Z4
mov t1,loopByteCnt
and t1,4*(UNROLL_CNT-1) ;get the loop "phase"
sub dstPtr,t1 ;adjust pointers accordingly
sub srcPtr,t1
;;;;;;;;;;;;;;;;;
; now process the bulk of the data in "full" loop chunks (t0 = src_ByteCnt)
add t0,t1 ;t0 = byte count + starting phase
sub t0,UNROLL_CNT*4 ;enough for one "full" loop?
mov loopByteCnt,t0 ;save the pre-subtracted value for use in the loop
; the _if below tests the flags of the sub above (mov leaves flags alone)
_if ae
add t1,jmpTabPtr ;get ready to jump into block processing
; plant the return point in the last exitTab slot
; NOTE(review): this slot is not reset to zero anywhere in this section
mov exitTab[4*(UNROLL_CNT-1)],offset _ret_DataDone1
jmp dword ptr [t1] ;go encrypt or decrypt
_ret_DataDone1:
mov t0,loopByteCnt ;restore t0 = loopByteCnt
xor t1,t1 ;starting phase is at ??crypt_0 now
_endif
;;;;;;;;;;;;;;;;;
; now process the remainder of the data, if any (partial loop)
; t1 = phase to start from, t0 = phase to stop at; equal means nothing to do
and t0,4*(UNROLL_CNT-1) ;compute t0 = end phase
cmp t0,t1 ;any partial loop to do?
_if nz
mov loopByteCnt,t0 ;make sure that the exit loop test falls thru
add t1,jmpTabPtr ;get ready to jump
; exit slot is the one just before the end phase (the last block to run)
mov exitTab[t0-4],offset _ret_DataDone2 ;force an exit at the correct point
jmp dword ptr [t1]
_ret_DataDone2:
xor t1,t1 ;t1 = 0
mov t0,loopByteCnt
and t0,4*(UNROLL_CNT-1) ;recompute exitTab index
mov exitTab[t0-4],t1 ;clear the exitTab entry
_endif
;;;;;;;;;;;;;;;;;
; special (i.e. UGLY!!) handling when src_ByteCnt isn't a multiple of 4
; here with t0 = loopByteCnt AND 4*(UNROLL_CNT-1)
;
; Outline of what the code below does:
;   1. mask the last source dword with MASK_TAB[odd byte count] and stage it
;      in tmpBuf[0]; the mask itself is kept in tmpBuf[8]
;   2. point srcPtr at (tmpBuf - t0) and dstPtr at (tmpBuf + 4 - t0) so the
;      block code at word offset t0 reads tmpBuf[0] and writes tmpBuf[4]
;   3. run the block code for that one word, exiting through exitTab[t0]
;   4. at _ret_OddBytes, merge only the bytes selected by the mask into the
;      caller's destination buffer
mov t1,msgLen0 ;get original msgLen
and t1,3 ;any partial words? (hopefully rare)
_if nz,far
mov exitTab[t0],offset _ret_OddBytes
or t1,t0 ;save word index and odd byte count
mov loopByteCnt,t1 ; back into loopByteCnt
_push Z4
and t1,3 ;t1 = odd byte count again
mov Z4,srcPtr
add Z4,t0 ;Z4 --> source word at offset t0 (presumably the final, partial word)
_push t0
mov t1,MASK_TAB[4*t1] ;get the mask bits
; NOTE(review): full dword load although only 1..3 bytes of this word belong
; to the message; may read up to 3 bytes past the end of the source buffer
mov t0,dword ptr [Z4] ;and get the source word
lea Z4,tmpBuf
and t0,t1 ;t0 = masked source word
mov dword ptr [Z4+8],t1 ;save the mask bits (for use in Decrypt_OddBytes)
mov dword ptr [Z4 ],t0 ;save the masked source word
_pop t0
sub Z4,t0 ;adjust src/dst ptrs for hard coded offsets in block code
mov srcPtr,Z4 ;set up for "single-word" encrypt in tmpBuf[]
add Z4,4
mov dstPtr,Z4 ;dst is one dword above src, i.e. the result lands in tmpBuf[4]
mov t1,jmpTabPtr ;dispatch to different handler for Encrypt & Decrypt
_pop Z4
jmp dword ptr OddBytes_OFFS[t1]
;
; here to handle the odd-byte encrypt case
Encrypt_OddBytes:
jmp Encrypt_jmpTab[t0] ;go encrypt the single word
;
; here to handle the funky odd-byte decrypt case
Decrypt_OddBytes:
; we have to encrypt halfway thru the block to compute keystream :-((
; (i.e., in order to produce the "full" ciphertext word)
; The cipher state Z0..Z4 and t0 are saved and restored around this
; computation, so only tmpBuf is changed by it.
; The sequence below follows the first half of the main block code, leaving
; out the rotates and xors that do not feed Z4 (compare with the unrolled loop).
_push Z0,Z1,Z2,Z3,Z4,t0
_o_ <add Z0,Z3>,<rol Z3,ROT_3b>,<mov t0,X_i_0[t0]> ;get the key word
_o_ <add Z1,Z4>,<rol Z4,ROT_4b>
_o_ <xor Z2,Z0>,<rol Z0,ROT_0a>
_o_ <xor Z3,Z1> ,<add t0,Z3>
_o_ <add Z4,Z2>,<rol Z2,ROT_2a>,<mov t1,loopByteCnt>
_o_ <xor Z0,t0>,<rol Z4,ROT_4a>,<and t1,4*3>
_o_ <add Z2,Z0> ,<mov t0,oldZ[t1]>
_o_ <xor Z4,Z2>
add t0,Z4 ;now t0 = keystream
mov t1,tmpBuf[8] ;get the mask word
not t1 ;toggle the maskbits
and t1,t0 ;mask off unused maskbits
; tmpBuf[0] has zeroes outside the mask, so this xor drops the keystream
; bytes into exactly those positions
xor tmpBuf,t1 ;re-create the "full" ciphertext word @ tmp src buffer
_pop t0,Z4,Z3,Z2,Z1,Z0
jmp Decrypt_jmpTab[t0] ;go decrypt
; "return" here with the dest word computed at [tmpBuf+4]
_ret_OddBytes:
_push Z4,Z0
lea Z4,callerParms
xor t1,t1
mov t0,loopByteCnt
and t0,4*(UNROLL_CNT-1)
mov exitTab[t0],t1 ;clear out the exitTab entry we just used
mov t1,msgLen0 ;now output just the number of dst bytes specified
mov t0,t1
and t0,3 ;t0 = odd byte count
xor t1,t0 ;clear low 2 bits of count
add t1,dstPtr0_Z4 ;point to "final" word offset
mov Z0,tmpBuf_Z4[4] ;get the dst output word (short offset)
; masked merge: Z0 = (new xor old) and mask, then old xor Z0 replaces only the
; masked bytes of the destination dword and leaves the others as they were
; NOTE(review): this is a read-modify-write of a full dword, so up to 3 bytes
; beyond the requested length are read and written back unchanged
xor Z0,dword ptr [t1] ;do bit diddling to output just the odd bytes
and Z0,MASK_TAB[4*t0]
xor dword ptr [t1],Z0
_pop Z0,Z4
_endif
jmp retAddr ;"return" to whomever
_ret_MAC0:
;;;;;;;;;;;;;;;;;
; here to compute and output/compare the MAC
;
; Fills 12 dwords of tmpBuf with (loopByteCnt and 3), then runs the encrypt
; block code over them in place (srcPtr = dstPtr): first 8 blocks, returning
; at _ret_MAC1, then 4 more, returning at _ret_MAC2. The start/stop points
; are expressed as word offsets ("phases") within the unrolled loop, and the
; stop points are planted in exitTab.
mov t0,mac_Ptr
xor Z4,aad_Len ;mix aad_Len into Z4
; alternate entry: expects t0 = pointer to be saved in dstPtr0
processMAC:
mov dstPtr0,t0 ;save MAC ptr
xor Z0,MAGIC_MAC_XOR ;toggle bits to start the MAC
_push Z4
mov t0,loopByteCnt
mov t1,t0
add t0,3 ;advance to next full word, if odd bytes
and t0,4*(UNROLL_CNT-1) ;t0 = next word "offset" within block
and t1,3 ;t1 = length of src mod 4 (plaintext for MAC)
lea Z4,tmpBuf
_ASM_Assert <UNROLL_CNT ge 8>
_bb_ = 0
rept 8+4 ;8 for padding, 4 for MAC size
mov [Z4+_bb_],t1 ;fill tmpBuf with L(P) mod 4
_bb_ = _bb_ + 4
endm
lea t1,[t0+7*4]
and t1,4*(UNROLL_CNT-1) ;stop point is after 8 blocks (i+0..i+7)
; NOTE(review): this exitTab entry is not cleared again in this section
mov exitTab[t1],offset _ret_MAC1
sub Z4,t0 ;set up source/dest pointers
mov srcPtr,Z4 ;block code at word offset t0 now addresses tmpBuf[0]
mov dstPtr,Z4
add t0,8*4-1 ;FUNKY wrap logic requires -1
mov loopByteCnt,t0
inc t0 ;undo adjustment
and t0,4*(UNROLL_CNT-1) ;t0 = starting word offset again
_pop Z4
jmp Encrypt_jmpTab[t0] ;go do the encryption
; just finished eight blocks of "padding" using L(P) mod 4
; now generate the MAC
_ret_MAC1:
mov t0,loopByteCnt
inc t0 ;undo the -1 above
and t0,4*(UNROLL_CNT-1) ;t0 = word offset at which to resume
lea t1,[t0+3*4] ;do four more (0..3 -- stop after #3)
and t1,4*(UNROLL_CNT-1)
mov exitTab[t1],offset _ret_MAC2
lea t1,[t0+4*4-1] ;FUNKY wrap logic requires -1
mov loopByteCnt,t1
jmp Encrypt_jmpTab[t0]
;
; here with the MAC computed. Z0..Z4 now can be trashed
;
; Copies the MAC from tmpBuf[8*4] to the pointer saved in dstPtr0, sized by
; pCtxt.macSize (in bits), then releases the local stack frame and returns.
; esi/edi/ecx are used directly; popad restores the caller's registers.
; NOTE(review): no cld in this section, so rep movs relies on the direction
; flag already being clear here -- confirm.
_ret_MAC2:
lea Z4,callerParms
mov t1,ctxt_Ptr_Z4
mov ecx,[t1].pCtxt.macSize ;ecx = # bits in MAC
mov edi,dstPtr0_Z4 ;edi = destination (the saved MAC pointer)
lea esi,tmpBuf[8*4] ;esi = source: the four dwords after the 8 padding words
test ecx,31 ;can we do it one word at a time?
_if z
shr ecx,5 ;if so, it's faster
rep movsd
_else
add ecx,7 ;round up to byte boundary
shr ecx,3 ;non-word sizes get the slow treatment
; whole bytes are copied; when macSize is not a multiple of 8 the last byte
; is not masked down to the exact bit count
rep movsb
_endif
;;;;;;;;;;;;;;;;;
;tear down the stack and return
add esp,_Phelix_LocalSize
popad ;restore all of callers regs
ret ;and return to caller
;;;;;;;;;;;;;;;;
; NOTE: everything between this "if 0" and the matching "endif" is excluded
; from assembly; it is kept for reference only.
;
; PhelixCompareMAC: xors the four dwords at [t0] into Z0..Z3, discards the
; difference bits that lie beyond pCtxt.macSize bits, and ORs the remainder
; into Z0 (and thus into the flags of the final "or"). A zero result
; presumably means the MACs matched -- TODO confirm intended use.
; There is no ret inside this block; execution would fall out of its end.
if 0
PhelixCompareMAC:
xor Z0,[t0 ] ;do a comparison
xor Z1,[t0+ 4]
xor Z2,[t0+ 8]
xor Z3,[t0+12]
mov t0,ctxt_Ptr
mov t0,[t0].pCtxt.macSize
cmp t0,127 ;are we doing a full MAC?
_rept be ;if not, we must do some masking
; build t1 = (1 shl (macSize mod 32)) - 1; a 32-bit shl uses only the low
; 5 bits of cl. The xchg pair borrows cl for the shift count and then puts
; ecx and t0 back.
mov t1,1
xchg ecx,t0
shl t1,cl
xchg ecx,t0
dec t1 ;t1 = mask bits
cmp t0,96
_if ae
and Z3,t1 ;here for 96..127 bits
_brk
_endif
cmp t0,64
_if ae
xor Z3,Z3 ;here for 64..95 bits
and Z2,t1
_brk
_endif
cmp t0,32
_if ae
xor Z3,Z3 ;here for 32..63 bits
xor Z2,Z2
and Z1,t1
_else
xor Z3,Z3 ;here for 0..31 bits
xor Z2,Z2
xor Z1,Z1
and Z0,t1
_endif
_until ;;always fall thru here (i.e., _rept == _if here)
; fold all remaining difference bits into Z0
or Z0,Z1
or Z2,Z3
or Z0,Z2
endif
;
;----------------------------------------------------------------
; Common subroutine (for use in Phelix_Main) to init subkeys
;----------------------------------------------------------------
; In: t0 --> pCtxt (const)
; Z3 --> nonce (const)
; t1 = X' value for I
; Z4 = value of I (0..3)
; Out: Z4 incremented. t0, Z3 unmodified
; t1 = oldZ[I] = 0
; X_i_0, X_i_1 set on stack for both i=I and i=I+4
;   X_i_0[I]   = pCtxt.X_0[I]        X_i_0[I+4] = pCtxt.X_0[I+4]
;   X_i_1[I]   = pCtxt.X_0[I+4] + X' + N_I
;   X_i_1[I+4] = pCtxt.X_0[I]   + X' - N_I + I
; exitTab entries I, I+4, I+8, ... (UNROLL_CNT/4 of them) set to zero
; Clobbers: Z0, Z1, Z2, flags
; NOTE(review): 12 bytes below -- presumably the two pushed words plus the
; return address of the call; confirm against the caller.
_stack_offs = 12 ;two words on stack before call
SetTwoKeys proc
_ii_ equ Z4
mov Z0,[t0+4*_ii_+4*0].pCtxt.X_0;load two key values
mov Z1,[t0+4*_ii_+4*4].pCtxt.X_0
mov X_i_0 [4*_ii_+4*0],Z0 ;store the X_i_0 values
mov X_i_0 [4*_ii_+4*4],Z1
mov Z2,[Z3+4*_ii_] ;get Z2 = N_i
add Z0,t1 ;add in 4*L(U), for _ii_ == 1
add Z1,t1
add Z1,Z2 ;add/sub the nonce value
sub Z0,Z2
add Z0,_ii_ ;plus the index I (only on the value stored at I+4)
xor t1,t1 ;set t1 = 0
; note the cross-over: the value built from X_0[I+4] goes to X_i_1[I] and
; the value built from X_0[I] goes to X_i_1[I+4]
mov X_i_1 [4*_ii_+4*0],Z1 ;store the X_i_1 values
mov X_i_1 [4*_ii_+4*4],Z0
mov oldZ [4*_ii_],t1 ;zero out the oldZ values
_NN_ = 0
_ASM_Assert <UNROLL_CNT ge 4>,<exitTab init code>
; each call clears every 4th exitTab entry starting at entry I, so calls with
; I = 0..3 together clear the first 4*(UNROLL_CNT/4) entries
rept UNROLL_CNT/4 ;init the "block exit" jump table: all zeroes
mov exitTab [4*_ii_+_NN_],t1
_NN_ = _NN_ + 16
endm
inc _ii_ ;bump the counter for next call
ret
SetTwoKeys endp
;
_stack_offs = 0 ;back to no offset
;
_CommonCodeEnd:
;
;----------------------------------------------------------------
; Encryption routines
;----------------------------------------------------------------
;
; Layout: the PhelixAlgo macro (defined elsewhere) is instantiated for
; Encrypt, followed by the unrolled block loop. The loop body is expanded
; UNROLL_CNT times, each copy labelled EncryptBlk_<n> so it can be entered
; at any block.
;
; Per block, as written below (the emission order of the operands inside
; each _o_ line is decided by the _o_ macro, which is not shown here):
;   - X_i_0[4*_bb_] + Z3 is xored into Z0
;   - the plaintext word is read from [srcPtr + 4*_bb_]
;   - keystream = oldZ[4*(_bb_ and 3)] + Z4
;   - (plaintext xor Z3) is added to Z0; ciphertext = keystream xor plaintext
;     (t1 is xored with Z3 and then with plaintext xor Z3)
;   - X_i_1[4*_bb_] + _i_ + Z3 + _bb_ is xored into Z0
;   - the ciphertext word is stored to [dstPtr + 4*_bb_]
;   - exitTab[4*_blkNum_] is loaded into t1 and handed to PhelixEarlyExit
; After the last block PhelixEndLoop sets the flags tested by "_until b";
; once the loop ends, control leaves through the last exitTab entry.
align 4
C_global PhelixEncryptPacket,ECRYPT_AE_encrypt_packet
PhelixAlgo Encrypt ;instantiate the algorithm code
;
;the main block processing loop
;
_rept
_blkNum_ = 0 ;compile-time variable
rept UNROLL_CNT ;compile-time macro expansion
Concat EncryptBlk_,%_blkNum_,<:> ;make a label for re-entry points
_bb_ = _blkNum_ and 7 ;support UNROLL_CNT > 8
_o_ <add Z0,Z3>,<rol Z3,ROT_3b>,<mov t0,X_i_0[4*_bb_]>
_o_ <add Z1,Z4>,<rol Z4,ROT_4b>
_o_ <xor Z2,Z0>,<rol Z0,ROT_0a>,<mov t1,srcPtr>
_o_ <xor Z3,Z1>,<rol Z1,ROT_1a>,<add t0,Z3> ;does LEA opcode help here?
_o_ <add Z4,Z2>,<rol Z2,ROT_2a>
_o_ <xor Z0,t0>,<rol Z3,ROT_3a>,<mov t0,[t1+4*_bb_]> ;t0 = plaintext
_o_ <xor Z1,Z4>,<rol Z4,ROT_4a>,<mov t1,oldZ[4*(_bb_ and 3)]>
_o_ <add Z2,Z0>,<rol Z0,ROT_0b>
_o_ <add Z3,Z1>,<rol Z1,ROT_1b>,<xor t0,Z3>
_o_ <xor Z4,Z2>,<rol Z2,ROT_2b>
add t1,Z4 ;now t1 = keystream
xor t1,Z3 ;set up to compute t1 = ciphertext below
_o_ <add Z0,t0>,<rol Z3,ROT_3b>,<xor t1,t0> ;now t1 = ciphertext
_o_ <add Z1,Z4>,<rol Z4,ROT_4b>,<mov t0,X_i_1[4*_bb_]>
_o_ <xor Z2,Z0>,<rol Z0,ROT_0a>
_o_ <xor Z3,Z1>,<rol Z1,ROT_1a>,<add t0,_i_>
_o_ <add Z4,Z2>,<rol Z2,ROT_2a>,<lea t0,[t0+Z3+_bb_]>
_o_ <xor Z0,t0>,<rol Z3,ROT_3a>,<mov t0,dstPtr>
_o_ <xor Z1,Z4>,<rol Z4,ROT_4a>
_o_ <add Z2,Z0>,<rol Z0,ROT_0b>,<mov [t0+4*_bb_],t1> ;save ciphertext
_o_ <add Z3,Z1>,<rol Z1,ROT_1b>,<mov t1,exitTab[4*_blkNum_]>;?<_blkNum_ lt (UNROLL_CNT-1)>
_o_ <xor Z4,Z2>,<rol Z2,ROT_2b>
PhelixEarlyExit t1,_blkNum_ ;do we need to do an early exit? If so, do it
_blkNum_ = _blkNum_+1 ;update compile-time variable
endm ;end (compile-time) rept above
PhelixEndLoop UNROLL_CNT ;set condition code for _until below
_until b
jmp exitTab[4*(UNROLL_CNT-1)] ;"return" to do more
;
;----------------------------------------------------------------
; Decryption routine
;----------------------------------------------------------------
;
; Layout: the PhelixAlgo macro (defined elsewhere) is instantiated for
; Decrypt, followed by the unrolled block loop. The loop body is expanded
; UNROLL_CNT times, each copy labelled DecryptBlk_<n> so it can be entered
; at any block.
;
; Per block, as written below (the emission order of the operands inside
; each _o_ line is decided by the _o_ macro, which is not shown here):
;   - X_i_0[4*_bb_] + Z3 is xored into Z0
;   - the ciphertext word is read from [srcPtr + 4*_bb_]
;   - keystream = oldZ[4*(_bb_ and 3)] + Z4; plaintext = keystream xor ciphertext
;   - (plaintext xor Z3) is added to Z0
;   - X_i_1[4*_bb_] + _i_ + Z3 + _bb_ is xored into Z0
;   - the plaintext word is stored to [dstPtr + 4*_bb_]
;   - exitTab[4*_blkNum_] is loaded into t1 and handed to PhelixEarlyExit
; After the last block PhelixEndLoop sets the flags tested by "_until b";
; once the loop ends, control leaves through the last exitTab entry.
align 4
C_global PhelixDecryptPacket,ECRYPT_AE_decrypt_packet
PhelixAlgo Decrypt ;instantiate the algorithm code
;
;the main block processing loop
;
_rept
_blkNum_ = 0 ;compile-time variable
rept UNROLL_CNT ;compile-time macro expansion
Concat DecryptBlk_,%_blkNum_,<:> ;make a label for re-entry points
_bb_ = _blkNum_ and 7 ;support UNROLL_CNT > 8
_o_ <add Z0,Z3>,<rol Z3,ROT_3b>,<mov t0,X_i_0[4*_bb_]>
_o_ <add Z1,Z4>,<rol Z4,ROT_4b>
_o_ <xor Z2,Z0>,<rol Z0,ROT_0a>,<mov t1,srcPtr>
_o_ <xor Z3,Z1>,<rol Z1,ROT_1a>,<add t0,Z3>
_o_ <add Z4,Z2>,<rol Z2,ROT_2a>
_o_ <xor Z0,t0>,<rol Z3,ROT_3a>,<mov t0,[t1+4*_bb_]> ;t0 = ciphertext
_o_ <xor Z1,Z4>,<rol Z4,ROT_4a>,<mov t1,oldZ[4*(_bb_ and 3)]>
_o_ <add Z2,Z0>,<rol Z0,ROT_0b>
_o_ <add Z3,Z1>,<rol Z1,ROT_1b>
_o_ <xor Z4,Z2>,<rol Z2,ROT_2b>
add t1,Z4 ;set t1 = keystream
xor t1,t0 ;now t1 = plaintext
mov t0,Z3
xor t0,t1 ;now t0 = plaintext ^ z3
_o_ <add Z0,t0>,<rol Z3,ROT_3b>
_o_ <add Z1,Z4>,<rol Z4,ROT_4b>,<mov t0,X_i_1[4*_bb_]>
_o_ <xor Z2,Z0>,<rol Z0,ROT_0a>
_o_ <xor Z3,Z1>,<rol Z1,ROT_1a>,<add t0,_i_>
_o_ <add Z4,Z2>,<rol Z2,ROT_2a>,<lea t0,[t0+Z3+_bb_]>
_o_ <xor Z0,t0>,<rol Z3,ROT_3a>,<mov t0,dstPtr>
_o_ <xor Z1,Z4>,<rol Z4,ROT_4a>
_o_ <add Z2,Z0>,<rol Z0,ROT_0b>,<mov [t0+4*_bb_],t1> ;save plaintext computed above
_o_ <add Z3,Z1>,<rol Z1,ROT_1b>,<mov t1,exitTab[4*_blkNum_]>;?<_blkNum_ lt (UNROLL_CNT-1)>
_o_ <xor Z4,Z2>,<rol Z2,ROT_2b>
PhelixEarlyExit t1,_blkNum_ ;do we need to do an early exit? If so, do it
_blkNum_ = _blkNum_+1 ;update compile-time variable
endm ;end (compile-time) rept above
PhelixEndLoop UNROLL_CNT ;set condition code for _until below
_until b
jmp exitTab[4*(UNROLL_CNT-1)] ;"return" to do more
;
_PhelixCodeEnd_:
ifdef PHELIX_INCREMENTAL_API
;
;----------------------------------------------------------------
; "Incremental" function: SetupNonce
;----------------------------------------------------------------
; use same stack as EncryptPacket!
;
C_global PhelixSetupNonce,ECRYPT_AE_ivsetup
pushad
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -