## File: phelix86.s
## (code-viewer page residue: "font size" control label; not part of the source)
## Frame setup and start of the initialization phase.
## (The label and any prologue leading into this fragment lie above the
## part of the file visible here.)
## Steps: reserve _Phelix_LocalSize bytes of locals, store the value held in
## ebp as the jump table pointer, call InitNonce, then send the initialization
## zero words through the block code starting at EncryptBlk_0.
##
##point to callers first parameter (save code size below)
## (the offset is pre-biased by -_Phelix_LocalSize so that esi equals
## callerParms(%esp) once the subl below has moved esp)
leal callerParms-_Phelix_LocalSize(%esp),%esi
subl $_Phelix_LocalSize,%esp #make room for locals on stack
movl %ebp,jmpTabPtr(%esp) #save jump table pointer
## InitNonce reads the caller parameters through esi and returns the starting
## state words in eax, ebx, ecx, edx and esi.
call InitNonce
##
##################################################################
## Finally ready to start running Phelix on some data
##################################################################
## First, process the initialization zeroes (loopByteCnt == 0 from PhelixInit)
##
## Store the continuation address in exitTab slot ZERO_INIT_CNT-1.
## NOTE(review): presumably the block code leaves through a non-zero exitTab
## slot, so control resumes at _ret_InitZeroDone -- confirm against the
## EncryptBlk_* code, which is not visible here.
movl $_ret_InitZeroDone,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
jmp EncryptBlk_0
##
## "local" function
##
## InitNonce: set up the per-message key words and loop locals, then load the
## starting state words.
## In:   esi = pointer to callerParms
## Out:  eax, ebx, ecx, edx = context words at X_0+12, +16, +20, +24, each
##       XORed with nonce words 0..3
##       esi = context word at X_0+28 (no nonce word applied)
##       ebp = context pointer, edi = nonce pointer
##       locals: loopByteCnt = 0, _i_ = 0, srcPtr = INIT_ZEROES, dstPtr = tmpBuf
## Note: esi no longer points at callerParms on return.
##
## _SO_ is added to the esp-relative local offsets below; it is 4 inside this
## routine and reset to 0 after the ret.
## NOTE(review): presumably the 4 accounts for the return address pushed by
## "call InitNonce" -- confirm.
.set _SO_,4
InitNonce:
## first, init the local keys on the stack
## NOTE(review): SetTwoKeys is defined elsewhere; its register contract is
## taken from the comments on the calls below -- confirm against its definition.
movl ctxt_Ptr_LCL(%esi),%ebp #point to context structure
movl X_1_Bump(%ebp),%edi #edi=4*(keySize/8)+256*(macSize mod 128)
movl nonce_Ptr_LCL(%esi),%edx #(const) pointer to nonce words
_push esi #save esi (push/pop = smaller than lea esi,callerParms)
xor %esi,%esi #use esi as the variable i in SetTwoKeys
inc %esi #start with i = 1, since edi = X_1 = 4*L(U) already
call SetTwoKeys #set X_1_n, X_5_n, for n=0,1 [return w/edi == 0]
call SetTwoKeys #set X_2_n, X_6_n, for n=0,1
call SetTwoKeys #set X_3_n, X_7_n, for n=0,1
xor %esi,%esi #wrap to i = 0
call SetTwoKeys #set X_0_n, X_4_n, for n=0,1
_pop esi #restore pointer to callerParms
##set up for initialization phase
## ecx = 0 is the value stored into both counters below
xorl %ecx,%ecx
leal INIT_ZEROES,%ebp #use all zero input words, for i= -8 .. -1
leal tmpBuf+_SO_(%esp),%edi #discard output
movl %ecx,loopByteCnt+_SO_(%esp) #initialize loop byte count counter = 0
movl %ecx,_i_+_SO_(%esp) #initialize i = 0 (block number + 8)
movl %ebp,srcPtr+_SO_(%esp)
movl %edi,dstPtr+_SO_(%esp)
## now initialize the Zn register values
## (ebp and edi were reused as scratch above, so both pointers are reloaded)
movl ctxt_Ptr_LCL(%esi),%ebp
movl nonce_Ptr_LCL(%esi),%edi
movl X_0+12(%ebp),%eax #get the X_0 key values
movl X_0+16(%ebp),%ebx
movl X_0+20(%ebp),%ecx
movl X_0+24(%ebp),%edx
## this load overwrites the callerParms pointer; esi receives no nonce word
movl X_0+28(%ebp),%esi
xorl (%edi),%eax #merge in the nonce
xorl 4(%edi),%ebx
xorl 8(%edi),%ecx
xorl 12(%edi),%edx
ret
.set _SO_,0
############################################
## done with the initial zeroes.
##
## Continuation after the initialization zero words, followed by processing
## of the additional authenticated data (AAD) described by aad_Ptr / aad_Len.
## AAD goes through the encrypt block code with tmpBuf as the destination, so
## the output is discarded.  ebx is XORed with MAGIC_AAD_XOR on entry and
## again on exit.
## NOTE(review): _if / _ifbrk / _endif are project macros defined elsewhere.
## From their use here, "_if cc" runs its body only when condition cc holds,
## and "_ifbrk cc" leaves the enclosing _if when cc holds -- confirm.
## NOTE(review): the "andl $4*(UNROLL_CNT-1)" masks below presumably rely on
## UNROLL_CNT being a power of two -- confirm.
_ret_InitZeroDone:
.if UNROLL_CNT > ZERO_INIT_CNT #do we need to clear out the return point?
xorl %ebp,%ebp #(only if it is not already at the end)
movl %ebp,exitTab+4*(ZERO_INIT_CNT-1)+_SO_(%esp)
.endif
#################
## handle AAD here, looping if needed
xorl $MAGIC_AAD_XOR,%ebx
movl aad_Len+_SO_(%esp),%ebp
testl %ebp,%ebp
_if nz #if nothing there, skip all aad processing
movl aad_Ptr+_SO_(%esp),%edi
movl %ebp,aadLeft+_SO_(%esp)
movl %edi, srcPtr+_SO_(%esp) #src will come from aad_Ptr
## Loop head.  ebp is reloaded from aadLeft a few lines down, so the loop does
## not depend on the value of ebp when it is re-entered.
_aad_Loop: #here with ebp == aad_Len
leal tmpBuf+_SO_(%esp),%edi #always use tmpBuf for aad dst (discard)
movl %edi,dstPtr+_SO_(%esp)
movl aadLeft+_SO_(%esp),%ebp
subl $4*UNROLL_CNT,%ebp #only do one unrolled loop each time
_if ae #(since we use tmpBuf to discard ciphertext)
## no borrow: at least 4*UNROLL_CNT bytes remain, so run one full pass and
## store _aad_Loop in the last exitTab slot as the continuation
movl %ebp,aadLeft+_SO_(%esp)
xorl %edi,%edi
movl %edi,loopByteCnt+_SO_(%esp)
movl $_aad_Loop,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
jmp EncryptBlk_0
_endif
## here to handle final partial loop
## (aadLeft was not updated on this path, so it still holds the remaining
## byte count, which is below 4*UNROLL_CNT)
_aad_PartialLoop:
## ebp = byte offset of the remaining whole words within one unrolled pass
andl $4*(UNROLL_CNT-1),%ebp
movl %ebp,loopByteCnt+_SO_(%esp)
cmpl $4,%ebp
_if ae
## at least one whole word: store the continuation in the exitTab slot just
## below offset ebp, then run from the start of the block code
movl $_ret_aad_1,exitTab-4+_SO_(%esp,%ebp)
jmp EncryptBlk_0
_ret_aad_1:
movl loopByteCnt+_SO_(%esp),%ebp
xorl %edi,%edi
movl %edi,exitTab-4+_SO_(%esp,%ebp) #clear the entry
_endif
## here to handle final partial word of AAD
movl aadLeft+_SO_(%esp),%ebp
movl %ebp,%edi
andl $3,%edi #any odd bytes?
_ifbrk z #if not, we are done with AAD
## loopByteCnt = (aadLeft + 4) masked to a word offset within the pass
addl $4,%ebp
andl $4*(UNROLL_CNT-1),%ebp
movl %ebp,loopByteCnt+_SO_(%esp)
## NOTE(review): the esp-relative offsets after _push still use _SO_, so the
## _push/_pop macros presumably adjust _SO_ themselves -- confirm.
_push esi
## step back one word: ebp = offset of the partial word
subl $4,%ebp
andl $4*(UNROLL_CNT-1),%ebp
movl srcPtr+_SO_(%esp),%esi
## NOTE(review): this is a full 32-bit load although only 1..3 bytes of AAD
## remain, so it reads up to 3 bytes past the end of the AAD -- confirm
## callers tolerate that.
movl (%esi,%ebp),%esi #get the last AAD word
andl MASK_TAB(,%edi,4),%esi #clear out extra bits
## put the masked word in tmpBuf and bias src/dst by -ebp so that offset ebp
## from either pointer lands on tmpBuf
leal tmpBuf+_SO_(%esp),%edi
movl %esi,(%edi)
subl %ebp,%edi
movl %edi,dstPtr+_SO_(%esp)
movl %edi,srcPtr+_SO_(%esp)
movl $_ret_aad_2,exitTab+_SO_(%esp,%ebp)
movl %ebp,tmpBuf+4+_SO_(%esp)#save this
_pop esi
## enter the block code through the Encrypt_jmpTab entry at byte offset ebp
jmp *Encrypt_jmpTab(%ebp)
_ret_aad_2:
## recover the saved offset and clear the exitTab slot used above
movl tmpBuf+4+_SO_(%esp),%ebp
xorl %edi,%edi
movl %edi,exitTab+_SO_(%esp,%ebp)
_endif
## second XOR with the same constant undoes the one applied before the AAD
xorl $MAGIC_AAD_XOR,%ebx
#################
## process the user data
##
## _startUserData loads the caller's source pointer, destination pointer and
## byte count, and records _ret_MAC0 as the address to continue at once the
## data has been processed.
## processUserData is a second entry point (see the comment at its label).
## From the code below it expects: edi = destination pointer, ebp = byte
## count, esi = pointer to callerParms, with the previous esi already pushed.
## NOTE(review): srcPtr and retAddr are presumably set by that other caller
## before it jumps here -- confirm.
## NOTE(review): _if / _endif and _push / _pop are project macros defined
## elsewhere; "_if cc" presumably runs its body only when cc holds, and
## _push / _pop presumably keep _SO_ in step with esp -- confirm.
_startUserData:
_push esi #use esi as temp pointer
leal callerParms+_SO_(%esp),%esi # (to save code size in accessing caller parms below)
leal _ret_MAC0,%ebp
movl %ebp,retAddr_LCL(%esi)
movl src_Ptr_LCL(%esi),%ebp
movl %ebp,srcPtr+_SO_(%esp)
movl dst_Ptr_LCL(%esi),%edi
movl src_ByteCnt_LCL(%esi),%ebp
## enter here from EncryptBytes
processUserData:
## remember the original destination and length for the odd-byte code later
movl %edi,dstPtr+_SO_(%esp)
movl %edi,dstPtr0_LCL(%esi)
movl %ebp,msgLen0_LCL(%esi)
_pop esi #restore esi
movl loopByteCnt+_SO_(%esp),%edi
andl $4*(UNROLL_CNT-1),%edi #get the loop "phase"
subl %edi,dstPtr+_SO_(%esp) #adjust pointers accordingly
subl %edi,srcPtr+_SO_(%esp)
#################
## now process the bulk of the data in "full" loop chunks (ebp = src_ByteCnt)
addl %edi,%ebp
subl $UNROLL_CNT*4,%ebp #enough for one "full" loop?
## the movl below leaves the flags alone, so "_if ae" still tests the subl
movl %ebp,loopByteCnt+_SO_(%esp) #save the pre-subtracted value for use in the loop
_if ae
## edi = address of the jump table entry for the current phase
add jmpTabPtr+_SO_(%esp),%edi #get ready to jump into block processing
movl $_ret_DataDone1,exitTab+4*(UNROLL_CNT-1)+_SO_(%esp)
jmp *(%edi) #go encrypt or decrypt
_ret_DataDone1:
movl loopByteCnt+_SO_(%esp),%ebp #restore ebp = loopByteCnt
xorl %edi,%edi #starting phase is at ??crypt_0 now
_endif
#################
## now process the remainder of the data, if any (partial loop)
## here edi = starting phase (0 if the full-loop pass above ran)
andl $4*(UNROLL_CNT-1),%ebp #compute ebp = end phase
cmpl %edi,%ebp #any partial loop to do?
_if nz
movl %ebp,loopByteCnt+_SO_(%esp) #make sure that the exit loop test falls thru
addl jmpTabPtr+_SO_(%esp),%edi #get ready to jump
movl $_ret_DataDone2,exitTab-4+_SO_(%esp,%ebp) #force an exit at the correct point
jmp *(%edi)
_ret_DataDone2:
xorl %edi,%edi #edi = 0
movl loopByteCnt+_SO_(%esp),%ebp
andl $4*(UNROLL_CNT-1),%ebp #recompute exitTab index
movl %edi,exitTab-4+_SO_(%esp,%ebp) #clear the exitTab entry
_endif
#################
## special (i.e. UGLY!!) handling when src_ByteCnt is not a multiple of 4
## here with ebp = loopByteCnt AND 4*(UNROLL_CNT-1)
##
## Overview: the trailing 1..3 source bytes are masked into a whole word in
## tmpBuf, one word is run through the block code with srcPtr/dstPtr biased
## so the input is read from tmpBuf and the result lands in tmpBuf+4, and
## _ret_OddBytes then merges only the valid bytes into the destination.
## tmpBuf+8 holds the byte mask for the decrypt path.
## The section ends by jumping through retAddr.
## NOTE(review): _if / _endif, _push / _pop and _o_ are project macros defined
## elsewhere.  "_if cc" presumably runs its body only when cc holds,
## _push / _pop presumably keep _SO_ in step with esp, and _o_ presumably
## emits the quoted instructions -- confirm against their definitions.
movl msgLen0+_SO_(%esp),%edi #get original msgLen
andl $3,%edi #any partial words? (hopefully rare)
_if nz
## store the continuation in the exitTab slot at offset ebp
movl $_ret_OddBytes,exitTab+_SO_(%esp,%ebp)
orl %ebp,%edi #save word index and odd byte count
movl %edi,loopByteCnt+_SO_(%esp) # back into loopByteCnt
_push esi
andl $3,%edi
## esi = address of the final (partial) source word
movl srcPtr+_SO_(%esp),%esi
addl %ebp,%esi
_push ebp
movl MASK_TAB(,%edi,4),%edi #get the mask bits
## NOTE(review): this is a full 32-bit load although only 1..3 source bytes
## remain, so it reads up to 3 bytes past the end of the source -- confirm
## callers tolerate that.
movl (%esi),%ebp #and get the source word
leal tmpBuf+_SO_(%esp),%esi
andl %edi,%ebp #ebp = masked source word
movl %edi,8(%esi) #save the mask bits (for use in Decrypt_OddBytes)
movl %ebp, (%esi) #save the masked source word
_pop ebp
subl %ebp,%esi #adjust src/dst ptrs for hard coded offsets in block code
movl %esi,srcPtr+_SO_(%esp) #set up for "single-word" encrypt in tmpBuf[]
## dstPtr is biased 4 bytes higher, so the output word lands at tmpBuf+4
addl $4,%esi
movl %esi,dstPtr+_SO_(%esp)
mov jmpTabPtr+_SO_(%esp),%edi #dispatch to different handler for Encrypt & Decrypt
_pop esi
jmp *OddBytes_OFFS(%edi)
##
## here to handle the odd-byte encrypt case
Encrypt_OddBytes:
jmp *Encrypt_jmpTab(%ebp) #go encrypt the single word
##
## here to handle the funky odd-byte decrypt case
Decrypt_OddBytes:
## we have to encrypt halfway thru the block to compute keystream :-((
## (i.e., in order to produce the "full" ciphertext word)
## All registers changed by the partial computation are saved here and
## restored before the real decrypt; edi is used as scratch and not saved.
_push eax,ebx,ecx,edx,esi,ebp
_o_ "addl %edx,%eax","roll $ROT_3b,%edx","mov X_i_0+_SO_(%esp,%ebp),%ebp" #get the key word
_o_ "addl %esi,%ebx","roll $ROT_4b,%esi"
_o_ "xorl %eax,%ecx","roll $ROT_0a,%eax"
_o_ "xorl %ebx,%edx" ,"add %edx,%ebp"
_o_ "addl %ecx,%esi","roll $ROT_2a,%ecx","mov loopByteCnt+_SO_(%esp),%edi"
_o_ "xorl %ebp,%eax","roll $ROT_4a,%esi","and $4*3,%edi"
_o_ "addl %eax,%ecx" ,"mov oldZ+_SO_(%esp,%edi),%ebp"
_o_ "xorl %ecx,%esi"
addl %esi,%ebp #now ebp = keystream
movl tmpBuf+8+_SO_(%esp),%edi #get the mask word
notl %edi #toggle the maskbits
andl %ebp,%edi #mask off unused maskbits
## edi = keystream bits in the positions outside the mask; XORing them in
## fills those positions of the masked source word
xorl %edi,tmpBuf+_SO_(%esp) #re-create the "full" ciphertext word @ tmp src buffer
_pop ebp,esi,edx,ecx,ebx,eax
jmp *Decrypt_jmpTab(%ebp) #go decrypt
## "return" here with the dest word computed at [tmpBuf+4]
_ret_OddBytes:
_push esi,eax
leal callerParms+_SO_(%esp),%esi
xorl %edi,%edi
movl loopByteCnt+_SO_(%esp),%ebp
andl $4*(UNROLL_CNT-1),%ebp
movl %edi,exitTab+_SO_(%esp,%ebp) #clear out the exitTab entry we just used
movl msgLen0+_SO_(%esp),%edi #now output just the number of dst bytes specified
## ebp = msgLen0 & 3 (odd byte count), edi = msgLen0 with the low 2 bits cleared
movl %edi,%ebp
andl $3,%ebp
xorl %ebp,%edi #clear low 2 bits of count
addl dstPtr0_LCL(%esi),%edi #point to "final" word offset
movl tmpBuf_LCL+4(%esi),%eax #get the dst output word (short offset)
## merge: eax = (new ^ old) & mask, then old ^= eax, so only the bits under
## the mask take the new value and the other bits keep their old value.
## NOTE(review): this is a 32-bit read-modify-write of the destination word,
## so up to 3 bytes past the end of the message are read and written back
## unchanged -- confirm the destination buffer allows that.
xorl (%edi),%eax #do bit diddling to output just the odd bytes
andl MASK_TAB(,%ebp,4),%eax
xorl %eax,(%edi)
_pop eax,esi
_endif
jmp *retAddr+_SO_(%esp) #"return" to whomever
## _ret_MAC0 is the continuation stored in retAddr by _startUserData.  It
## loads the MAC pointer, XORs aad_Len into esi and falls into processMAC.
## processMAC is entered with ebp = MAC pointer, which it stores in dstPtr0.
## NOTE(review): the separate label suggests processMAC is also jumped to
## from code not visible here -- confirm.
## It XORs MAGIC_MAC_XOR into eax, fills 12 words of tmpBuf with
## (loopByteCnt AND 3), and starts an 8-word pass through the encrypt block
## code that comes back at _ret_MAC1.
## NOTE(review): _push / _pop are project macros that presumably keep _SO_ in
## step with esp; the "andl $4*(UNROLL_CNT-1)" masks presumably rely on
## UNROLL_CNT being a power of two -- confirm.
_ret_MAC0:
#################
## here to compute and output/compare the MAC
movl mac_Ptr+_SO_(%esp),%ebp
xorl aad_Len+_SO_(%esp),%esi
processMAC:
movl %ebp,dstPtr0+_SO_(%esp) #save MAC ptr
xorl $MAGIC_MAC_XOR,%eax #toggle bits to start the MAC
_push esi
movl loopByteCnt+_SO_(%esp),%ebp
movl %ebp,%edi
addl $3,%ebp #advance to next full word, if odd bytes
andl $4*(UNROLL_CNT-1),%ebp #ebp = next word "offset" within block
andl $3,%edi #edi = length of src mod 4 (plaintext for MAC)
leal tmpBuf+_SO_(%esp),%esi
## store edi into 12 consecutive words starting at tmpBuf (_bb_ = 0, 4, .. 44)
.set _bb_,0
.rept 12 #8 for padding, 4 for MAC size
movl %edi,_bb_(%esi) #fill tmpBuf with L(P) mod 4
.set _bb_,_bb_+4
.endr
## edi = offset of the exitTab slot 7 words after the starting word
leal 7*4(%ebp),%edi
andl $4*(UNROLL_CNT-1),%edi #stop point is after 8 blocks (i+0..i+7)
movl $_ret_MAC1,exitTab+_SO_(%esp,%edi)
## bias src/dst by -ebp so that offset ebp from either pointer lands on tmpBuf
subl %ebp,%esi #set up source/dest pointers
movl %esi,srcPtr+_SO_(%esp)
movl %esi,dstPtr+_SO_(%esp)
addl $8*4-1,%ebp #FUNKY wrap logic requires -1
movl %ebp,loopByteCnt+_SO_(%esp)
incl %ebp #undo adjustment
andl $4*(UNROLL_CNT-1),%ebp
_pop esi
jmp *Encrypt_jmpTab(%ebp) #go do the encryption
## just finished eight blocks of "padding" using L(P) mod 4
## now generate the MAC
_ret_MAC1:
movl loopByteCnt+_SO_(%esp),%ebp
incl %ebp #undo the -1 above
andl $4*(UNROLL_CNT-1),%ebp
leal 3*4(%ebp),%edi #do four more (0..3 -- stop after #3)
andl $4*(UNROLL_CNT-1),%edi
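## (code-viewer page residue, not part of the source; the listing stops here,
## part-way through the MAC code)
## Keyboard shortcuts: copy code = Ctrl + C; search code = Ctrl + F;
## full screen = F11; switch theme = Ctrl + Shift + D; show shortcuts = ?;
## increase font size = Ctrl + =; decrease font size = Ctrl + -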