⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 2fish_86.asm

📁 Assemble source code for twofish algorithm
💻 ASM
📖 第 1 页 / 共 4 页
字号:
	page	60,160
	title	Twofish for 386+, Author: Doug Whiting, Hi/fn
;  
; WARNING: This module is written for speed, not clarity! 
;
; Syntax/target: MASM, 32-bit x86 (.386), flat memory model with C language
; type (see the .MODEL line below).
;
	.386
	.MODEL FLAT, C 

	public	blockEncrypt_86,blockDecrypt_86,reKey_86

;
; The three public symbols are dword variables, not code labels: each one holds
; the address of a routine.  They are initialised to the *_SelectCPU entry
; points, which are defined elsewhere in this file.
; NOTE(review): presumably those stubs detect the CPU on the first call and then
; overwrite these pointers with a CPU-specific routine -- confirm in the stubs.
;
	.DATA
blockEncrypt_86 dd  E_SelectCPU				;first time thru, use get_cpu_type
blockDecrypt_86 dd  D_SelectCPU
reKey_86		dd	R_SelectCPU

	extrn	MDStab:dword					;MDS multiply matrix (pre-permuted)
	extrn	P8x8:byte						;two fixed 8x8 permutations

q0	equ		P8x8[0]							;permutation table at byte offset 0 of P8x8
q1	equ		P8x8[256]						;permutation table at byte offset 256 of P8x8

MOVS_MASK	equ	15							;optimized Pentium Pro movsd alignment
Pentium		equ	586							;defines for cpuName
PentiumPro	equ	686

; NOTE(review): DO_COMPILE / DO_PATCH are not referenced in the visible part of
; the file; presumably they enable the key-compile and code-patch steps.
DO_COMPILE	equ	1							;for debugging performance
DO_PATCH	equ	1

	.CODE

;
; ML syntax:
;  ml.exe -Flt2fish_86.lst -coff -Cx -Zi -Zm -c -DMASM6 2fish_86.asm
;----------------------------------------------------------------------------------
;			Useful general macros
;----------------------------------------------------------------------------------
OFFSET32		equ		1					;force 32-bit strucmac stuff
	.xlist									;don't expand strucmac defns
	include		strucmac.inc
	.list

; NOTE(review): BIG_TAB is not referenced in the visible part of the file; the
; MIN_KEY branch below only says the "big table" cannot be used in that mode.
BIG_TAB		=	0

; KEY_MODE takes exactly one of the values below.  Each is a distinct bit so
; that conditional assembly can test for several modes at once, e.g.
; "if KEY_MODE and (KM_FULL or KM_COMPILE)".
KM_ZERO		=	1							;KEY_MODE bits
KM_MIN		=	2
KM_PART		=	4
KM_FULL		=	8
KM_COMPILE	=	16

  ;
  ; Select KEY_MODE from whichever of PART_KEY, MIN_KEY, ZERO_KEY or COMPILE_KEY
  ; is defined (e.g. with -D on the ml command line).  They are tested in that
  ; order and the first one found wins; with none defined, full keying is used.
  ; Every branch echoes the chosen mode at assembly time so that the build log
  ; always shows which variant was assembled.
  ;
  ifdef PART_KEY
  %out	Assembly with PART_KEY
KEY_MODE	=	KM_PART
  elseifdef MIN_KEY
  %out	Assembly with MIN_KEY
KEY_MODE	=	KM_MIN
BIG_TAB		=	0							;can't use big table in min key mode

  elseifdef ZERO_KEY
  %out	Assembly with ZERO_KEY
KEY_MODE	=	KM_ZERO
sboxK8		equ	<ks.fullSbox>				;use S-box to expand keys for PentiumPro
  elseifdef COMPILE_KEY
  %out	Assembly with COMPILE_KEY
KEY_MODE	=	KM_COMPILE
  else	;default is full key
  %out	Assembly with FULL_KEY
KEY_MODE	=	KM_FULL
  endif

;
; where the permutations are used
;
; The names have the form P_<i><j>.  lookupQ builds a name by pasting its i and
; j arguments together, and doS4 passes the byte/S-box index as i and the stage
; number W as j.  Each name resolves to one of the two fixed tables q0 / q1.
;
P_00	equ		q1							;"outermost" permutation (in MDSmat)
P_01	equ		q0
P_02	equ		q0	
P_03	equ		q1
P_04	equ		q1

P_10	equ		q0
P_11	equ		q0
P_12	equ		q1
P_13	equ		q1
P_14	equ		q0

P_20	equ		q1
P_21	equ		q1
P_22	equ		q0
P_23	equ		q0
P_24	equ		q0

P_30	equ		q0
P_31	equ		q1
P_32	equ		q1
P_33	equ		q0
P_34	equ		q1
;
; useful in "splicing" names together
;
; concat pastes up to eleven arguments into one source line, which is then
; assembled like any other statement.  Omitted arguments contribute nothing.
; In this file it is used to emit labels whose names contain evaluated
; expressions (e.g. %(R+1)) and macro arguments such as cpuName.
concat	macro	aa,bb,cc,dd,ee,ff,gg,hh,ii,jj,kk	;;__TRANSPARENT__ (signal to LST2ASM)
aa&bb&cc&dd&ee&ff&gg&hh&ii&jj&kk
endm

; alloc: define varName as "varType ptr [esp+N]", where N is the current value
; of the assembly-time counter varOffs, then advance varOffs by varSize bytes.
;   varName - symbol to define
;   varType - operand size keyword placed in front of "ptr"
;   varSize - number of bytes to reserve for this variable
; The irp over <%varOffs> turns the counter into its numeric text before the
; equ is processed, so each name keeps the offset it was given even though
; varOffs changes afterwards.  varOffs is not initialised here; it must be set
; before the first use of this macro.
alloc	macro	varName,varType,varSize		;macro to help define parms/locals
	irp XX,<%varOffs>
varName equ varType ptr [esp+XX]
	endm
varOffs =	varOffs+varSize;				advance to the next free offset
endm

; copy from src to dst (use eax,ebx)
;
; Copies 8 bytes as two dwords: [src] -> eax -> [dst] and [src+4] -> ebx -> [dst+4].
;   dst - 8-byte destination memory operand
;   src - 8-byte source memory operand; if blank, nothing is loaded and the
;         current contents of eax and ebx are stored
; Clobbers eax and ebx when src is given.
Copy8	macro	dst,src
  ifnb <src>
		mov		eax,src						;load source dwords(if needed)
		mov		ebx,src+4
  endif	
		mov		dst,eax						;store new dst dwords
		mov		dst+4,ebx
endm

;
;dst = xorA ^ xorB ^ xorC	(8 bytes, xorC may be blank)
;  where xorB is the subkey pair selected by subKeyIndex
;
;Parameters:
;  lbl,cpuName  pasted into the _SK_ label names in compile mode; cpuName
;               (Pentium / PentiumPro) also selects the instruction form used
;  reg0,reg1    32-bit registers that receive the first / second result dword
;  dst          8-byte memory operand to store the result in (may be blank)
;  xorA         8-byte source operand; blank if reg0,reg1 are already loaded
;  subKeyIndex  byte offset of the first subkey dword within ks.subKeys
;  xorC         optional third 8-byte memory operand
;  oldDst       where to copy current dst value before overwriting (for CBC mode)
;
;Returns two dst dwords in reg0,reg1.
;Trashes ecx,edx wherever the subkey load, the Pentium xorC path or the oldDst
;copy is emitted.  In compile mode with a non-Pentium cpuName the oldDst copy
;goes through ebp instead, so ebp is trashed in that case.
Xor8	macro	lbl,cpuName,reg0,reg1,dst,xorA,subKeyIndex,xorC,oldDst
  ifnb <xorA>
		mov		reg0,xorA		;load both A dwords (unless already loaded)
		mov		reg1,xorA+4
  endif
  if (KEY_MODE and KM_COMPILE) eq 0
		mov		ecx,ks.subKeys[subKeyIndex]	;load both B dwords
		mov		edx,ks.subKeys[subKeyIndex+4]

		xor		reg0,ecx		;compute A ^ B
		xor		reg1,edx
  else
		;compile mode: 12345678h is a placeholder.  Each label sits directly
		;after its instruction, i.e. 4 bytes past the start of the immediate,
		;and is named <lbl>_SK_<dword index>_<cpuName>.
		;NOTE(review): presumably the real subkey is patched in there later.
		xor		reg0,12345678h	;do an xor with immediate values
concat lbl,_SK_,%((subKeyIndex)/4),<_>,cpuName,< label dword>
		xor		reg1,12345678h
concat lbl,_SK_,%((subKeyIndex)/4+1),<_>,cpuName,< label dword>
  endif
  ifnb <xorC>
    if cpuName eq Pentium
		mov		ecx,xorC		;load both C dwords
		mov		edx,xorC+4

		xor		reg0,ecx		;compute A ^ B ^ C
		xor		reg1,edx
	else
		xor		reg0,xorC		;non-Pentium: xor straight from memory
		xor		reg1,xorC+4
    endif
  endif
  ifnb <dst>
    ifnb <oldDst>
	 if (cpuName ne Pentium) and (KEY_MODE and KM_COMPILE)
	    mov		ebp,dst			;copy old dst to oldDst via ebp (trashes ebp)
		mov		oldDst,ebp
		mov		ebp,dst+4
		mov		oldDst+4,ebp
	  else
		mov		ecx,dst			;pick up previous ciphertext value
		mov		edx,dst+4

		mov		oldDst,ecx		;and make copy (for next IV)
		mov		oldDst+4,edx
	  endif
    endif
		mov		dst,reg0		;save the result
		mov		dst+4,reg1		;(the result also stays in reg0,reg1)
  endif
endm
;
;----------------------------------------------------------------------------------
;			Definitions, Structures (AES.H)
;----------------------------------------------------------------------------------
;
BLOCK_SIZE		equ		128					;block size in bits
MAX_KEY_BITS	equ		256
MAX_ROUNDS		equ		16

; The next three values are byte offsets (ROUND_SUBKEYS is divided by 4 below
; to turn it into a dword count).
INPUT_WHITEN	equ		0
OUTPUT_WHITEN	equ		(BLOCK_SIZE/8)						;= 16
ROUND_SUBKEYS	equ		(OUTPUT_WHITEN+BLOCK_SIZE/8)		;= 32
TOTAL_SUBKEYS	equ		((ROUND_SUBKEYS/4)+2*MAX_ROUNDS)	;dword count: 8 + 32 = 40
SUBKEY_SIZE		equ		<4*TOTAL_SUBKEYS>					;text equate: size in bytes

DIR_ENCRYPT		equ		0
DIR_DECRYPT		equ		1
MODE_ECB		equ		1
MODE_CBC		equ		2
MODE_CFB1		equ		3
; VALID_SIG is the expected value of the cipherSig / keySig structure fields.
; The value differs between compile mode and the other key modes.
  if KEY_MODE and KM_COMPILE
VALID_SIG		equ		504D4F43h	;'COMP'
CSIG_1			=		VALID_SIG + 12345678H	;NOTE(review): presumably the expected cSig1 value
CSIG_2			=		VALID_SIG XOR -1		;bitwise complement of VALID_SIG (presumably for cSig2)
  else
VALID_SIG		equ		48534946h	;'FISH'
  endif

; cipherInstance: layout of the cipher-mode state structure.  The section
; heading above attributes these definitions to AES.H, so the field order and
; sizes presumably have to match the C declaration there.
cipherInstance struc
mode			db		4 dup  (?)			;MODE_ECB, MODE_CBC (Let C code handle MODE_CFB1)
CFB1_IV			db		BLOCK_SIZE/8 dup (?);CFB1 IV bytes
cipherSig		dd		?					;should be VALID_SIG
IV32			dd		BLOCK_SIZE/32 dup (?);CBC  IV dwords
cipherInstance ends

; keyInstance: layout of the per-key state structure.  The section heading
; above attributes these definitions to AES.H, so the field order and sizes
; presumably have to match the C declaration there.
; subKeys is immediately followed by fullSbox; BIAS_VAL (defined after this
; structure) is computed from that adjacency.
keyInstance struc
direction		db		4 dup (?)			;DIR_ENCRYPT or DIR_DECRYPT
keyLen			dd		?					;length of the key in bits
keyMaterial		db		68 dup (?)			;ASCII key material
keySig			dd		?					;should be VALID_SIG
numRounds		dd		?					;should be 16
key32			dd		MAX_KEY_BITS/32 dup (?) 
sboxKeys		dd		MAX_KEY_BITS/64 dup (?)
subKeys			dd		TOTAL_SUBKEYS	dup (?)
fullSbox		dd		4*256 dup (?)		;S-box plus MDS
  if KEY_MODE and KM_COMPILE
cSig1			dd		?					;show whether we're compiled
encryptFuncPtr	dd		?					;ptr to encrypt function
decryptFuncPtr	dd		?					;ptr to decrypt function
codeSize		dd		?					;sizeof cipherProcCode
cSig2			dd		?
cipherProcCode	dd		(4600/4) dup (?)	;compiled code itself (4600 bytes)
  endif
keyInstance ends

;
; To minimize code space, bias ebp to generate as many 8-bit offsets as possible
;
; BIAS_VAL is the offset of fullSbox minus 2*TOTAL_SUBKEYS bytes.  subKeys is
; 4*TOTAL_SUBKEYS bytes long and sits directly in front of fullSbox, so the
; bias point is the middle of the subKeys array: with ebp biased, every subkey
; lies within -2*TOTAL_SUBKEYS .. +2*TOTAL_SUBKEYS-1 bytes of ebp.
;   biasEBP  text macro: instruction that adds the bias to ebp
;   ks       text macro: "ks.field" addresses a keyInstance field relative to
;            the biased ebp
; NOTE(review): assumes ebp holds the keyInstance address before biasEBP is
; executed -- confirm at the call sites.
;
BIAS_VAL		=		(fullSbox-(TOTAL_SUBKEYS*2))
biasEBP			equ		<add ebp,BIAS_VAL>
ks				equ		<[ebp-BIAS_VAL]>

;
;----------------------------------------------------------------------------------
;	KeyMode-dependent definitions
;----------------------------------------------------------------------------------
;
; S32_0..S32_3 name the four 32-bit lookup tables used by the round function,
; and SBS is the scale applied to the byte index when addressing them.
;
;  FULL / COMPILE:  the tables live in ks.fullSbox.  Tables 0 and 1 start 4
;                   bytes apart, as do tables 2 and 3 (which start 2*SBOX_SIZE
;                   bytes later), and entries are stepped by SBS = 8 bytes, so
;                   each pair of tables is interleaved entry by entry.
;  other modes:     the tables are four consecutive SBOX_SIZE-byte slices of
;                   the fixed MDStab (SBS = 4).  ks.fullSbox is addressed as
;                   byte tables S8_0..S8_3 instead, 100h bytes apart.
;
; EDX_ADJUST is subtracted again wherever edx is used as a table index
; (e.g. "S32_2[SBS*edx-SBS*EDX_ADJUST]").
; NOTE(review): presumably edx is kept offset by EDX_ADJUST so that some other
; access gets a shorter encoding -- confirm where edx is initialised.

SBOX_SIZE	equ		1024
  if KEY_MODE and (KM_FULL or KM_COMPILE)
Sbump0		equ		0
Sbump1		equ		4
Sbump2		equ		(2*SBOX_SIZE)
Sbump3		equ		(2*SBOX_SIZE+4)
S32_0		equ		<ks.fullSbox[Sbump0]>
S32_1		equ		<ks.fullSbox[Sbump1]>
S32_2		equ		<ks.fullSbox[Sbump2]>
S32_3		equ		<ks.fullSbox[Sbump3]>
SBS			=		8
EDX_ADJUST	=		100h		;optimize for Pentium code size!
else
S32_0		equ		<MDStab>
S32_1		equ		<MDStab[SBOX_SIZE]>
S32_2		equ		<MDStab[SBOX_SIZE*2]>
S32_3		equ		<MDStab[SBOX_SIZE*3]>
S8_0		equ		<byte ptr ks.fullSbox>
S8_1		equ		<byte ptr ks.fullSbox[100h]>
S8_2		equ		<byte ptr ks.fullSbox[200h]>
S8_3		equ		<byte ptr ks.fullSbox[300h]>
SBS			=		4
EDX_ADJUST	=		300h		;optimize for Pentium code size!
  endif

; EAX/EBX/ECX get a zero adjustment so that macros can always write
; "E&R&X-E&R&X_ADJUST" whichever register letter R is passed in.
EAX_ADJUST	=		0			;these defns make handling EDX_ADJUST easier
EBX_ADJUST	=		0
ECX_ADJUST	=		0
;
;----------------------------------------------------------------------------------
;	Macros for dealing with non-full keying S-boxes
;----------------------------------------------------------------------------------
;
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
; lookupS8: replace the byte in a register by its entry in byte table S8_<i>.
;   cpuName - PentiumPro selects the movzx form, anything else the byte-mov form
;   R       - non-PentiumPro: a register letter (A,B,C,D); the result is written
;             to R&L, indexed by E&R&X minus E&R&X_ADJUST
;             PentiumPro: an operand used verbatim as destination and index
;   i       - table number (0..3)
; Expands to nothing when R is blank.
lookupS8 macro	cpuName,R,i			;__TRANSPARENT__
  ifnb <R>
   ifdif <cpuName>,<PentiumPro>
	mov		R&L,S8_&i[E&R&X-E&R&X_ADJUST]
   else
	movzx	R,S8_&i[R]
   endif
  endif
endm

; keyXor8: xor one S-box key byte into a register.
;   cpuName - PentiumPro selects the 32-bit form, anything else the byte form
;   R       - non-PentiumPro: a register letter; the xor is applied to R&L
;             PentiumPro: an operand used verbatim
;   I       - index of the key byte within ks.sboxKeys
; The PentiumPro form reads sboxK8[4*I]; sboxK8 is only defined in the ZERO_KEY
; branch of the KEY_MODE selection visible in this file.
; Expands to nothing when R is blank.
keyXor8	macro	cpuName,R,I			;__TRANSPARENT__
  ifnb <R>
   ifdif <cpuName>,<PentiumPro>
	xor		R&L,byte ptr ks.sboxKeys[I]
   else
	xor		R,sboxK8[4*I]			;the byte is expanded to 32 bits here
   endif
  endif
endm

; lookupQ: replace the byte in a register by its entry in the fixed
; permutation table P_<i><j> (each of which is q0 or q1).
;   cpuName - PentiumPro selects the movzx form, anything else the byte-mov form
;   R       - non-PentiumPro: a register letter; the result is written to R&L,
;             indexed by E&R&X minus E&R&X_ADJUST
;             PentiumPro: an operand used verbatim as destination and index
;   i,j     - the two digits of the table name
; Expands to nothing when R is blank.
lookupQ	macro	cpuName,R,i,j		;__TRANSPARENT__
  ifnb <R>
   ifdif <cpuName>,<PentiumPro>
    mov		R&L,P_&i&j[E&R&X-E&R&X_ADJUST]
   else
    movzx	R,P_&i&j[R]
   endif
  endif
endm

; doS4: run up to four registers through one stage of the S-box computation:
; a permutation lookup followed by a key-byte xor.
;   cpuName   - passed through to lookupQ / keyXor8
;   W         - stage number; selects table P_<i><W> and key byte i+4*(W-1)
;   R1..R4    - registers to process (blank ones are skipped by the callees)
;   i1..i4    - byte/S-box index belonging to each register
; All four lookups are emitted first, then all four xors.
doS4	macro	cpuName,W,R1,R2,i1,i2,R3,R4,i3,i4	;__TRANSPARENT__
	lookupQ		cpuName,R1,i1,%(W)
	lookupQ		cpuName,R2,i2,%(W)
	lookupQ		cpuName,R3,i3,%(W)
	lookupQ		cpuName,R4,i4,%(W)

    keyXor8		cpuName,R1,%(i1+4*W-4)
    keyXor8		cpuName,R2,%(i2+4*W-4)
    keyXor8		cpuName,R3,%(i3+4*W-4)
    keyXor8		cpuName,R4,%(i4+4*W-4)
endm

; this macro is a nop for full keying
; (it is only defined when KEY_MODE is neither FULL nor COMPILE; RoundFunc
; guards its calls with the same condition)
;
; doSbox: emit the key-dependent byte transformations that have to run before
; the 32-bit table lookup, for up to four registers.
;   keySize   - key length in bits; in KM_ZERO mode it decides how many stages
;               are emitted (stage 4 only above 192, stage 3 only above 128)
;   cpuName   - passed through to the lookup macros
;   R1..R4    - registers to process (blank ones are skipped)
;   i1..i4    - byte/S-box index belonging to each register
; What is emitted per mode:
;   KM_PART:  lookupS8 only
;   KM_MIN:   lookupS8, then stage 1 of doS4
;   KM_ZERO:  stages 4 and/or 3 (depending on keySize), 2, then stage 1
doSbox	macro	keySize,cpuName,R1,R2,i1,i2,R3,R4,i3,i4
 if KEY_MODE and (KM_PART or KM_MIN)	;use 8-bit precomputed S-box
	lookupS8	cpuName,R1,i1
	lookupS8	cpuName,R2,i2
	lookupS8	cpuName,R3,i3
	lookupS8	cpuName,R4,i4
 elseif KEY_MODE and KM_ZERO
  if keySize gt 128
   if keySize gt 192
	doS4		cpuName,4,R1,R2,i1,i2,R3,R4,i3,i4
   endif
	doS4		cpuName,3,R1,R2,i1,i2,R3,R4,i3,i4
  endif
	doS4		cpuName,2,R1,R2,i1,i2,R3,R4,i3,i4
 endif
 if KEY_MODE and (KM_MIN or KM_ZERO)	;run bytes thru "next-to-last" 8-bit permutation, key xor
	doS4		cpuName,1,R1,R2,i1,i2,R3,R4,i3,i4
 endif
endm
endif	;!(KM_FULL or KM_COMPILE)

; GetSubkey: load one 32-bit subkey into dstReg.
;   lbl     - not referenced in the body
;   dstReg  - destination register; in KM_ZERO mode it also serves as the index
;   skOffs  - byte offset of the subkey
; KM_ZERO:     the subkey is indexed at run time by roundNum (defined elsewhere):
;              address = ks.subKeys + 8*roundNum + 8*4 + skOffs, i.e. 8 bytes
;              of subkeys per round, starting 32 bytes into the array (32 is
;              also the value of ROUND_SUBKEYS).
; other modes: skOffs addresses the subkey within ks.subKeys directly.
GetSubkey	macro	lbl,dstReg,skOffs	;__TRANSPARENT__
  if KEY_MODE and KM_ZERO
	mov		dstReg,roundNum
	mov		dstReg,ks.subKeys[8*dstReg+8*4+skOffs]
  else
	mov		dstReg,ks.subKeys[skOffs]
  endif
endm

;
;----------------------------------------------------------------------------------
;		Round function macros
;----------------------------------------------------------------------------------
;
; Compiled mode code for Pentium Pro/II
; Be *very* careful about re-ordering opcodes!  This ordering seems to perform best...
;
  if KEY_MODE and KM_COMPILE
  ; do a load/xor from Sbox, put label for Sbox patching
; sboxOp: emit one table access "opCode dstReg,MDStab[SBS*idxReg+Sbump<n>]" and
; place a label directly behind it for later patching.
;   lbl,cpuName  - pasted into the label name
;   R            - round number, pasted into the label name
;   opCode       - instruction to emit (callers in this file pass mov or xor)
;   dstReg       - destination register
;   idxReg       - register holding the byte index
;   sboxNum      - S-box number 0..3; selects Sbump<n>
;   a_b          - tag (a or b) distinguishing the two lookup streams of a round
; Label name: <lbl>_Sbox<n>_<a_b>_R<R>_<cpuName>.  Because the label follows the
; instruction, the instruction's 32-bit displacement ends at the label.
; NOTE(review): the MDStab base is presumably a stand-in that the patch code
; replaces with the address of the key's own table -- confirm in the patcher.
sboxOp macro	lbl,cpuName,R,opCode,dstReg,idxReg,sboxNum,a_b
	opCode	dstReg,MDStab[SBS*idxReg+Sbump&sboxNum]
concat lbl,_Sbox,%(sboxNum),<_>,a_b,<_R>,%(R),<_>,cpuName,< label dword>
endm
;
;
;
; RF_PPro: one cipher round for compiled-key mode.
;   lbl      - Enc selects the encryption body; any other value the decryption body
;   cpuName  - pasted into the patch-label names
;   r0..r3   - register letters; each is used as e&r&x, r&l and r&h, so it must
;              name a register that has low/high byte halves
;   R        - round number; selects the first/last-round special cases
;   S_INDEX  - byte offset of this round's subkey pair; only used to number
;              the _SK_ labels
; Scratch registers: esi, edi, ebp.
;
; Each body does eight table lookups through sboxOp.  The "a" stream takes the
; four bytes of one register through S-boxes 0,1,2,3; the "b" stream takes the
; four bytes of the other register through S-boxes 1,2,3,0.  The two results
; (esi, edi) are then combined by two lea instructions whose displacement
; 12345 is a placeholder, each followed by an _SK_ label.
; NOTE(review): the placeholders are presumably patched with the round subkeys.
;
; One of the two final xors is deferred: its operand is left in edi and xored
; in at the start of the next round's expansion (or immediately in the last
; round processed), so consecutive expansions depend on edi being preserved
; between them.
;
; The instruction order is deliberate (see the warning above); do not reorder.
;
RF_PPro macro	lbl,cpuName,r0,r1,r2,r3,R,S_INDEX
 ifdif <lbl>,<Enc>
		;================ decryption code ========================
;  if R eq MAX_ROUNDS-1
;	rol		e&r2&x,1				;initial rotation (do it during whitening)
;  endif
	movzx	esi,r1&l 				;byte 0 of r1
	movzx	ebp,r1&h 				;byte 1 of r1
  if R ne MAX_ROUNDS-1
	xor		e&r0&x,edi				;apply the xor deferred by the previous expansion
  endif
	movzx	edi,r0&l 				;byte 0 of r0
sboxOp	lbl,cpuName,R,mov,esi,esi,1,b

sboxOp	lbl,cpuName,R,xor,esi,ebp,2,b
	ror		e&r1&x,16 				;bring bytes 2,3 of r1 into r1&l / r1&h

sboxOp	lbl,cpuName,R,mov,edi,edi,0,a
	movzx	ebp,r0&h 				;byte 1 of r0

sboxOp	lbl,cpuName,R,xor,edi,ebp,1,a
	movzx	ebp,r1&l 				;byte 2 of r1
	ror		e&r0&x,16 				;bring bytes 2,3 of r0 into r0&l / r0&h

sboxOp	lbl,cpuName,R,xor,esi,ebp,3,b
	movzx	ebp,r0&l 				;byte 2 of r0

sboxOp	lbl,cpuName,R,xor,edi,ebp,2,a
	movzx	ebp,r1&h 				;byte 3 of r1
	ror		e&r1&x,16 				;r1 is back to its original value

sboxOp	lbl,cpuName,R,xor,esi,ebp,0,b
	movzx	ebp,r0&h				;byte 3 of r0
  if R eq 0
	ror		e&r0&x,16				;r0 is back to its original value
  else
	ror		e&r0&x,15				;pre-rotate for next round
  endif
sboxOp	lbl,cpuName,R,xor,edi,ebp,3,a

	lea		ebp,[edi+2*esi+12345]	
concat lbl,_SK_,%((S_INDEX+4)/4),<_>,cpuName,< label dword>

	lea		edi,[edi+esi+12345]
concat lbl,_SK_,%((S_INDEX)/4),<_>,cpuName,< label dword>
	xor		e&r3&x,ebp 		

	ror		e&r3&x,1
   if R eq 0
	xor		e&r2&x,edi				;round 0 only: nothing follows, so finish the xor here
   endif
 else;================ encryption code ========================
  if R eq 0
	rol		e&r3&x,1				;do the initial rotate
  endif
	movzx	esi,r0&l 				;byte 0 of r0
	movzx	ebp,r0&h 				;byte 1 of r0
  if R ne 0
	xor		e&r1&x,edi				;complete the Feistel xor from last round
  endif
	movzx	edi,r1&l 				;byte 0 of r1
sboxOp	lbl,cpuName,R,mov,esi,esi,0,a

sboxOp	lbl,cpuName,R,xor,esi,ebp,1,a
	ror		e&r0&x,16 				;bring bytes 2,3 of r0 into r0&l / r0&h

sboxOp	lbl,cpuName,R,mov,edi,edi,1,b
	movzx	ebp,r1&h 				;byte 1 of r1

sboxOp	lbl,cpuName,R,xor,edi,ebp,2,b
	movzx	ebp,r0&l 				;byte 2 of r0
	ror		e&r1&x,16 				;bring bytes 2,3 of r1 into r1&l / r1&h

sboxOp	lbl,cpuName,R,xor,esi,ebp,2,a
	movzx	ebp,r1&l 				;byte 2 of r1

sboxOp	lbl,cpuName,R,xor,edi,ebp,3,b
	movzx	ebp,r0&h 				;byte 3 of r0
	ror		e&r0&x,16 				;r0 is back to its original value

sboxOp	lbl,cpuName,R,xor,esi,ebp,3,a
	movzx	ebp,r1&h 				;byte 3 of r1
  if R eq MAX_ROUNDS-1
	ror		e&r1&x,16				;r1 is back to its original value
  else
	ror		e&r1&x,15				;pre-rotate for next round
  endif
sboxOp	lbl,cpuName,R,xor,edi,ebp,0,b	;could do load, then xor...

	lea		ebp,[esi+edi+12345]	
concat lbl,_SK_,%((S_INDEX)/4),<_>,cpuName,< label dword>

	lea		edi,[esi+2*edi+12345]
concat lbl,_SK_,%((S_INDEX+4)/4),<_>,cpuName,< label dword>
	xor		e&r2&x,ebp 		

	ror		e&r2&x,1 			
   if R eq MAX_ROUNDS-1
	xor		e&r3&x,edi				;last round only: nothing follows, so finish the xor here
   endif
 endif ; lbl
endm  ; RF_PPro
endif ; KEY_MODE and KM_COMPILE


RoundFunc	macro	keySize,lbl,cpuName,z0,z1,z2,z3,R,S_INDEX
concat lbl,keySize,Round_,%(R+1),<_>,cpuName,<:>	;keep the listing easy to follow
 ifdif		<cpuName>,<PentiumPro>
	mov		z0,eax				;save previous round Feistel results (not on first round)
	mov		z1,ebx

	mov		cl,ah				;set up to access Sbox
	mov		dl,bh

	and		eax,0FFH
	and		ebx,0FFH

  if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
	doSbox	keySize,cpuName,C,D,1,2,A,B,0,1
  endif

	mov		esi,S32_1[SBS*ecx]	;"interleave" even/odd Sboxes --> no cache bank problems
	mov		edi,S32_2[SBS*edx-SBS*EDX_ADJUST]

	mov		cl,byte ptr z0[2]
	mov		dl,byte ptr z1[2]

	mov		eax,S32_0[SBS*eax]
	mov		ebx,S32_1[SBS*ebx]

  if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
	doSbox	keySize,cpuName,C,D,2,3
  endif

	xor		esi,eax
	xor		edi,ebx

	mov		eax,S32_2[SBS*ecx]
	mov		ebx,S32_3[SBS*edx-SBS*EDX_ADJUST]

	mov		dl,byte ptr z0[3]
	mov		cl,byte ptr z1[3]
	
	xor		esi,eax
	xor		edi,ebx

  if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
	doSbox	keySize,cpuName,C,D,0,3
  endif

	mov		ebx,S32_0[SBS*ecx]
	mov		eax,S32_3[SBS*edx-SBS*EDX_ADJUST]

	xor		esi,eax
	xor		edi,ebx

  if (KEY_MODE and KM_COMPILE) eq 0
	add		esi,edi				;first half of the PHT
	GetSubkey lbl,eax,S_INDEX

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -