📄 2fish_86.asm
字号:
page 60,160
title Twofish for 386+, Author: Doug Whiting, Hi/fn
;
; WARNING: This module is written for speed, not clarity!
;
.386 ;accept 80386 instructions
.MODEL FLAT, C ;flat memory model, C language conventions
;
; Exported symbols. Each one is a dword VARIABLE in the data segment that holds
; a code address (it is not itself a procedure label). All three start out
; holding the address of a *_SelectCPU routine, which is defined outside this
; section of the file.
; NOTE(review): presumably the SelectCPU routines detect the processor on the
; first call and overwrite these variables with CPU-specific entry points --
; confirm against the rest of the file.
;
public blockEncrypt_86,blockDecrypt_86,reKey_86
.DATA
blockEncrypt_86 dd E_SelectCPU ;first time thru, use get_cpu_type
blockDecrypt_86 dd D_SelectCPU
reKey_86 dd R_SelectCPU
;
; Tables supplied by another module.
;
extrn MDStab:dword ;MDS multiply matrix (pre-permuted)
extrn P8x8:byte ;two fixed 8x8 permutations
q0 equ P8x8[0] ;first permutation: starts at byte offset 0 of P8x8
q1 equ P8x8[256] ;second permutation: starts 256 bytes into P8x8
.CODE
;
; ML syntax:
; ml.exe -Flt2fish_86.lst -coff -Cx -Zi -Zm -c -DMASM6 2fish_86.asm
;----------------------------------------------------------------------------------
; Useful general macros
;----------------------------------------------------------------------------------
; Pull in the external macro package strucmac.inc (not part of this file).
; OFFSET32 is defined before the include so the package can see it.
OFFSET32 equ 1 ;force 32-bit strucmac stuff
.xlist ;don't expand strucmac defns (listing off while including)
include strucmac.inc
.list ;listing back on
;
; KEY_MODE selects, at assembly time, which keying variant of the code is
; generated. Exactly one KM_* bit is stored in KEY_MODE; later code tests it
; with masks such as "KEY_MODE and (KM_FULL or KM_COMPILE)".
;
; Choose a mode by defining one of PART_KEY, MIN_KEY, ZERO_KEY or COMPILE_KEY
; (for example with -D on the ML command line). If none is defined the default
; is full keying. If several are defined, the first match in the order below
; wins. Every branch prints the chosen mode with %out so the build log shows
; which variant was assembled.
;
KM_ZERO = 1 ;KEY_MODE bits
KM_MIN = 2
KM_PART = 4
KM_FULL = 8
KM_COMPILE = 16
ifdef PART_KEY
%out Assembly with PART_KEY
KEY_MODE = KM_PART
elseifdef MIN_KEY
%out Assembly with MIN_KEY
KEY_MODE = KM_MIN
elseifdef ZERO_KEY
%out Assembly with ZERO_KEY
KEY_MODE = KM_ZERO
sboxK8 equ <ks.fullSbox> ;use S-box to expand keys for PentiumPro
elseifdef COMPILE_KEY
%out Assembly with COMPILE_KEY
KEY_MODE = KM_COMPILE
else ;default is full key
%out Assembly with FULL_KEY
KEY_MODE = KM_FULL
endif
;
; where the permutations are used
;
; P_ij names the fixed permutation table (q0 or q1) used for byte position i
; (0..3) at stage j. The lookupQ macro splices the name together as P_&i&j,
; and doS4 passes its stage number W (1..4 in the calls made by doSbox) as j.
; The j = 0 entries are not referenced by the macros in this part of the file;
; per the original note they describe the outermost permutation, which is
; folded into MDSmat.
;
P_00 equ q1 ;"outermost" permutation (in MDSmat)
P_01 equ q0
P_02 equ q0
P_03 equ q1
P_04 equ q1
P_10 equ q0
P_11 equ q0
P_12 equ q1
P_13 equ q1
P_14 equ q0
P_20 equ q1
P_21 equ q1
P_22 equ q0
P_23 equ q0
P_24 equ q0
P_30 equ q0
P_31 equ q1
P_32 equ q1
P_33 equ q0
P_34 equ q1
;
; useful in "splicing" names together
;
; concat pastes up to seven text arguments together with no separators and
; emits the result as a single source line. Blank arguments contribute
; nothing. Callers use it to build labels and label directives from pieces,
; e.g. passing %(expr) so a computed number becomes part of a label name.
concat macro aa,bb,cc,dd,ee,ff,gg ;;__TRANSPARENT__ (signal to LST2ASM)
aa&bb&cc&dd&ee&ff&gg
endm
; alloc: define one stack-relative parameter/local and advance the running offset.
;   varName - symbol to define
;   varType - operand type placed before "ptr" (e.g. dword, byte)
;   varSize - number of bytes to reserve for this variable
; varName becomes "varType ptr [esp+N]" where N is the value of the assembly
; variable varOffs at the time of the call; varOffs is then increased by
; varSize. varOffs must already have been assigned by the caller before the
; first alloc (that assignment is not in this part of the file).
alloc macro varName,varType,varSize ;macro to help define parms/locals
;; irp over the one-element list <%varOffs> turns the current numeric value
;; of varOffs into literal text XX, so the equate captures the offset now.
irp XX,<%varOffs>
varName equ varType ptr [esp+XX]
endm
;; the trailing ';' on the next line just starts an empty comment
varOffs = varOffs+varSize;
endm
; copy from src to dst (use eax,ebx)
;
; Copy8: copy 8 bytes as two dwords.
;   dst - memory operand; dwords are stored at dst and dst+4
;   src - memory operand; dwords are loaded from src and src+4.
;         If src is blank, nothing is loaded and the values already in
;         eax (low dword) and ebx (high dword) are stored.
; On exit eax,ebx hold the two dwords that were stored.
Copy8 macro dst,src
ifnb <src>
mov eax,src ;load source dwords(if needed)
mov ebx,src+4
endif
mov dst,eax ;store new dst dwords
mov dst+4,ebx
endm
;
;dst = xorA ^ subkey pair ^ xorC (8 bytes, xorC may be blank)
;returns two dst dwords in eax,ebx. Trashes ecx,edx
;oldDst = where to copy dst (for CBC mode)
;
; Parameters:
;   lbl,cpuName - used only when KEY_MODE has KM_COMPILE: spliced into the
;                 label names lbl_SK_<n>_cpuName emitted after each xor
;   dst         - 8-byte memory operand that receives the result; if blank the
;                 result is left in eax,ebx only
;   xorA        - 8-byte memory operand; if blank, the values already in
;                 eax,ebx are used as the A operand
;   subKeyIndex - byte offset into ks.subKeys of the first of two consecutive
;                 subkey dwords
;   xorC        - optional third 8-byte memory operand
;   oldDst      - optional; honored only when dst is given. The previous
;                 contents of dst are copied to oldDst before dst is overwritten
; Notes:
;   * In KM_COMPILE builds ecx,edx are touched only if xorC or oldDst is given.
;   * The xor instructions modify the flags.
Xor8 macro lbl,cpuName,dst,xorA,subKeyIndex,xorC,oldDst
ifnb <xorA>
mov eax,xorA ;load both A dwords (unless already loaded)
mov ebx,xorA+4
endif
if (KEY_MODE and KM_COMPILE) eq 0
mov ecx,ks.subKeys[subKeyIndex] ;load both B dwords
mov edx,ks.subKeys[subKeyIndex+4]
xor eax,ecx ;compute A ^ B
xor ebx,edx
else
;; Compiled-key variant: xor with a placeholder immediate. The dword label
;; that follows each xor sits immediately after that instruction.
;; NOTE(review): presumably key setup locates the immediate through this
;; label and patches in the real subkey -- confirm in the code that uses it.
xor eax,12345678h
concat lbl,_SK_,%((subKeyIndex)/4),<_>,cpuName,< label dword>
xor ebx,12345678h
concat lbl,_SK_,%((subKeyIndex)/4+1),<_>,cpuName,< label dword>
endif
ifnb <xorC>
mov ecx,xorC ;load both C dwords
mov edx,xorC+4
xor eax,ecx ;compute A ^ B ^ C
xor ebx,edx
endif
ifnb <dst>
ifnb <oldDst>
mov ecx,dst ;pick up previous ciphertext value
mov edx,dst+4
mov oldDst,ecx ;and make copy (for next IV)
mov oldDst+4,edx
endif
mov dst,eax ;save the result
mov dst+4,ebx ;(and return in eax,ebx)
endif
endm
;
;----------------------------------------------------------------------------------
; Definitions, Structures (AES.H)
;----------------------------------------------------------------------------------
;
BLOCK_SIZE equ 128 ;in bits (divided by 8 / 32 below for bytes / dwords)
MAX_KEY_BITS equ 256 ;in bits (divided by 32 / 64 in keyInstance)
MAX_ROUNDS equ 16
; The next three are BYTE offsets into the subKeys array.
INPUT_WHITEN equ 0
OUTPUT_WHITEN equ (BLOCK_SIZE/8) ;= 16
ROUND_SUBKEYS equ (OUTPUT_WHITEN+BLOCK_SIZE/8) ;= 32
; Number of subkey DWORDS: the 8 dwords before ROUND_SUBKEYS plus two per round.
TOTAL_SUBKEYS equ ((ROUND_SUBKEYS/4)+2*MAX_ROUNDS) ;= 40
SUBKEY_SIZE equ <4*TOTAL_SUBKEYS> ;text equate: size of subKeys in bytes
DIR_ENCRYPT equ 0 ;values for keyInstance.direction
DIR_DECRYPT equ 1
MODE_ECB equ 1 ;values for cipherInstance.mode
MODE_CBC equ 2
MODE_CFB1 equ 3
; Signature stored in cipherSig/keySig. The value differs between builds with
; and without KM_COMPILE.
if KEY_MODE and KM_COMPILE
VALID_SIG equ 504D4F43h ;'COMP'
else
VALID_SIG equ 48534946h ;'FISH'
endif
; cipherInstance: per-cipher mode and IV state.
; NOTE(review): the section heading attributes these structures to AES.H, so
; the field order and sizes presumably have to match the C declaration --
; confirm against that header.
cipherInstance struc
mode db 4 dup (?) ;MODE_ECB, MODE_CBC (Let C code handle MODE_CFB1); 4 bytes reserved
CFB1_IV db BLOCK_SIZE/8 dup (?);CFB1 IV bytes (16 bytes)
cipherSig dd ? ;should be VALID_SIG
IV32 dd BLOCK_SIZE/32 dup (?);CBC IV dwords (4 dwords)
cipherInstance ends
; keyInstance: key material, expanded subkeys and S-box storage.
; The code addresses it through the text equate "ks" (ebp-relative).
; In KM_COMPILE builds the structure is extended with function pointers and a
; buffer for generated code.
keyInstance struc
direction db 4 dup (?) ;DIR_ENCRYPT or DIR_DECRYPT; 4 bytes reserved
keyLen dd ? ;length of the key in bits
keyMaterial db 68 dup (?) ;ASCII key material
keySig dd ? ;should be VALID_SIG
numRounds dd ? ;should be 16
key32 dd MAX_KEY_BITS/32 dup (?) ;8 dwords
sboxKeys dd MAX_KEY_BITS/64 dup (?) ;4 dwords; read bytewise by keyXor8
subKeys dd TOTAL_SUBKEYS dup (?) ;40 dwords; must directly precede fullSbox (see BIAS_VAL)
fullSbox dd 4*256 dup (?) ;S-box plus MDS (4096 bytes; layout depends on KEY_MODE)
if KEY_MODE and KM_COMPILE
encryptFuncPtr dd (?) ;ptr to encrypt function
decryptFuncPtr dd (?) ;ptr to decrypt function
codeSize dd (?) ;sizeof cipherProcCode
cipherProcCode dd (4600/4) dup (?) ;compiled code itself (4600 bytes)
endif
keyInstance ends
;
; To minimize code space, bias ebp to generate as many 8-bit offsets as possible
;
; BIAS_VAL is the offset of fullSbox minus TOTAL_SUBKEYS*2 bytes. Because
; subKeys is 4*TOTAL_SUBKEYS bytes long and sits directly before fullSbox in
; keyInstance, this is the offset of the midpoint of subKeys. With ebp biased
; to that point, subkeys lie at small negative and positive displacements.
;   biasEBP - text equate for the instruction that applies the bias to ebp
;   ks      - text equate; "ks.field" expands to "[ebp-BIAS_VAL].field", i.e.
;             the field as seen from the biased ebp
;
BIAS_VAL = (fullSbox-(TOTAL_SUBKEYS*2))
biasEBP equ <add ebp,BIAS_VAL>
ks equ <[ebp-BIAS_VAL]>
;
;----------------------------------------------------------------------------------
; KeyMode-dependent definitions
;----------------------------------------------------------------------------------
SBOX_SIZE equ 1024 ;bytes in one 256-entry table of dwords
if KEY_MODE and (KM_FULL or KM_COMPILE)
; Full/compiled keying: the four 32-bit tables live in ks.fullSbox, stored as
; two interleaved pairs. S32_0 and S32_1 alternate dword by dword from offset
; 0, and S32_2 and S32_3 alternate from offset 2*SBOX_SIZE. Consecutive entries
; of one table are therefore SBS = 8 bytes apart.
S32_0 equ <ks.fullSbox[0*SBOX_SIZE]>
S32_1 equ <ks.fullSbox[0*SBOX_SIZE+4]>
S32_2 equ <ks.fullSbox[2*SBOX_SIZE]>
S32_3 equ <ks.fullSbox[2*SBOX_SIZE+4]>
SBS = 8
EDX_ADJUST = 100h ;optimize for Pentium code size!
else
; Other key modes: the 32-bit tables are four consecutive SBOX_SIZE-byte
; regions of the external MDStab (entry stride SBS = 4 bytes). ks.fullSbox is
; instead addressed as four 256-byte tables of bytes, S8_0..S8_3.
S32_0 equ <MDStab>
S32_1 equ <MDStab[SBOX_SIZE]>
S32_2 equ <MDStab[SBOX_SIZE*2]>
S32_3 equ <MDStab[SBOX_SIZE*3]>
S8_0 equ <byte ptr ks.fullSbox>
S8_1 equ <byte ptr ks.fullSbox[100h]>
S8_2 equ <byte ptr ks.fullSbox[200h]>
S8_3 equ <byte ptr ks.fullSbox[300h]>
SBS = 4
EDX_ADJUST = 300h ;optimize for Pentium code size!
endif
; EDX_ADJUST is subtracted (scaled by SBS where applicable) from every
; edx-based table index.
; NOTE(review): presumably the bits of edx above dl are kept preloaded with
; this constant so that the displacement in the instruction shrinks -- confirm
; where edx is initialised.
; The zero-valued equates below let macros write E&R&X_ADJUST for any register
; letter R without special-casing edx.
EAX_ADJUST = 0 ;these defns make handling EDX_ADJUST easier
EBX_ADJUST = 0
ECX_ADJUST = 0
;
;----------------------------------------------------------------------------------
; Macros for dealing with non-full keying S-boxes
;----------------------------------------------------------------------------------
;
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
; lookupS8: replace a byte held in a register by its entry in 8-bit table S8_<i>.
;   cpuName - if exactly PentiumPro, the movzx form is generated
;   R       - blank: generate nothing.
;             non-PentiumPro: one register letter (A,B,C,D); the low byte
;             register R&L is loaded, indexed by E&R&X minus E&R&X_ADJUST.
;             PentiumPro: a full 32-bit register name, used both as index and
;             as zero-extended destination.
;   i       - table number (selects S8_0..S8_3)
lookupS8 macro cpuName,R,i ;__TRANSPARENT__
ifnb <R>
ifdif <cpuName>,<PentiumPro>
mov R&L,S8_&i[E&R&X-E&R&X_ADJUST]
else
movzx R,S8_&i[R]
endif
endif
endm
; keyXor8: xor a key byte into a register.
;   cpuName - if exactly PentiumPro, the 32-bit form is generated
;   R       - blank: generate nothing.
;             non-PentiumPro: register letter (A,B,C,D); R&L is xored with
;             byte I of ks.sboxKeys.
;             PentiumPro: full 32-bit register name; xored with the dword at
;             sboxK8[4*I].
;   I       - key byte index
; sboxK8 is only defined in this part of the file when ZERO_KEY is selected.
keyXor8 macro cpuName,R,I ;__TRANSPARENT__
ifnb <R>
ifdif <cpuName>,<PentiumPro>
xor R&L,byte ptr ks.sboxKeys[I]
else
xor R,sboxK8[4*I] ;the byte is expanded to 32 bits here
endif
endif
endm
; lookupQ: replace a byte held in a register by its entry in the fixed
; permutation table P_<i><j> (an alias for q0 or q1).
;   cpuName - if exactly PentiumPro, the movzx form is generated
;   R       - blank: generate nothing.
;             non-PentiumPro: register letter (A,B,C,D); R&L is loaded,
;             indexed by E&R&X minus E&R&X_ADJUST.
;             PentiumPro: full 32-bit register name, index and destination.
;   i,j     - digits spliced into the table name P_&i&j
lookupQ macro cpuName,R,i,j ;__TRANSPARENT__
ifnb <R>
ifdif <cpuName>,<PentiumPro>
mov R&L,P_&i&j[E&R&X-E&R&X_ADJUST]
else
movzx R,P_&i&j[R]
endif
endif
endm
; doS4: one permutation-plus-key-xor stage applied to up to four registers.
;   cpuName       - passed through to lookupQ/keyXor8
;   W             - stage number; selects tables P_<i><W> and key byte i+4*W-4
;   R1..R4        - registers to process (blank ones are skipped by the
;                   called macros)
;   i1..i4        - byte position belonging to each register
; All table lookups are issued first, then all key xors.
doS4 macro cpuName,W,R1,R2,i1,i2,R3,R4,i3,i4 ;__TRANSPARENT__
lookupQ cpuName,R1,i1,%(W)
lookupQ cpuName,R2,i2,%(W)
lookupQ cpuName,R3,i3,%(W)
lookupQ cpuName,R4,i4,%(W)
keyXor8 cpuName,R1,%(i1+4*W-4)
keyXor8 cpuName,R2,%(i2+4*W-4)
keyXor8 cpuName,R3,%(i3+4*W-4)
keyXor8 cpuName,R4,%(i4+4*W-4)
endm
; this macro is a nop for full keying
; (it is only defined, and only invoked, when KEY_MODE is not KM_FULL/KM_COMPILE)
;
; doSbox: run up to four byte-holding registers through the 8-bit S-box steps
; that precede the 32-bit table lookup.
;   keySize        - key size in bits; only examined in KM_ZERO builds
;   cpuName,R*,i*  - as for doS4 / lookupS8
; What is generated:
;   KM_PART : one lookupS8 per register
;   KM_MIN  : one lookupS8 per register, then doS4 stage 1
;   KM_ZERO : doS4 stage 4 (keySize > 192), stage 3 (keySize > 128), stage 2,
;             then stage 1
doSbox macro keySize,cpuName,R1,R2,i1,i2,R3,R4,i3,i4
if KEY_MODE and (KM_PART or KM_MIN) ;use 8-bit precomputed S-box
lookupS8 cpuName,R1,i1
lookupS8 cpuName,R2,i2
lookupS8 cpuName,R3,i3
lookupS8 cpuName,R4,i4
elseif KEY_MODE and KM_ZERO
if keySize gt 128
if keySize gt 192
doS4 cpuName,4,R1,R2,i1,i2,R3,R4,i3,i4
endif
doS4 cpuName,3,R1,R2,i1,i2,R3,R4,i3,i4
endif
doS4 cpuName,2,R1,R2,i1,i2,R3,R4,i3,i4
endif
if KEY_MODE and (KM_MIN or KM_ZERO) ;run bytes thru "next-to-last" 8-bit permutation, key xor
doS4 cpuName,1,R1,R2,i1,i2,R3,R4,i3,i4
endif
endm
endif ;!(KM_FULL or KM_COMPILE)
; GetSubkey: load one subkey dword into dstReg.
;   lbl    - not used by this macro
;   dstReg - 32-bit destination register (also used as scratch index in the
;            KM_ZERO form)
;   skOffs - byte offset added to the subkey address
; KM_ZERO builds: dstReg is first loaded from roundNum (defined outside this
; part of the file) and the subkey is then fetched from
; ks.subKeys[8*roundNum + 8*4 + skOffs].
; All other builds: the subkey is fetched from the fixed offset ks.subKeys[skOffs].
GetSubkey macro lbl,dstReg,skOffs ;__TRANSPARENT__
if KEY_MODE and KM_ZERO
mov dstReg,roundNum
mov dstReg,ks.subKeys[8*dstReg+8*4+skOffs]
else
mov dstReg,ks.subKeys[skOffs]
endif
endm
;
;----------------------------------------------------------------------------------
; Round function macros
;----------------------------------------------------------------------------------
;
RoundFunc macro keySize,lbl,cpuName,z0,z1,z2,z3,R,S_INDEX
concat lbl,keySize,Round_,%(R+1),<_>,cpuName,<:> ;keep the listing easy to follow
ifdif <cpuName>,<PentiumPro>
mov z0,eax ;save previous round Feistel results (not on first round)
mov z1,ebx
mov cl,ah ;set up to access Sbox
mov dl,bh
and eax,0FFH
and ebx,0FFH
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
doSbox keySize,cpuName,C,D,1,2,A,B,0,1
endif
mov esi,S32_1[SBS*ecx] ;"interleave" even/odd Sboxes --> no cache bank problems
mov edi,S32_2[SBS*edx-SBS*EDX_ADJUST]
mov cl,byte ptr z0[2]
mov dl,byte ptr z1[2]
mov eax,S32_0[SBS*eax]
mov ebx,S32_1[SBS*ebx]
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
doSbox keySize,cpuName,C,D,2,3
endif
xor esi,eax
xor edi,ebx
mov eax,S32_2[SBS*ecx]
mov ebx,S32_3[SBS*edx-SBS*EDX_ADJUST]
mov dl,byte ptr z0[3]
mov cl,byte ptr z1[3]
xor esi,eax
xor edi,ebx
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
doSbox keySize,cpuName,C,D,0,3
endif
mov ebx,S32_0[SBS*ecx]
mov eax,S32_3[SBS*edx-SBS*EDX_ADJUST]
xor esi,eax
xor edi,ebx
if (KEY_MODE and KM_COMPILE) eq 0
add esi,edi ;first half of the PHT
GetSubkey lbl,eax,S_INDEX
add edi,esi ;complete the PHT
GetSubkey lbl,ebx,S_INDEX+4
add esi,eax ;add in the round subkeys
add edi,ebx
mov eax,z2 ;load z2,z3 to update them
mov ebx,z3
ifdif <lbl>,<Enc> ;
rol eax,1 ;Decryption: z2=ROL(z2,1)
xor ebx,edi ;half of Feistel xor
ror ebx,1 ;Decryption: z3=ROR(z3,1)
xor eax,esi ;complete the Feistel xor
else
rol ebx,1 ;Encryption: z3=ROL(z3,1)
xor eax,esi ;half of Feistel xor
ror eax,1 ;Encryption: z2=ROR(z2,1)
xor ebx,edi ;complete Feistel. eax,ebx=new z2,z3
endif
else ;--- do the compiled code thing
mov eax,z2 ;load z2,z3 to update them
mov ebx,z3 ;(and avoid AGI on lea opcodes below)
ifdif <lbl>,<Enc> ;---do Decryption first
rol eax,1
lea ecx,[esi+2*edi+12345678h+4]
concat Dec_SK_,%((S_INDEX+4)/4),<_>,cpuName,<:> ;label so we can "patch" code
xor ebx,ecx
lea edi,[esi+edi+12345678h]
concat Dec_SK_,%((S_INDEX+0)/4),<_>,cpuName,<:> ;label so we can "patch" code
ror ebx,1
xor eax,edi
else ;compile the encryption code
rol ebx,1
lea ecx,[esi+edi+12345678h]
concat Enc_SK_,%((S_INDEX+0)/4),<_>,cpuName,<:> ;label so we can "patch" code
xor eax,ecx
lea edi,[esi+2*edi+12345678h+4]
concat Enc_SK_,%((S_INDEX+4)/4),<_>,cpuName,<:> ;label so we can "patch" code
ror eax,1
xor ebx,edi
endif
xor ecx,ecx
inc edi ;one byte opcode, NOP here
endif
else ; ----------------- the Pentium Pro/II version
mov z0,eax ;save previous round Feistel results (not on first round)
movzx esi,ah
movzx ecx,al
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
doSbox keySize,cpuName,esi,ecx,1,0
endif
mov z1,ebx
mov esi,S32_1[SBS*esi]
shr eax,16
mov ecx,S32_0[SBS*ecx]
movzx edi,bh
movzx edx,bl
if (KEY_MODE and (KM_FULL or KM_COMPILE)) eq 0
doSbox keySize,cpuName,edi,edx,2,1
endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -