📄 aessmall_x86.asm
字号:
; ---------------------------------------------------------------------------
; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
;
; LICENSE TERMS
;
; The free distribution and use of this software is allowed (with or without
; changes) provided that:
;
; 1. source code distributions include the above copyright notice, this
; list of conditions and the following disclaimer;
;
; 2. binary distributions include the above copyright notice, this list
; of conditions and the following disclaimer in their documentation;
;
; 3. the name of the copyright holder is not used to endorse products
; built using this software without specific written permission.
;
; DISCLAIMER
;
; This software is provided 'as is' with no explicit or implied warranties
; in respect of its properties, including, but not limited to, correctness
; and/or fitness for purpose.
; ---------------------------------------------------------------------------
; Issue 20/12/2007
;
; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
; and the same define to be set here as well. If ASM_X86_V2C is set this file
; requires the C files aeskey.c and aestab.c for support.
; An AES implementation for x86 processors using the YASM (or NASM) assembler.
; This is a full assembler implementation covering encryption, decryption and
; key scheduling. It uses 2k bytes of tables but its encryption and decryption
; performance is very close to that obtained using large tables. Key schedule
; expansion is slower for both encryption and decryption but this is likely to
; be offset by the much smaller load that this version places on the processor
; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
; the design of the AES round function used here.
;
; This code provides the standard AES block size (128 bits, 16 bytes) and the
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
; interface as my C implementation. The ebx, esi, edi and ebp registers are
; preserved across calls but eax, ecx and edx and the arithmetic status flags
; are not. Although this is a full assembler implementation, it can be used
; in conjunction with my C code which provides faster key scheduling using
; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
; defined. It is also important that the defines below match those used in the
; C code. This code uses the VC++ register saving conventions; if it is used
; with another compiler, conventions for using and saving registers may need
; to be checked (and calling conventions). The YASM command line for the VC++
; custom build step is:
;
; yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
;
; For the cryptlib build this is (pcg):
;
; yasm -Xvc -f win32 -D ASM_X86_V2C -o aescrypt2.obj aes_x86_v2.asm
;
; where <Z> is ASM_X86_V2 or ASM_X86_V2C. The calling interfaces are:
;
; AES_RETURN aes_encrypt(const unsigned char in_blk[],
; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
;
; AES_RETURN aes_decrypt(const unsigned char in_blk[],
; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
;
; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
; const aes_encrypt_ctx cx[1]);
;
; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
; const aes_decrypt_ctx cx[1]);
;
; AES_RETURN aes_encrypt_key(const unsigned char key[],
; unsigned int len, const aes_encrypt_ctx cx[1]);
;
; AES_RETURN aes_decrypt_key(const unsigned char key[],
; unsigned int len, const aes_decrypt_ctx cx[1]);
;
; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
; either bits or bytes.
; The DLL interface must use the _stdcall convention in which the number
; of bytes of parameter space is added after an @ to the subroutine's name.
; We must also remove our parameters from the stack before return (see
; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
;
; Adapted for TrueCrypt by the TrueCrypt Foundation:
; - All tables generated at run-time
; - Adapted for 16-bit environment
;
; Assembler setup: limit the instruction set to the 80386 and generate 16-bit
; code (this matches the 16-bit TrueCrypt adaptation noted in the file header).
CPU 386
USE16
; Declare a code segment and a data segment, then place both in the single
; group DGROUP.
; NOTE(review): _DATA is the last segment selected here, so unless a later
; directive switches back, everything that follows is assembled into _DATA --
; confirm this is intended.
SEGMENT _TEXT PUBLIC CLASS=CODE USE16
SEGMENT _DATA PUBLIC CLASS=DATA USE16
GROUP DGROUP _TEXT _DATA
; The lookup tables are not defined in this file; they are imported symbols.
extern _aes_dec_tab ; Aestab.c
extern _aes_enc_tab
; ---- User-selectable build options ----------------------------------------
; A line starting with "; %define" is an option that is currently switched
; off; remove the leading "; " to switch it on.
;
; DLL_EXPORT selects the decorated name@bytes entry points and the
; "ret <bytes>" exit used by the do_name / do_call / do_exit macros.
; %define DLL_EXPORT
; The size of the code can be reduced by using functions for the encryption
; and decryption rounds in place of macro expansion
%define REDUCE_CODE_SIZE
; Comment in/out the following lines to obtain the desired subroutines. These
; selections MUST match those in the C header file aes.h
; %define AES_128 ; define if AES with 128 bit keys is needed
; %define AES_192 ; define if AES with 192 bit keys is needed
%define AES_256 ; define if AES with 256 bit keys is needed
; %define AES_VAR ; define if a variable key size is needed
%define ENCRYPTION ; define if encryption is needed
%define DECRYPTION ; define if decryption is needed
; %define AES_REV_DKS ; define if key decryption schedule is reversed
; The assembler key schedules are only requested when ASM_X86_V2C is NOT
; defined (the file header says the C code provides key scheduling in the
; ASM_X86_V2C configuration).
%ifndef ASM_X86_V2C
%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
%endif
; The encryption key schedule has the following in memory layout where N is the
; number of rounds (10, 12 or 14):
;
; lo: | input key (round 0) | ; each round is four 32-bit words
; | encryption round 1 |
; | encryption round 2 |
; ....
; | encryption round N-1 |
; hi: | encryption round N |
;
; The decryption key schedule is normally set up so that it has the same
; layout as above by actually reversing the order of the encryption key
; schedule in memory (this happens when AES_REV_DKS is set):
;
; lo: | decryption round 0 | = | encryption round N |
; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
; .... ....
; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
; hi: | decryption round N | = | input key (round 0) |
;
; with rounds except the first and last modified using inv_mix_column()
; But if AES_REV_DKS is NOT set the order of keys is left as it is for
; encryption so that it has to be accessed in reverse when used for
; decryption (although the inverse mix column modifications are done)
;
; lo: | decryption round 0 | = | input key (round 0) |
; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
; .... ....
; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
; hi: | decryption round N | = | encryption round N |
;
; This layout is faster when the assembler key scheduling provided here
; is used.
;
; End of user defines
; ---- Derived settings (not meant to be edited) ----------------------------
;
; A variable-key-size build needs every fixed key size, so AES_VAR switches on
; whichever of AES_128 / AES_192 / AES_256 was left undefined.
%ifdef AES_VAR
  %ifndef AES_128
    %define AES_128
  %endif
  %ifndef AES_192
    %define AES_192
  %endif
  %ifndef AES_256
    %define AES_256
  %endif
%endif

; KS_LENGTH is sized for the largest enabled key: 60 for 256-bit keys, 52 for
; 192-bit keys, otherwise 44. These values equal 4*(N+1) for N = 14, 12 and 10
; rounds, i.e. presumably the key schedule length in 32-bit words.
; AES_VAR always implies AES_256 at this point, so testing AES_256 alone also
; covers the variable-key build.
%ifdef AES_256
  %define KS_LENGTH 60
%elifdef AES_192
  %define KS_LENGTH 52
%else
  %define KS_LENGTH 44
%endif
; These macros implement stack based local variables
;
;   save    <slot>, <reg>    store <reg> at [esp + 4*<slot>]
;   restore <reg>, <slot>    load  <reg> from [esp + 4*<slot>]
;
; Note that the argument order is reversed between the two macros. Slots are
; addressed relative to the current ESP, so anything that moves the stack
; pointer between a save and its matching restore (push, call, sub sp, ...)
; also moves where the slot is found. Neither macro reserves stack space.
%macro save 2
mov [esp+4*%1],%2
%endmacro
%macro restore 2
mov %1,[esp+4*%2]
%endmacro
; mf_call <name> -- run the round helper <name>.
;
; Helpers such as the round functions exist either as macros (expanded in
; place) or, when REDUCE_CODE_SIZE is defined, as subroutines (reached with a
; call). mf_call lets the call sites be written once for both builds.
%ifndef REDUCE_CODE_SIZE
%macro mf_call 1
    %1                      ; macro build: expand the helper inline
%endmacro
%else
%macro mf_call 1
    call %1                 ; reduced-size build: helper is a subroutine
%endmacro
%endif
; Entry, call and exit helpers that hide the difference between the plain
; build and the DLL build (DLL_EXPORT defined). The DLL build uses _stdcall:
; names are decorated as name@bytes and the callee removes its own parameters
; from the stack on return.
;
; parms is the default number of parameter bytes.
; NOTE(review): 12 matches three 4-byte pointers, but the parameter offsets
; used by this 16-bit adaptation are 2, 4 and 6 -- confirm the default before
; enabling DLL_EXPORT or relying on do_call without an explicit byte count.
%define parms 12

; do_name <name> [, <bytes>] -- declare a public entry point and open it.
%macro do_name 1-2 parms
%ifdef DLL_EXPORT
    global %1@%2
    export %1@%2
%1@%2:
%else
    global %1
%1:
%endif
%endmacro

; do_call <name> [, <bytes>] -- call an entry point declared with do_name.
; In the plain build the caller discards the <bytes> of parameters itself.
%macro do_call 1-2 parms
%ifdef DLL_EXPORT
    call %1@%2
%else
    call %1
    add esp,%2
%endif
%endmacro

; do_exit [<bytes>] -- return from an entry point; only the DLL build pops
; the parameter bytes as part of the return.
%macro do_exit 0-1 parms
%ifndef DLL_EXPORT
    ret
%else
    ret %1
%endif
%endmacro
; Finite field multiplies by {02}, {04} and {08}.
;
; x is an assemble-time constant in the range 0..255. The product is formed
; by shifting left and then XORing in 0x11b (times 1, 2 or 4) for each bit
; that was shifted out of the low byte, which brings the result back into
; 8 bits.
;
; Every use of the argument is parenthesised so that an expression argument
; (for example f2(a|b)) is evaluated as a whole instead of being split up by
; operator precedence inside the macro body.
%define f2(x) (((x)<<1)^((((x)>>7)&1)*0x11b))
%define f4(x) (((x)<<2)^((((x)>>6)&1)*0x11b)^((((x)>>6)&2)*0x11b))
%define f8(x) (((x)<<3)^((((x)>>5)&1)*0x11b)^((((x)>>5)&2)*0x11b)^((((x)>>5)&4)*0x11b))
; Finite field multiplies required in table generation: {03}, {09}, {0b},
; {0d} and {0e}, each built by XORing the power-of-two multiples above.
%define f3(x) (f2(x) ^ (x))
%define f9(x) (f8(x) ^ (x))
%define fb(x) (f8(x) ^ f2(x) ^ (x))
%define fd(x) (f8(x) ^ f4(x) ^ (x))
%define fe(x) (f8(x) ^ f4(x) ^ f2(x))
; Table access helpers. x is a register holding the table index; entries are
; addressed at an 8-byte stride (8*x) from _aes_enc_tab, and the small constant
; offset selects which bytes of the entry are read.
;
; etab_0 .. etab_3 are the operands used by the normal-round lookups
; (nr_xor / nr_mov); the access size comes from the register operand there.
; NOTE(review): presumably the four offsets yield the four byte-rotated forms
; of one table word -- confirm against the table generator in Aestab.c.
%define etab_0(x) [_aes_enc_tab+4+8*x]
%define etab_1(x) [_aes_enc_tab+3+8*x]
%define etab_2(x) [_aes_enc_tab+2+8*x]
%define etab_3(x) [_aes_enc_tab+1+8*x]
; Explicitly sized forms for zero-extending loads.
%define etab_b(x) byte [_aes_enc_tab+1+8*x] ; used with movzx for 0x000000xx
%define etab_w(x) word [_aes_enc_tab+8*x] ; used with movzx for 0x0000xx00
; btab_0 .. btab_3 are used only by the alternative (disabled) last-round
; macros, which mask the loaded word down to byte position 0 .. 3.
%define btab_0(x) [_aes_enc_tab+6+8*x]
%define btab_1(x) [_aes_enc_tab+5+8*x]
%define btab_2(x) [_aes_enc_tab+4+8*x]
%define btab_3(x) [_aes_enc_tab+3+8*x]
; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
;
; Macro parameters:
;
; %1 lookup macro that XORs its result into the destination
; %2 lookup macro that loads its result into the destination
;
; Both are invoked as: op dest, byte_reg, table_index, scratch
; (nr_xor/nr_mov for normal rounds, lr_xor/lr_mov for the last round).
;
; Because ESI and EDI already hold key words when the lookups are XORed in,
; columns [2] and [3] come out keyed. Columns [0] and [1] are started with %2
; and therefore still need their key words XORed in by the code that follows
; the macro.
;
; Input:
;
; EAX column[0]
; EBX column[1]
; ECX column[2]
; EDX column[3]
; ESI column key[round][2]
; EDI column key[round][3]
; EBP scratch
;
; Output:
;
; EBP column[0] unkeyed
; EBX column[1] unkeyed
; ESI column[2] keyed
; EDI column[3] keyed
; EAX scratch
; ECX scratch
; EDX scratch
%macro rnd_fun 2
; Swap the halves of EBX so bytes 2 and 3 of column[1] are reachable as bl/bh.
rol ebx,16
; New column[2] (ESI): byte 0 of column[2], byte 1 of column[3] and byte 3 of
; column[1]. EBP is still free, so it serves as the lookup scratch register.
%1 esi, cl, 0, ebp
%1 esi, dh, 1, ebp
%1 esi, bh, 3, ebp
; New column[3] (EDI): byte 0 of column[3], byte 1 of column[0] and byte 2 of
; column[1].
%1 edi, dl, 0, ebp
%1 edi, ah, 1, ebp
%1 edi, bl, 2, ebp
; New column[0] (EBP) starts from byte 0 of column[0]; EBP is both scratch and
; destination here.
%2 ebp, al, 0, ebp
; Repack the inputs that are still needed:
; EBX = low half of the original column[1] (bytes 0 and 1)
; EAX = high half of column[0] combined with the low half of column[1]
; EDX = high half of column[3] moved down into dl/dh
shr ebx,16
and eax,0xffff0000
or eax,ebx
shr edx,16
; New column[0] adds byte 1 of column[1] (now in ah) and byte 3 of column[3].
; EBX has been copied into EAX, so it is free to use as scratch.
%1 ebp, ah, 1, ebx
%1 ebp, dh, 3, ebx
; New column[1] (EBX) starts from byte 2 of column[3], then adds byte 1 of
; column[2] and byte 0 of column[1] (now in al). EDX is free after the first
; of these lookups and becomes the scratch register.
%2 ebx, dl, 2, ebx
%1 ebx, ch, 1, edx
%1 ebx, al, 0, edx
; Bring the high halves of column[0] and column[2] down into al/ah and cl/ch.
shr eax,16
shr ecx,16
; Remaining contributions: byte 2 of column[2] to column[0], byte 3 of
; column[2] to column[3], byte 2 of column[0] to column[2] and byte 3 of
; column[0] to column[1].
%1 ebp, cl, 2, edx
%1 edi, ch, 3, edx
%1 esi, al, 2, edx
%1 ebx, ah, 3, edx
%endmacro
; Basic MOV and XOR Operations for normal rounds
;
; Arguments for both macros:
;   %1 destination register
;   %2 byte register holding the table index
;   %3 table selector (0..3), pasted onto "etab_" to pick etab_0 .. etab_3
;   %4 scratch register that receives the zero-extended index
;      (may be the same register as %1 for nr_mov)
;
; nr_xor: %1 ^= etab_<%3>[%2]
%macro nr_xor 4
movzx %4,%2
xor %1,etab_%3(%4)
%endmacro
; nr_mov: %1 = etab_<%3>[%2]
%macro nr_mov 4
movzx %4,%2
mov %1,etab_%3(%4)
%endmacro
; Basic MOV and XOR Operations for last round
;
; Same argument convention as nr_xor / nr_mov, but here %3 is a byte position
; (0..3): a single byte is looked up and placed at bits 8*%3 .. 8*%3+7 of the
; result, with all other bits zero.
;
; Two implementations are provided; "%if 1" selects the first one.
%if 1
; lr_xor: %1 ^= (table byte for %2) << (8 * %3)
%macro lr_xor 4
movzx %4,%2
movzx %4,etab_b(%4)
; position 0 needs no shift
%if %3 != 0
shl %4,8*%3
%endif
xor %1,%4
%endmacro
; lr_mov: %1 = (table byte for %2) << (8 * %3)
%macro lr_mov 4
movzx %4,%2
movzx %1,etab_b(%4)
; position 0 needs no shift
%if %3 != 0
shl %1,8*%3
%endif
%endmacro
%else ; less effective but worth leaving as an option
; Alternative: load a whole word through btab_<%3> and mask off everything
; except the byte at position %3, instead of loading one byte and shifting.
%macro lr_xor 4
movzx %4,%2
mov %4,btab_%3(%4)
and %4,0x000000ff << 8 * %3
xor %1,%4
%endmacro
%macro lr_mov 4
movzx %4,%2
mov %1,btab_%3(%4)
and %1,0x000000ff << 8 * %3
%endmacro
%endif
; l3s_col -- apply the byte table (etab_b) to each of the 4 bytes of a 32-bit
; word and fold the result into EDX.
;
;   In:      eax = word whose four bytes are looked up
;            edx = value the looked-up bytes are XORed into
;   Out:     eax = incoming edx XOR the looked-up word
;   Scratch: ecx, edx (edx finishes equal to eax), arithmetic flags
;
; Each looked-up byte is XORed in at the byte position it was taken from.
; NOTE(review): the original heading also mentions rotating byte positions;
; no rotation is performed here, so presumably the caller does it -- confirm.
;
; The instruction sequence is written once and shared by both build variants.
%macro l3s_col_body 0
    movzx   ecx,al                  ; byte 0 stays at bits 0..7
    movzx   ecx, etab_b(ecx)
    xor     edx,ecx
    movzx   ecx,ah                  ; byte 1 goes to bits 8..15
    movzx   ecx, etab_b(ecx)
    shl     ecx,8
    xor     edx,ecx
    shr     eax,16                  ; expose bytes 2 and 3 as al/ah
    movzx   ecx,al                  ; byte 2 goes to bits 16..23
    movzx   ecx, etab_b(ecx)
    shl     ecx,16
    xor     edx,ecx
    movzx   ecx,ah                  ; byte 3 goes to bits 24..31
    movzx   ecx, etab_b(ecx)
    shl     ecx,24
    xor     edx,ecx
    mov     eax,edx                 ; hand the result back in eax
%endmacro

%ifdef REDUCE_CODE_SIZE
; Reduced-size build: l3s_col is a subroutine.
l3s_col:
    l3s_col_body
    ret
%else
; Macro build: l3s_col expands in place.
%macro l3s_col 0
    l3s_col_body
%endmacro
%endif
; offsets to parameters
; The three parameters are spaced 2 bytes apart.
; NOTE(review): presumably these are offsets from the stack pointer at entry,
; i.e. just above a 2-byte near return address -- the code that uses them is
; outside this part of the file, so confirm there.
in_blk equ 2 ; input byte array address parameter
out_blk equ 4 ; output byte array address parameter
ctx equ 6 ; AES context structure
; Size of the local stack area (presumably in bytes -- confirm where used).
stk_spc equ 20 ; stack space
%ifdef ENCRYPTION
; %define ENCRYPTION_TABLE
; enc_round -- one normal encryption round, provided either as a subroutine
; (REDUCE_CODE_SIZE) or as an inline macro.
;
;   In:       eax, ebx, ecx, edx = state columns [0]..[3]
;             ebp = round-key pointer; it is advanced by 16 bytes here and
;                   the four key words are then read from [ebp] .. [ebp+12]
;   Out:      eax, ebx, ecx, edx = new state columns, all four keyed
;             ebp = the advanced round-key pointer
;   Clobbers: esi, edi, arithmetic flags; one stack slot is written by save
;
; EBP has to be parked on the stack because rnd_fun builds column[0] in it.
%ifdef REDUCE_CODE_SIZE
enc_round:
; NOTE(review): slot 1 below is [esp+4]. With these 2 reserved bytes plus a
; 2-byte near return address that would coincide with [esp+0] as seen by the
; caller, i.e. the slot the inline variant addresses as slot 0 -- confirm that
; every caller has that slot reserved.
sub sp, 2
; step to this round's key and park the pointer
add ebp,16
save 1,ebp
; pre-load the key words for columns [2] and [3], as rnd_fun expects
mov esi,[ebp+8]
mov edi,[ebp+12]
rnd_fun nr_xor, nr_mov
; move the new columns back to the registers used for the state
mov eax,ebp
mov ecx,esi
mov edx,edi
; recover the key pointer and key columns [0] and [1], which rnd_fun left
; unkeyed
restore ebp,1
xor eax,[ebp]
xor ebx,[ebp+4]
; release the 2 bytes reserved on entry
add sp, 2
ret
%else
; Inline variant: same sequence without the stack adjustment, using slot 0
; ([esp+0]) to park the key pointer.
%macro enc_round 0
add ebp,16
save 0,ebp
mov esi,[ebp+8]
mov edi,[ebp+12]
rnd_fun nr_xor, nr_mov
mov eax,ebp
mov ecx,esi
mov edx,edi
restore ebp,0
xor eax,[ebp]
xor ebx,[ebp+4]
%endmacro
%endif
%macro enc_last_round 0
add ebp,16
save 0,ebp
mov esi,[ebp+8]
mov edi,[ebp+12]
rnd_fun lr_xor, lr_mov
mov eax,ebp
restore ebp,0
xor eax,[ebp]
xor ebx,[ebp+4]
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -