📄 aes_x86_v2.asm

📁 一个用vc编写的aes算法实现
💻 ASM
📖 第 1 页 / 共 3 页
字号:
12 3 下一页

; ---------------------------------------------------------------------------
; Copyright (c) 2002, Dr Brian Gladman, Worcester, UK.   All rights reserved.
;
; LICENSE TERMS
;
; The free distribution and use of this software in both source and binary
; form is allowed (with or without changes) provided that:
;
;   1. distributions of this source code include the above copyright
;      notice, this list of conditions and the following disclaimer;
;
;   2. distributions in binary form include the above copyright
;      notice, this list of conditions and the following disclaimer
;      in the documentation and/or other associated materials;
;
;   3. the copyright holder's name is not used to endorse products
;      built using this software without specific written permission.
;
; ALTERNATIVELY, provided that this notice is retained in full, this product
; may be distributed under the terms of the GNU General Public License (GPL),
; in which case the provisions of the GPL apply INSTEAD OF those given above.
;
; DISCLAIMER
;
; This software is provided 'as is' with no explicit or implied warranties
; in respect of its properties, including, but not limited to, correctness
; and/or fitness for purpose.
; ---------------------------------------------------------------------------
; Issue 09/09/2006

; This code requires either ASM_X86_V2 or ASM_X86_V2C to be set in aesopt.h
; and the same define to be set here as well. If AES_V2C is set this file
; requires the C files aeskey.c and aestab.c for support.

; An AES implementation for x86 processors using the YASM (or NASM) assembler.
; This is a full assembler implementation covering encryption, decryption and
; key scheduling. It uses 2k bytes of tables but its encryption and decryption
; performance is very close to that obtained using large tables.  Key schedule
; expansion is slower for both encryption and decryption but this is likely to
; be offset by the much smaller load that this version places on the processor
; cache. I acknowledge the contribution made by Daniel Bernstein to aspects of
; the design of the AES round function used here.
;
; This code provides the standard AES block size (128 bits, 16 bytes) and the
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
; interface as my C implementation. The ebx, esi, edi and ebp registers are
; preserved across calls but eax, ecx and edx and the artihmetic status flags
; are not.  Although this is a full assembler implementation, it can be used
; in conjunction with my C code which provides faster key scheduling using
; large tables. In this case aeskey.c should be compiled with ASM_X86_V2C
; defined.  It is also important that the defines below match those used in the
; C code.  This code uses the VC++ register saving conentions; if it is used
; with another compiler, conventions for using and saving registers may need
; to be checked (and calling conventions).  The YASM command line for the VC++
; custom build step is:
;
;    yasm -Xvc -f win32 -D <Z> -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
;
; where <Z> is ASM_X86_V2 or ASM_X86_V2C.  The calling intefaces are:
;
;     AES_RETURN aes_encrypt(const unsigned char in_blk[],
;                   unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt(const unsigned char in_blk[],
;                   unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
;
;     AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
;                                            const aes_encrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
;                                            const aes_decrypt_ctx cx[1]);
;
;     AES_RETURN aes_encrypt_key(const unsigned char key[],
;                           unsigned int len, const aes_decrypt_ctx cx[1]);
;
;     AES_RETURN aes_decrypt_key(const unsigned char key[],
;                           unsigned int len, const aes_decrypt_ctx cx[1]);
;
; where <NNN> is 128, 102 or 256.  In the last two calls the length can be in
; either bits or bytes.
;
; Comment in/out the following lines to obtain the desired subroutines. These
; selections MUST match those in the C header file aes.h

%define AES_128                 ; define if AES with 128 bit keys is needed
%define AES_192                 ; define if AES with 192 bit keys is needed
%define AES_256                 ; define if AES with 256 bit keys is needed
%define AES_VAR                 ; define if a variable key size is needed
%define ENCRYPTION              ; define if encryption is needed
%define DECRYPTION              ; define if decryption is needed
%define AES_REV_DKS             ; define if key decryption schedule is reversed

%ifndef ASM_X86_V2C
%define ENCRYPTION_KEY_SCHEDULE ; define if encryption key expansion is needed
%define DECRYPTION_KEY_SCHEDULE ; define if decryption key expansion is needed
%endif

; The encryption key schedule has the following in memory layout where N is the
; number of rounds (10, 12 or 14):
;
; lo: | input key (round 0)  |  ; each round is four 32-bit words
;     | encryption round 1   |
;     | encryption round 2   |
;     ....
;     | encryption round N-1 |
; hi: | encryption round N   |
;
; The decryption key schedule is normally set up so that it has the same
; layout as above by actually reversing the order of the encryption key
; schedule in memory (this happens when AES_REV_DKS is set):
;
; lo: | decryption round 0   | =              | encryption round N   |
;     | decryption round 1   | = INV_MIX_COL[ | encryption round N-1 | ]
;     | decryption round 2   | = INV_MIX_COL[ | encryption round N-2 | ]
;     ....                       ....
;     | decryption round N-1 | = INV_MIX_COL[ | encryption round 1   | ]
; hi: | decryption round N   | =              | input key (round 0)  |
;
; with rounds except the first and last modified using inv_mix_column()
; But if AES_REV_DKS is NOT set the order of keys is left as it is for
; encryption so that it has to be accessed in reverse when used for
; decryption (although the inverse mix column modifications are done)
;
; lo: | decryption round 0   | =              | input key (round 0)  |
;     | decryption round 1   | = INV_MIX_COL[ | encryption round 1   | ]
;     | decryption round 2   | = INV_MIX_COL[ | encryption round 2   | ]
;     ....                       ....
;     | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
; hi: | decryption round N   | =              | encryption round N   |
;
; This layout is faster when the assembler key scheduling provided here
; is used.
;
; The DLL interface must use the _stdcall convention in which the number
; of bytes of parameter space is added after an @ to the sutine's name.
; We must also remove our parameters from the stack before return (see
; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.

;%define DLL_EXPORT

; End of user defines

%ifdef AES_VAR
%ifndef AES_128
%define AES_128
%endif
%ifndef AES_192
%define AES_192
%endif
%ifndef AES_256
%define AES_256
%endif
%endif

%ifdef AES_VAR
%define KS_LENGTH       60
%elifdef AES_256
%define KS_LENGTH       60
%elifdef AES_192
%define KS_LENGTH       52
%else
%define KS_LENGTH       44
%endif

; These macros implement stack based local variables

%macro  save 2
    mov     [esp+4*%1],%2
%endmacro

%macro  restore 2
    mov     %1,[esp+4*%2]
%endmacro

; the DLL has to implement the _stdcall calling interface on return
; In this case we have to take our parameters (3 4-byte pointers)
; off the stack

%define parms 12

%macro  do_name 1-2 parms
%ifndef DLL_EXPORT
    align 32
    global  %1
%1:
%else
    align 32
    global  %1@%2
    export  %1@%2
%1@%2:
%endif
%endmacro

%macro  do_call 1-2 parms
%ifndef DLL_EXPORT
    call    %1
    add     esp,%2
%else
    call    %1@%2
%endif
%endmacro

%macro  do_exit  0-1 parms
%ifdef DLL_EXPORT
    ret %1
%else
    ret
%endif
%endmacro

; finite field multiplies by {02}, {04} and {08}

%define f2(x)   ((x<<1)^(((x>>7)&1)*0x11b))
%define f4(x)   ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
%define f8(x)   ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))

; finite field multiplies required in table generation

%define f3(x)   (f2(x) ^ x)
%define f9(x)   (f8(x) ^ x)
%define fb(x)   (f8(x) ^ f2(x) ^ x)
%define fd(x)   (f8(x) ^ f4(x) ^ x)
%define fe(x)   (f8(x) ^ f4(x) ^ f2(x))

%define etab_0(x)   [enc_tab+4+8*x]
%define etab_1(x)   [enc_tab+3+8*x]
%define etab_2(x)   [enc_tab+2+8*x]
%define etab_3(x)   [enc_tab+1+8*x]
%define etab_b(x)   byte [enc_tab+1+8*x] ; used with movzx for 0x000000xx
%define etab_w(x)   word [enc_tab+8*x]   ; used with movzx for 0x0000xx00

%define btab_0(x)   [enc_tab+6+8*x]
%define btab_1(x)   [enc_tab+5+8*x]
%define btab_2(x)   [enc_tab+4+8*x]
%define btab_3(x)   [enc_tab+3+8*x]

; ROUND FUNCTION.  Build column[2] on ESI and column[3] on EDI that have the
; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
;
; Input:
;
;   EAX     column[0]
;   EBX     column[1]
;   ECX     column[2]
;   EDX     column[3]
;   ESI     column key[round][2]
;   EDI     column key[round][3]
;   EBP     scratch
;
; Output:
;
;   EBP     column[0]   unkeyed
;   EBX     column[1]   unkeyed
;   ESI     column[2]   keyed
;   EDI     column[3]   keyed
;   EAX     scratch
;   ECX     scratch
;   EDX     scratch

%macro rnd_fun 2

    rol     ebx,16
    %1      esi, cl, 0, ebp
    %1      esi, dh, 1, ebp
    %1      esi, bh, 3, ebp
    %1      edi, dl, 0, ebp
    %1      edi, ah, 1, ebp
    %1      edi, bl, 2, ebp
    %2      ebp, al, 0, ebp
    shr     ebx,16
    and     eax,0xffff0000
    or      eax,ebx
    shr     edx,16
    %1      ebp, ah, 1, ebx
    %1      ebp, dh, 3, ebx
    %2      ebx, dl, 2, ebx
    %1      ebx, ch, 1, edx
    %1      ebx, al, 0, edx
    shr     eax,16
    shr     ecx,16
    %1      ebp, cl, 2, edx
    %1      edi, ch, 3, edx
    %1      esi, al, 2, edx
    %1      ebx, ah, 3, edx

%endmacro

; Basic MOV and XOR Operations for normal rounds

%macro  nr_xor  4
    movzx   %4,%2
    xor     %1,etab_%3(%4)
%endmacro

%macro  nr_mov  4
    movzx   %4,%2
    mov     %1,etab_%3(%4)
%endmacro

; Basic MOV and XOR Operations for last round

%if 1

    %macro  lr_xor  4
        movzx   %4,%2
        movzx   %4,etab_b(%4)
    %if %3 != 0
        shl     %4,8*%3
    %endif
        xor     %1,%4
    %endmacro

    %macro  lr_mov  4
        movzx   %4,%2
        movzx   %1,etab_b(%4)
    %if %3 != 0
        shl     %1,8*%3
    %endif
    %endmacro

%else       ; less effective but worth leaving as an option

    %macro  lr_xor  4
        movzx   %4,%2
        mov     %4,btab_%3(%4)
        and     %4,0x000000ff << 8 * %3
        xor     %1,%4
    %endmacro

    %macro  lr_mov  4
        movzx   %4,%2
        mov     %1,btab_%3(%4)
        and     %1,0x000000ff << 8 * %3
    %endmacro

%endif

; Apply S-Box to the 4 bytes in a 32-bit word and rotate left 3 byte positions
;
;   %1 : output is xored into this register
;   %2 : input: a => eax; b => ebx; c => ecx; d => edx
;   %3 : scratch register

%macro l3s_col 3
    lr_xor  %1,%2h,0,%3
    lr_xor  %1,%2l,3,%3
    shr     e%2x,16
    lr_xor  %1,%2h,2,%3
    lr_xor  %1,%2l,1,%3
%endmacro

; offsets to parameters

in_blk  equ     4   ; input byte array address parameter
out_blk equ     8   ; output byte array address parameter
ctx     equ    12   ; AES context structure
stk_spc equ    20   ; stack space

%ifdef  ENCRYPTION

%define ENCRYPTION_TABLE

%macro enc_round 0

    add     ebp,16
    save    0,ebp
    mov     esi,[ebp+8]
    mov     edi,[ebp+12]

    rnd_fun nr_xor, nr_mov

    mov     eax,ebp
    mov     ecx,esi
    mov     edx,edi
    restore ebp,0
    xor     eax,[ebp]
    xor     ebx,[ebp+4]

%endmacro

%macro enc_last_round 0

    add     ebp,16
    save    0,ebp
    mov     esi,[ebp+8]
    mov     edi,[ebp+12]

    rnd_fun lr_xor, lr_mov

    mov     eax,ebp
    restore ebp,0
    xor     eax,[ebp]
    xor     ebx,[ebp+4]

%endmacro

    section .text align=32

; AES Encryption Subroutine

    do_name _aes_encrypt,12

    sub     esp,stk_spc
    mov     [esp+16],ebp
    mov     [esp+12],ebx
    mov     [esp+ 8],esi
    mov     [esp+ 4],edi

    mov     esi,[esp+in_blk+stk_spc] ; input pointer
    mov     eax,[esi   ]
    mov     ebx,[esi+ 4]
    mov     ecx,[esi+ 8]
    mov     edx,[esi+12]

    mov     ebp,[esp+ctx+stk_spc]    ; key pointer
    movzx   edi,byte [ebp+4*KS_LENGTH]
    xor     eax,[ebp   ]
    xor     ebx,[ebp+ 4]
    xor     ecx,[ebp+ 8]
    xor     edx,[ebp+12]

; determine the number of rounds

    cmp     edi,10*16
    je      .3
    cmp     edi,12*16
    je      .2
    cmp     edi,14*16
    je      .1
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -