📄 aes-x86.s
字号:
//
// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
// All rights reserved.
//
// TERMS
//
//  Redistribution and use in source and binary forms, with or without
//  modification, are permitted subject to the following conditions:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The copyright holder's name must not be used to endorse or promote
//     any products derived from this software without his specific prior
//     written permission.
//
//  This software is provided 'as is' with no express or implied warranties
//  of correctness or fitness for purpose.

// Modified by Jari Ruusu,  December 24 2001
//  - Converted syntax to GNU CPP/assembler syntax
//  - C programming interface converted back to "old" API
//  - Minor portability cleanups and speed optimizations

// Modified by Jari Ruusu,  April 11 2002
//  - Added above copyright and terms to resulting object code so that
//    binary distributions can avoid legal trouble

// An AES (Rijndael) implementation for x86 compatible processors. This
// version uses i386 instruction set but instruction scheduling is optimized
// for Pentium-2. This version only implements the standard AES block length
// (128 bits, 16 bytes). This code does not preserve the eax, ecx or edx
// registers or the arithmetic status flags. However, the ebx, esi, edi, and
// ebp registers are preserved across calls.

// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])

// Syntax: GNU assembler, AT&T operand order (source first, destination
// last), passed through the C preprocessor before assembly.

// When USE_UNDERLINE is defined the three exported symbols are renamed
// with a leading underscore.
#if defined(USE_UNDERLINE)
# define aes_set_key _aes_set_key
# define aes_encrypt _aes_encrypt
# define aes_decrypt _aes_decrypt
#endif

// Operand handed to every .align directive in this file; may be overridden
// from the build command line.
#if !defined(ALIGN32BYTES)
# define ALIGN32BYTES 32
#endif

        .file   "aes-x86.S"
        .globl  aes_set_key
        .globl  aes_encrypt
        .globl  aes_decrypt

        .text

// Copyright and terms emitted as NUL-terminated strings into the object
// code (see the April 11 2002 modification note above).
copyright:
        .ascii " \000"
        .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
        .ascii "All rights reserved.\000"
        .ascii " \000"
        .ascii "TERMS\000"
        .ascii " \000"
        .ascii " Redistribution and use in source and binary forms, with or without\000"
        .ascii " modification, are permitted subject to the following conditions:\000"
        .ascii " \000"
        .ascii " 1. Redistributions of source code must retain the above copyright\000"
        .ascii " notice, this list of conditions and the following disclaimer.\000"
        .ascii " \000"
        .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
        .ascii " notice, this list of conditions and the following disclaimer in the\000"
        .ascii " documentation and/or other materials provided with the distribution.\000"
        .ascii " \000"
        .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
        .ascii " any products derived from this software without his specific prior\000"
        .ascii " written permission.\000"
        .ascii " \000"
        .ascii " This software is provided 'as is' with no express or implied warranties\000"
        .ascii " of correctness or fitness for purpose.\000"
        .ascii " \000"

#define tlen    1024    // length of each of 4 'xor' arrays (256 32-bit words)

// offsets to parameters with one register pushed onto stack

#define ctx     8       // AES context structure
#define in_blk  12      // input byte array address parameter
#define out_blk 16      // output byte array address parameter

// offsets in context structure

#define nkey    0       // key length, size 4
#define nrnd    4       // number of rounds, size 4
#define ekey    8       // encryption key schedule base address, size 256
#define dkey    264     // decryption key schedule base address, size 256

// This macro performs a forward encryption cycle. It is entered with
// the first previous round column values in %eax, %ebx, %esi and %edi and
// exits with the final values in the same registers.
//
//   p1  label of the first of four consecutive lookup tables, each tlen
//       bytes long; they are addressed as p1, p1+tlen, p1+2*tlen and
//       p1+3*tlen and indexed by one byte of an input column, scaled by 4.
//   p2  byte offset from %ebp of the four round key words for this cycle
//       (read at p2, p2+4, p2+8 and p2+12).
//
// The incoming %ebx and %edi are parked in (%esp) and 4(%esp), so the
// caller must have reserved those eight bytes. %ecx and %edx are used as
// scratch and are not preserved. The whole macro expands to one line of
// statements separated by ';'.

#define fwd_rnd(p1,p2)                       \
        mov     %ebx,(%esp)                 ;\
        movzbl  %al,%edx                    ;\
        mov     %eax,%ecx                   ;\
        mov     p2(%ebp),%eax               ;\
        mov     %edi,4(%esp)                ;\
        mov     p2+12(%ebp),%edi            ;\
        xor     p1(,%edx,4),%eax            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        mov     p2+4(%ebp),%ebx             ;\
        xor     p1+tlen(,%edx,4),%edi       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+3*tlen(,%ecx,4),%ebx     ;\
        mov     %esi,%ecx                   ;\
        mov     p1+2*tlen(,%edx,4),%esi     ;\
        movzbl  %cl,%edx                    ;\
        xor     p1(,%edx,4),%esi            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        xor     p1+tlen(,%edx,4),%ebx       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+2*tlen(,%edx,4),%eax     ;\
        mov     (%esp),%edx                 ;\
        xor     p1+3*tlen(,%ecx,4),%edi     ;\
        movzbl  %dl,%ecx                    ;\
        xor     p2+8(%ebp),%esi             ;\
        xor     p1(,%ecx,4),%ebx            ;\
        movzbl  %dh,%ecx                    ;\
        shr     $16,%edx                    ;\
        xor     p1+tlen(,%ecx,4),%eax       ;\
        movzbl  %dl,%ecx                    ;\
        movzbl  %dh,%edx                    ;\
        xor     p1+2*tlen(,%ecx,4),%edi     ;\
        mov     4(%esp),%ecx                ;\
        xor     p1+3*tlen(,%edx,4),%esi     ;\
        movzbl  %cl,%edx                    ;\
        xor     p1(,%edx,4),%edi            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        xor     p1+tlen(,%edx,4),%esi       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+2*tlen(,%edx,4),%ebx     ;\
        xor     p1+3*tlen(,%ecx,4),%eax

// This macro performs an inverse encryption cycle. It is entered with
// the first previous round column values in %eax, %ebx, %esi and %edi and
// exits with the final values in the same registers.
//
// Parameters, stack scratch slots and clobbered registers are the same as
// for fwd_rnd; what differs is which output column each table lookup is
// xored into.

#define inv_rnd(p1,p2)                       \
        movzbl  %al,%edx                    ;\
        mov     %ebx,(%esp)                 ;\
        mov     %eax,%ecx                   ;\
        mov     p2(%ebp),%eax               ;\
        mov     %edi,4(%esp)                ;\
        mov     p2+4(%ebp),%ebx             ;\
        xor     p1(,%edx,4),%eax            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        mov     p2+12(%ebp),%edi            ;\
        xor     p1+tlen(,%edx,4),%ebx       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+3*tlen(,%ecx,4),%edi     ;\
        mov     %esi,%ecx                   ;\
        mov     p1+2*tlen(,%edx,4),%esi     ;\
        movzbl  %cl,%edx                    ;\
        xor     p1(,%edx,4),%esi            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        xor     p1+tlen(,%edx,4),%edi       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+2*tlen(,%edx,4),%eax     ;\
        mov     (%esp),%edx                 ;\
        xor     p1+3*tlen(,%ecx,4),%ebx     ;\
        movzbl  %dl,%ecx                    ;\
        xor     p2+8(%ebp),%esi             ;\
        xor     p1(,%ecx,4),%ebx            ;\
        movzbl  %dh,%ecx                    ;\
        shr     $16,%edx                    ;\
        xor     p1+tlen(,%ecx,4),%esi       ;\
        movzbl  %dl,%ecx                    ;\
        movzbl  %dh,%edx                    ;\
        xor     p1+2*tlen(,%ecx,4),%edi     ;\
        mov     4(%esp),%ecx                ;\
        xor     p1+3*tlen(,%edx,4),%eax     ;\
        movzbl  %cl,%edx                    ;\
        xor     p1(,%edx,4),%edi            ;\
        movzbl  %ch,%edx                    ;\
        shr     $16,%ecx                    ;\
        xor     p1+tlen(,%edx,4),%eax       ;\
        movzbl  %cl,%edx                    ;\
        movzbl  %ch,%ecx                    ;\
        xor     p1+2*tlen(,%edx,4),%ebx     ;\
        xor     p1+3*tlen(,%ecx,4),%esi

// AES (Rijndael) Encryption Subroutine
//
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
//
// Arguments are read from the stack. %ebp, %ebx, %esi and %edi are pushed
// on entry and popped before returning; %eax, %ecx and %edx are
// overwritten. Eight bytes of stack below the saved registers serve as the
// scratch slots that fwd_rnd addresses as (%esp) and 4(%esp).

        .text
        .align  ALIGN32BYTES
aes_encrypt:
        push    %ebp
        mov     ctx(%esp),%ebp          // pointer to context
        mov     in_blk(%esp),%ecx
        push    %ebx
        push    %esi
        push    %edi
        mov     nrnd(%ebp),%edx         // number of rounds
        lea     ekey+16(%ebp),%ebp      // key pointer

// input four columns and xor in first round key
// (%ebp already points 16 bytes into the schedule, hence the negative
// offsets for the first four key words)

        mov     (%ecx),%eax
        mov     4(%ecx),%ebx
        mov     8(%ecx),%esi
        mov     12(%ecx),%edi
        xor     -16(%ebp),%eax
        xor     -12(%ebp),%ebx
        xor     -8(%ebp),%esi
        xor     -4(%ebp),%edi

        sub     $8,%esp                 // space for register saves on stack

// Select the entry point from the round count: 10 enters at aes_15, 12
// enters at aes_13 after advancing the key pointer by 32 bytes, and any
// other value advances it by 64 bytes and runs all 14 cycles. The key
// offsets passed to fwd_rnd are relative to the adjusted %ebp.

        sub     $10,%edx
        je      aes_15
        add     $32,%ebp
        sub     $2,%edx
        je      aes_13
        add     $32,%ebp

        fwd_rnd(aes_ft_tab,-64)         // 14 rounds for 256-bit key
        fwd_rnd(aes_ft_tab,-48)
aes_13:
        fwd_rnd(aes_ft_tab,-32)         // 12 rounds for 192-bit key
        fwd_rnd(aes_ft_tab,-16)
aes_15:
        fwd_rnd(aes_ft_tab,0)           // 10 rounds for 128-bit key
        fwd_rnd(aes_ft_tab,16)
        fwd_rnd(aes_ft_tab,32)
        fwd_rnd(aes_ft_tab,48)
        fwd_rnd(aes_ft_tab,64)
        fwd_rnd(aes_ft_tab,80)
        fwd_rnd(aes_ft_tab,96)
        fwd_rnd(aes_ft_tab,112)
        fwd_rnd(aes_ft_tab,128)
        fwd_rnd(aes_fl_tab,144)         // last round uses a different table

// move final values to the output array.
// out_blk is defined for one pushed register; the three further pushes
// (12 bytes) and the 8 scratch bytes account for the extra 20.

        mov     out_blk+20(%esp),%ebp
        add     $8,%esp
        mov     %eax,(%ebp)
        mov     %ebx,4(%ebp)
        mov     %esi,8(%ebp)
        mov     %edi,12(%ebp)
        pop     %edi
        pop     %esi
        pop     %ebx
        pop     %ebp
        ret

// AES (Rijndael) Decryption Subroutine
//
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
//
// Same structure as aes_encrypt, but it reads the schedule at dkey and
// uses inv_rnd with the aes_it_tab and aes_il_tab tables. Register saving,
// stack scratch space and clobbered registers are the same.

        .align  ALIGN32BYTES
aes_decrypt:
        push    %ebp
        mov     ctx(%esp),%ebp          // pointer to context
        mov     in_blk(%esp),%ecx
        push    %ebx
        push    %esi
        push    %edi
        mov     nrnd(%ebp),%edx         // number of rounds
        lea     dkey+16(%ebp),%ebp      // key pointer

// input four columns and xor in first round key

        mov     (%ecx),%eax
        mov     4(%ecx),%ebx
        mov     8(%ecx),%esi
        mov     12(%ecx),%edi
        xor     -16(%ebp),%eax
        xor     -12(%ebp),%ebx
        xor     -8(%ebp),%esi
        xor     -4(%ebp),%edi

        sub     $8,%esp                 // space for register saves on stack

// Round-count dispatch: 10 enters at aes_25, 12 at aes_23 with the key
// pointer advanced by 32 bytes, anything else runs all 14 cycles with the
// key pointer advanced by 64 bytes.

        sub     $10,%edx
        je      aes_25
        add     $32,%ebp
        sub     $2,%edx
        je      aes_23
        add     $32,%ebp

        inv_rnd(aes_it_tab,-64)         // 14 rounds for 256-bit key
        inv_rnd(aes_it_tab,-48)
aes_23:
        inv_rnd(aes_it_tab,-32)         // 12 rounds for 192-bit key
        inv_rnd(aes_it_tab,-16)
aes_25:
        inv_rnd(aes_it_tab,0)           // 10 rounds for 128-bit key
        inv_rnd(aes_it_tab,16)
        inv_rnd(aes_it_tab,32)
        inv_rnd(aes_it_tab,48)
        inv_rnd(aes_it_tab,64)
        inv_rnd(aes_it_tab,80)
        inv_rnd(aes_it_tab,96)
        inv_rnd(aes_it_tab,112)
        inv_rnd(aes_it_tab,128)
        inv_rnd(aes_il_tab,144)         // last round uses a different table

// move final values to the output array.
// (out_blk+20: 12 bytes of extra pushes plus the 8 scratch bytes)

        mov     out_blk+20(%esp),%ebp
        add     $8,%esp
        mov     %eax,(%ebp)
        mov     %ebx,4(%ebp)
        mov     %esi,8(%ebp)
        mov     %edi,12(%ebp)
        pop     %edi
        pop     %esi
        pop     %ebx
        pop     %ebp
        ret

// AES (Rijndael) Key Schedule Subroutine

// input/output parameters
// (offsets from %ebp once aes_set_key has pushed the flags and %ebp and
// copied %esp into %ebp, which leaves the return address at 8(%ebp))

#define aes_cx  12      // AES context
#define in_key  16      // key input array address
#define key_ln  20      // key length, bytes (16,24,32) or bits (128,192,256)
#define ed_flg  24      // 0=create both encr/decr keys, 1=create encr key only

// offsets for locals

#define cnt     -4
#define slen    8

// This macro performs a column mixing operation on an input 32-bit
// word to give a 32-bit result. It uses each of the 4 bytes in the
// input column to index 4 different tables of 256 32-bit words
// that are xored together to form the output value.
//
// Input word in %ebx, result in %eax, %ecx is scratch. p1 is the label of
// the first table; the other three follow at tlen byte intervals. %ebx is
// left rotated right by 16 bits on exit; it is not restored here.

#define mix_col(p1)                          \
        movzbl  %bl,%ecx                    ;\
        mov     p1(,%ecx,4),%eax            ;\
        movzbl  %bh,%ecx                    ;\
        ror     $16,%ebx                    ;\
        xor     p1+tlen(,%ecx,4),%eax       ;\
        movzbl  %bl,%ecx                    ;\
        xor     p1+2*tlen(,%ecx,4),%eax     ;\
        movzbl  %bh,%ecx                    ;\
        xor     p1+3*tlen(,%ecx,4),%eax

// Key Schedule Macros
//
// Each macro emits one step of the key expansion for a 4, 6 or 8 word
// key. p1 is the step index: it selects the 32-bit constant at
// aes_rcon_tab + 4*p1 and the store position 16*p1, 24*p1 or 32*p1 bytes
// from %edi. On entry %esi, %ebp, %edx and %ebx hold the four most recent
// key words in ascending order, and they hold the four newest words on
// exit. %eax and %ecx are scratch.
//
// The rol $24 before mix_col, the ror $16 inside it and the ror $8 after
// it add up to a full 32-bit rotation, so %ebx is back to its entry value
// before it is xored with %edx.

#define ksc4(p1)                             \
        rol     $24,%ebx                    ;\
        mix_col(aes_fl_tab)                 ;\
        ror     $8,%ebx                     ;\
        xor     4*p1+aes_rcon_tab,%eax      ;\
        xor     %eax,%esi                   ;\
        xor     %esi,%ebp                   ;\
        mov     %esi,16*p1(%edi)            ;\
        mov     %ebp,16*p1+4(%edi)          ;\
        xor     %ebp,%edx                   ;\
        xor     %edx,%ebx                   ;\
        mov     %edx,16*p1+8(%edi)          ;\
        mov     %ebx,16*p1+12(%edi)

// ksc6 additionally reads the two words stored 24 and 20 bytes before the
// current store position, since only four of the six previous words are
// held in registers.

#define ksc6(p1)                             \
        rol     $24,%ebx                    ;\
        mix_col(aes_fl_tab)                 ;\
        ror     $8,%ebx                     ;\
        xor     4*p1+aes_rcon_tab,%eax      ;\
        xor     24*p1-24(%edi),%eax         ;\
        mov     %eax,24*p1(%edi)            ;\
        xor     24*p1-20(%edi),%eax         ;\
        mov     %eax,24*p1+4(%edi)          ;\
        xor     %eax,%esi                   ;\
        xor     %esi,%ebp                   ;\
        mov     %esi,24*p1+8(%edi)          ;\
        mov     %ebp,24*p1+12(%edi)         ;\
        xor     %ebp,%edx                   ;\
        xor     %edx,%ebx                   ;\
        mov     %edx,24*p1+16(%edi)         ;\
        mov     %ebx,24*p1+20(%edi)

// ksc8 reads the four words stored 32 to 20 bytes before the current store
// position, then applies mix_col a second time to the fourth new word
// without any rotation or round constant. %ebx is saved with push/pop
// around that second mix_col because mix_col takes its input in %ebx.

#define ksc8(p1)                             \
        rol     $24,%ebx                    ;\
        mix_col(aes_fl_tab)                 ;\
        ror     $8,%ebx                     ;\
        xor     4*p1+aes_rcon_tab,%eax      ;\
        xor     32*p1-32(%edi),%eax         ;\
        mov     %eax,32*p1(%edi)            ;\
        xor     32*p1-28(%edi),%eax         ;\
        mov     %eax,32*p1+4(%edi)          ;\
        xor     32*p1-24(%edi),%eax         ;\
        mov     %eax,32*p1+8(%edi)          ;\
        xor     32*p1-20(%edi),%eax         ;\
        mov     %eax,32*p1+12(%edi)         ;\
        push    %ebx                        ;\
        mov     %eax,%ebx                   ;\
        mix_col(aes_fl_tab)                 ;\
        pop     %ebx                        ;\
        xor     %eax,%esi                   ;\
        xor     %esi,%ebp                   ;\
        mov     %esi,32*p1+16(%edi)         ;\
        mov     %ebp,32*p1+20(%edi)         ;\
        xor     %ebp,%edx                   ;\
        xor     %edx,%ebx                   ;\
        mov     %edx,32*p1+24(%edi)         ;\
        mov     %ebx,32*p1+28(%edi)

// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
//
// Saves the flags register, %ebp, %ebx, %esi and %edi, reserves slen bytes
// of locals, stores the key length in words and the round count in the
// context, copies the user key to the start of the encryption schedule and
// then expands it with the ksc macros.

        .align  ALIGN32BYTES
aes_set_key:
        pushfl
        push    %ebp
        mov     %esp,%ebp
        sub     $slen,%esp
        push    %ebx
        push    %esi
        push    %edi

        mov     aes_cx(%ebp),%edx       // edx -> AES context

// Normalise the key length. Values below 128 (unsigned compare) are taken
// as a byte count; anything else is taken as a bit count and divided by 8.
// A result that is neither 32 nor 24 is replaced by 16.

        mov     key_ln(%ebp),%ecx       // key length
        cmpl    $128,%ecx
        jb      aes_30
        shr     $3,%ecx
aes_30:
        cmpl    $32,%ecx
        je      aes_32
        cmpl    $24,%ecx
        je      aes_32
        mov     $16,%ecx
aes_32:
        shr     $2,%ecx                 // bytes -> 32-bit words
        mov     %ecx,nkey(%edx)

        lea     6(%ecx),%eax            // 10/12/14 for 4/6/8 32-bit key length
        mov     %eax,nrnd(%edx)

        mov     in_key(%ebp),%esi       // key input array
        lea     ekey(%edx),%edi         // key position in AES context
        cld                             // make the string copy below run upwards
        push    %ebp                    // frame pointer is saved; %ebp is reused for a key word below
        mov     %ecx,%eax               // save key length in eax
        rep ;   movsl                   // words in the key schedule

// After the copy %esi points just past the input key and %edi just past
// its copy in the schedule, so the last four key words are reloaded from
// negative offsets.

        mov     -4(%esi),%ebx           // put some values in registers
        mov     -8(%esi),%edx           // to allow faster code
        mov     -12(%esi),%ebp
        mov     -16(%esi),%esi

        cmpl    $4,%eax                 // jump on key size
        je      aes_36
        cmpl    $6,%eax
        je      aes_35

        ksc8(0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -