📄 aes-amd64.s
字号:
//
// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
// All rights reserved.
//
// TERMS
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted subject to the following conditions:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The copyright holder's name must not be used to endorse or promote
// any products derived from this software without his specific prior
// written permission.
//
// This software is provided 'as is' with no express or implied warranties
// of correctness or fitness for purpose.

// Modified by Jari Ruusu, December 24 2001
// - Converted syntax to GNU CPP/assembler syntax
// - C programming interface converted back to "old" API
// - Minor portability cleanups and speed optimizations

// Modified by Jari Ruusu, April 11 2002
// - Added above copyright and terms to resulting object code so that
// binary distributions can avoid legal trouble

// Modified by Jari Ruusu, June 12 2004
// - Converted 32 bit x86 code to 64 bit AMD64 code
// - Re-wrote encrypt and decrypt code from scratch

// An AES (Rijndael) implementation for the AMD64. This version only
// implements the standard AES block length (128 bits, 16 bytes). This code
// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
// arithmetic status flags.
// However, the rbx, rbp and r12-r15 registers are
// preserved across calls.

// Exported entry points (C prototypes):
// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])

// Optionally prefix the exported symbols with an underscore.
#if defined(USE_UNDERLINE)
# define aes_set_key _aes_set_key
# define aes_encrypt _aes_encrypt
# define aes_decrypt _aes_decrypt
#endif

// Alignment used in front of every entry point and round label below.
#if !defined(ALIGN64BYTES)
# define ALIGN64BYTES 64
#endif

        .file   "aes-amd64.S"
        .globl  aes_set_key
        .globl  aes_encrypt
        .globl  aes_decrypt

// Copyright and licence text emitted into the object file as a sequence of
// NUL-terminated strings.
        .section .rodata
copyright:
        .ascii " \000"
        .ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
        .ascii "All rights reserved.\000"
        .ascii " \000"
        .ascii "TERMS\000"
        .ascii " \000"
        .ascii " Redistribution and use in source and binary forms, with or without\000"
        .ascii " modification, are permitted subject to the following conditions:\000"
        .ascii " \000"
        .ascii " 1. Redistributions of source code must retain the above copyright\000"
        .ascii " notice, this list of conditions and the following disclaimer.\000"
        .ascii " \000"
        .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
        .ascii " notice, this list of conditions and the following disclaimer in the\000"
        .ascii " documentation and/or other materials provided with the distribution.\000"
        .ascii " \000"
        .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
        .ascii " any products derived from this software without his specific prior\000"
        .ascii " written permission.\000"
        .ascii " \000"
        .ascii " This software is provided 'as is' with no express or implied warranties\000"
        .ascii " of correctness or fitness for purpose.\000"
        .ascii " \000"

#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)

// offsets in context structure
#define nkey 0 // key length, size 4
#define nrnd 4 // number of rounds, size 4
#define ekey 8 // encryption key schedule base address, size 256
#define dkey 264 // decryption key schedule base address, size 256

// This macro performs a forward encryption cycle. It is entered with
// the previous round column values in I1E, I2E, I3E and I4E and
// exits with the new column values in the OU1, OU2, OU3 and OU4 registers.
//
// Parameters:
//   p1          base label of the lookup tables; four tables are addressed
//               at p1, p1+tlen, p1+2*tlen and p1+3*tlen
//   p2          byte offset from %rbp of the four 32-bit round key words
//               that seed OU1..OU4
//   I1E,I1B,I1H 32-bit, low-byte and second-byte names of input column 1
//   I2E,I2B,I2H same for input column 2
//   I3E,I3B,I3R 32-bit, low-byte and 64-bit names of input column 3
//   I4E,I4B,I4R same for input column 4
//   OU1..OU4    32-bit output registers
//
// Columns 3 and 4 are passed without a second-byte register, so their bytes
// are exposed by shifting the register right 8 bits between lookups; after
// three such shifts only the top byte remains and the 64-bit name is used
// directly as the table index.
//
// Clobbers %edi, %esi, %r8d and %r13d (index scratch) and destroys all four
// input registers (they are shifted in place). Do not add line comments
// inside the macro body: every line ends in a continuation backslash.
//
// NOTE(review): the table operands have the form p1(,%reg,4) with no base
// register, i.e. absolute addressing - presumably this is not usable as-is
// in position-independent builds; confirm against the build flags.
#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
        movl    p2(%rbp),OU1 ;\
        movl    p2+4(%rbp),OU2 ;\
        movl    p2+8(%rbp),OU3 ;\
        movl    p2+12(%rbp),OU4 ;\
        movzbl  I1B,%edi ;\
        movzbl  I2B,%esi ;\
        movzbl  I3B,%r8d ;\
        movzbl  I4B,%r13d ;\
        shrl    $8,I3E ;\
        shrl    $8,I4E ;\
        xorl    p1(,%rdi,4),OU1 ;\
        xorl    p1(,%rsi,4),OU2 ;\
        xorl    p1(,%r8,4),OU3 ;\
        xorl    p1(,%r13,4),OU4 ;\
        movzbl  I2H,%esi ;\
        movzbl  I3B,%r8d ;\
        movzbl  I4B,%r13d ;\
        movzbl  I1H,%edi ;\
        shrl    $8,I3E ;\
        shrl    $8,I4E ;\
        xorl    p1+tlen(,%rsi,4),OU1 ;\
        xorl    p1+tlen(,%r8,4),OU2 ;\
        xorl    p1+tlen(,%r13,4),OU3 ;\
        xorl    p1+tlen(,%rdi,4),OU4 ;\
        shrl    $16,I1E ;\
        shrl    $16,I2E ;\
        movzbl  I3B,%r8d ;\
        movzbl  I4B,%r13d ;\
        movzbl  I1B,%edi ;\
        movzbl  I2B,%esi ;\
        xorl    p1+2*tlen(,%r8,4),OU1 ;\
        xorl    p1+2*tlen(,%r13,4),OU2 ;\
        xorl    p1+2*tlen(,%rdi,4),OU3 ;\
        xorl    p1+2*tlen(,%rsi,4),OU4 ;\
        shrl    $8,I4E ;\
        movzbl  I1H,%edi ;\
        movzbl  I2H,%esi ;\
        shrl    $8,I3E ;\
        xorl    p1+3*tlen(,I4R,4),OU1 ;\
        xorl    p1+3*tlen(,%rdi,4),OU2 ;\
        xorl    p1+3*tlen(,%rsi,4),OU3 ;\
        xorl    p1+3*tlen(,I3R,4),OU4

// This macro performs an inverse encryption cycle. It is entered with
// the previous round column values in I1E, I2E, I3E and I4E and
// exits with the new column values in the OU1, OU2, OU3 and OU4 registers.
//
// It mirrors fwd_rnd with the column roles reversed: here columns 1 and 2
// are the ones passed with a 64-bit name (I1R, I2R) and shifted in place,
// while columns 3 and 4 are passed with a second-byte register (I3H, I4H).
// The round key words are read from p2(%rbp)..p2+12(%rbp) as in fwd_rnd.
//
// Clobbers %edi, %esi, %r8d and %r13d and destroys all four input registers.
#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
        movl    p2+12(%rbp),OU4 ;\
        movl    p2+8(%rbp),OU3 ;\
        movl    p2+4(%rbp),OU2 ;\
        movl    p2(%rbp),OU1 ;\
        movzbl  I4B,%edi ;\
        movzbl  I3B,%esi ;\
        movzbl  I2B,%r8d ;\
        movzbl  I1B,%r13d ;\
        shrl    $8,I2E ;\
        shrl    $8,I1E ;\
        xorl    p1(,%rdi,4),OU4 ;\
        xorl    p1(,%rsi,4),OU3 ;\
        xorl    p1(,%r8,4),OU2 ;\
        xorl    p1(,%r13,4),OU1 ;\
        movzbl  I3H,%esi ;\
        movzbl  I2B,%r8d ;\
        movzbl  I1B,%r13d ;\
        movzbl  I4H,%edi ;\
        shrl    $8,I2E ;\
        shrl    $8,I1E ;\
        xorl    p1+tlen(,%rsi,4),OU4 ;\
        xorl    p1+tlen(,%r8,4),OU3 ;\
        xorl    p1+tlen(,%r13,4),OU2 ;\
        xorl    p1+tlen(,%rdi,4),OU1 ;\
        shrl    $16,I4E ;\
        shrl    $16,I3E ;\
        movzbl  I2B,%r8d ;\
        movzbl  I1B,%r13d ;\
        movzbl  I4B,%edi ;\
        movzbl  I3B,%esi ;\
        xorl    p1+2*tlen(,%r8,4),OU4 ;\
        xorl    p1+2*tlen(,%r13,4),OU3 ;\
        xorl    p1+2*tlen(,%rdi,4),OU2 ;\
        xorl    p1+2*tlen(,%rsi,4),OU1 ;\
        shrl    $8,I1E ;\
        movzbl  I4H,%edi ;\
        movzbl  I3H,%esi ;\
        shrl    $8,I2E ;\
        xorl    p1+3*tlen(,I1R,4),OU4 ;\
        xorl    p1+3*tlen(,%rdi,4),OU3 ;\
        xorl    p1+3*tlen(,%rsi,4),OU2 ;\
        xorl    p1+3*tlen(,I2R,4),OU1

// AES (Rijndael) Encryption Subroutine
//
// rdi = pointer to AES context
// rsi = pointer to input plaintext bytes
// rdx = pointer to output ciphertext bytes
//
// Register roles inside the routine:
//   eax, ecx, r10d, r11d   state columns on even rounds / final result
//   ebx, edx, r14d, r15d   state columns on odd rounds
//   rbp                    round key pointer (all round keys are addressed
//                          relative to it with fixed offsets)
//   r9                     saved output pointer (rdx is reused for state)
//   edi, esi, r8d, r13d    scratch used by fwd_rnd
// rbp, rbx, r13, r14 and r15 are pushed on entry and popped before return.
//
// The round count read from the context selects the entry point: 10 enters
// at aes_15, 12 enters at aes_13 (rbp advanced by 32), and any other value
// falls through to the longest path (rbp advanced by 64, four extra rounds
// ahead of aes_15). The last round uses aes_fl_tab instead of aes_ft_tab.

        .text
        .align  ALIGN64BYTES
aes_encrypt:
        movl    (%rsi),%eax             // read in plaintext
        movl    4(%rsi),%ecx
        movl    8(%rsi),%r10d
        movl    12(%rsi),%r11d
        pushq   %rbp
        leaq    ekey+16(%rdi),%rbp      // encryption key pointer
        movq    %rdx,%r9                // pointer to out block
        movl    nrnd(%rdi),%edx         // number of rounds
        pushq   %rbx
        pushq   %r13
        pushq   %r14
        pushq   %r15
        xorl    -16(%rbp),%eax          // xor in first round key
        xorl    -12(%rbp),%ecx
        xorl    -8(%rbp),%r10d
        xorl    -4(%rbp),%r11d
        subl    $10,%edx                // nrnd == 10: skip straight to the common tail
        je      aes_15
        addq    $32,%rbp                // two more round keys precede the tail
        subl    $2,%edx                 // nrnd == 12: two extra rounds
        je      aes_13
        addq    $32,%rbp                // otherwise four extra rounds
        fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        jmp     aes_13
        .align  ALIGN64BYTES
aes_13:
        fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        jmp     aes_15
        .align  ALIGN64BYTES
aes_15:
        fwd_rnd(aes_ft_tab,0, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
        fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %rbx
        popq    %rbp
        movl    %eax,(%r9)              // move final values to the output array.
        movl    %ecx,4(%r9)
        movl    %r10d,8(%r9)
        movl    %r11d,12(%r9)
        ret

// AES (Rijndael) Decryption Subroutine
//
// rdi = pointer to AES context
// rsi = pointer to input ciphertext bytes
// rdx = pointer to output plaintext bytes
//
// Same structure as aes_encrypt, with these differences visible below:
//   - the key pointer is taken from dkey instead of ekey
//   - the four input words are loaded in reverse order (word 3 into eax,
//     word 0 into r11d) and stored back in the same reversed order
//   - rounds use inv_rnd with aes_it_tab, and aes_il_tab for the last round
// rbp, rbx, r13, r14 and r15 are pushed on entry and popped before return.

        .align  ALIGN64BYTES
aes_decrypt:
        movl    12(%rsi),%eax           // read in ciphertext
        movl    8(%rsi),%ecx
        movl    4(%rsi),%r10d
        movl    (%rsi),%r11d
        pushq   %rbp
        leaq    dkey+16(%rdi),%rbp      // decryption key pointer
        movq    %rdx,%r9                // pointer to out block
        movl    nrnd(%rdi),%edx         // number of rounds
        pushq   %rbx
        pushq   %r13
        pushq   %r14
        pushq   %r15
        xorl    -4(%rbp),%eax           // xor in first round key
        xorl    -8(%rbp),%ecx
        xorl    -12(%rbp),%r10d
        xorl    -16(%rbp),%r11d
        subl    $10,%edx                // nrnd == 10: skip straight to the common tail
        je      aes_25
        addq    $32,%rbp                // two more round keys precede the tail
        subl    $2,%edx                 // nrnd == 12: two extra rounds
        je      aes_23
        addq    $32,%rbp                // otherwise four extra rounds
        inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        jmp     aes_23
        .align  ALIGN64BYTES
aes_23:
        inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        jmp     aes_25
        .align  ALIGN64BYTES
aes_25:
        inv_rnd(aes_it_tab,0, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
        inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
        popq    %r15
        popq    %r14
        popq    %r13
        popq    %rbx
        popq    %rbp
        movl    %eax,12(%r9)            // move final values to the output array.
        movl    %ecx,8(%r9)
        movl    %r10d,4(%r9)
        movl    %r11d,(%r9)
        ret

// AES (Rijndael) Key Schedule Subroutine

// This macro performs a column mixing operation on an input 32-bit
// word to give a 32-bit result. It uses each of the 4 bytes in the
// input column to index 4 different tables of 256 32-bit words
// that are xored together to form the output value.
//
// Input:    %ebx = input word, p1 = base label of the four tables
// Output:   %eax = xor of the four table entries
// Clobbers: %ecx (table index); %ebx is left rotated right by 16 bits
#define mix_col(p1) \
        movzbl  %bl,%ecx ;\
        movl    p1(,%rcx,4),%eax ;\
        movzbl  %bh,%ecx ;\
        ror     $16,%ebx ;\
        xorl    p1+tlen(,%rcx,4),%eax ;\
        movzbl  %bl,%ecx ;\
        xorl    p1+2*tlen(,%rcx,4),%eax ;\
        movzbl  %bh,%ecx ;\
        xorl    p1+3*tlen(,%rcx,4),%eax

// Key Schedule Macros
//
// Each macro emits one step of the key expansion; p1 is the step index and
// selects both the aes_rcon_tab entry (4*p1 bytes in) and the store offset
// from %rdi. The running key words live in %esi, %ebp, %edx and %ebx and
// are updated in place. The rol $24 before mix_col, the ror $16 inside it
// and the ror $8 after it add up to a full 32-bit rotation, so %ebx holds
// its original value again before it is xored. %eax and %ecx are scratch.

// ksc4: one step writing four words at 16*p1(%rdi).
#define ksc4(p1) \
        rol     $24,%ebx ;\
        mix_col(aes_fl_tab) ;\
        ror     $8,%ebx ;\
        xorl    4*p1+aes_rcon_tab,%eax ;\
        xorl    %eax,%esi ;\
        xorl    %esi,%ebp ;\
        movl    %esi,16*p1(%rdi) ;\
        movl    %ebp,16*p1+4(%rdi) ;\
        xorl    %ebp,%edx ;\
        xorl    %edx,%ebx ;\
        movl    %edx,16*p1+8(%rdi) ;\
        movl    %ebx,16*p1+12(%rdi)

// ksc6: one step writing six words at 24*p1(%rdi). The first two words are
// derived from the words stored 24 and 20 bytes below the current position.
#define ksc6(p1) \
        rol     $24,%ebx ;\
        mix_col(aes_fl_tab) ;\
        ror     $8,%ebx ;\
        xorl    4*p1+aes_rcon_tab,%eax ;\
        xorl    24*p1-24(%rdi),%eax ;\
        movl    %eax,24*p1(%rdi) ;\
        xorl    24*p1-20(%rdi),%eax ;\
        movl    %eax,24*p1+4(%rdi) ;\
        xorl    %eax,%esi ;\
        xorl    %esi,%ebp ;\
        movl    %esi,24*p1+8(%rdi) ;\
        movl    %ebp,24*p1+12(%rdi) ;\
        xorl    %ebp,%edx ;\
        xorl    %edx,%ebx ;\
        movl    %edx,24*p1+16(%rdi) ;\
        movl    %ebx,24*p1+20(%rdi)

// ksc8: one step writing eight words at 32*p1(%rdi). The first four words
// are derived from the words stored 32..20 bytes below the current position;
// the fourth of them is then passed through mix_col a second time (without
// rotation or round constant, %rbx saved on the stack around it) before
// being folded into the remaining four words.
#define ksc8(p1) \
        rol     $24,%ebx ;\
        mix_col(aes_fl_tab) ;\
        ror     $8,%ebx ;\
        xorl    4*p1+aes_rcon_tab,%eax ;\
        xorl    32*p1-32(%rdi),%eax ;\
        movl    %eax,32*p1(%rdi) ;\
        xorl    32*p1-28(%rdi),%eax ;\
        movl    %eax,32*p1+4(%rdi) ;\
        xorl    32*p1-24(%rdi),%eax ;\
        movl    %eax,32*p1+8(%rdi) ;\
        xorl    32*p1-20(%rdi),%eax ;\
        movl    %eax,32*p1+12(%rdi) ;\
        pushq   %rbx ;\
        movl    %eax,%ebx ;\
        mix_col(aes_fl_tab) ;\
        popq    %rbx ;\
        xorl    %eax,%esi ;\
        xorl    %esi,%ebp ;\
        movl    %esi,32*p1+16(%rdi) ;\
        movl    %ebp,32*p1+20(%rdi) ;\
        xorl    %ebp,%edx ;\
        xorl    %edx,%ebx ;\
        movl    %edx,32*p1+24(%rdi) ;\
        movl    %ebx,32*p1+28(%rdi)

// rdi = pointer to AES context
// rsi = pointer to key bytes
// rdx = key length, bytes or bits
// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
//
// Key length handling as coded below: a value of 128 or more is treated as
// a bit count and divided by 8; after that, anything other than 32 or 24
// is forced to 16 bytes. The length in 32-bit words is stored at nkey and
// that value plus 6 is stored at nrnd. The raw key is then copied to the
// start of the encryption schedule, leaving %rdi and %rsi just past the
// copied words, and the last four key words are loaded into %esi, %ebp,
// %edx and %ebx for the ksc macros.
//
// NOTE(review): pushfq presumably saves the caller's direction flag because
// of the cld below - confirm against the matching restore at the routine's
// exit, which is not visible in this part of the file.
// NOTE(review): the routine continues past the last line shown here
// (labels aes_35 and aes_36 and the remaining ksc8 steps are not visible).

        .align  ALIGN64BYTES
aes_set_key:
        pushfq
        pushq   %rbp
        pushq   %rbx
        movq    %rcx,%r11               // ed_flg
        movq    %rdx,%rcx               // key length
        movq    %rdi,%r10               // AES context
        cmpl    $128,%ecx
        jb      aes_30
        shrl    $3,%ecx                 // bits -> bytes
aes_30:
        cmpl    $32,%ecx
        je      aes_32
        cmpl    $24,%ecx
        je      aes_32
        movl    $16,%ecx                // any other length falls back to 16 bytes
aes_32:
        shrl    $2,%ecx                 // bytes -> 32-bit words
        movl    %ecx,nkey(%r10)
        leaq    6(%rcx),%rax            // 10/12/14 for 4/6/8 32-bit key length
        movl    %eax,nrnd(%r10)
        leaq    ekey(%r10),%rdi         // key position in AES context
        cld
        movl    %ecx,%eax               // save key length in eax
        rep ;   movsl                   // words in the key schedule
        movl    -4(%rsi),%ebx           // put some values in registers
        movl    -8(%rsi),%edx           // to allow faster code
        movl    -12(%rsi),%ebp
        movl    -16(%rsi),%esi
        cmpl    $4,%eax                 // jump on key size
        je      aes_36
        cmpl    $6,%eax
        je      aes_35
        ksc8(0)
        ksc8(1)
        ksc8(2)
        ksc8(3)
        ksc8(4)
        ksc8(5)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -