📄 aes-x86_64.pl
字号:
sub	$tp4,$acc
	and	\$0xfefefefe,$tp2
	and	\$0x1b1b1b1b,$acc
	xor	$tp2,$acc
	mov	$acc,$tp2

	and	\$0x80808080,$acc
	mov	$acc,$tp8
	shr	\$7,$tp8
	lea	0($tp2,$tp2),$tp4
	sub	$tp8,$acc
	and	\$0xfefefefe,$tp4
	and	\$0x1b1b1b1b,$acc
	xor	$tp1,$tp2		# tp2^tp1
	xor	$tp4,$acc
	mov	$acc,$tp4

	and	\$0x80808080,$acc
	mov	$acc,$tp8
	shr	\$7,$tp8
	sub	$tp8,$acc
	lea	0($tp4,$tp4),$tp8
	xor	$tp1,$tp4		# tp4^tp1
	and	\$0xfefefefe,$tp8
	and	\$0x1b1b1b1b,$acc
	xor	$acc,$tp8

	xor	$tp8,$tp1		# tp1^tp8
	rol	\$8,$tp1		# ROTATE(tp1^tp8,8)
	xor	$tp8,$tp2		# tp2^tp1^tp8
	xor	$tp8,$tp4		# tp4^tp1^tp8
	xor	$tp2,$tp8
	xor	$tp4,$tp8		# tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2

	xor	$tp8,$tp1
	rol	\$24,$tp2		# ROTATE(tp2^tp1^tp8,24)
	xor	$tp2,$tp1
	rol	\$16,$tp4		# ROTATE(tp4^tp1^tp8,16)
	xor	$tp4,$tp1

	mov	$tp1,$i($ptr)
___
}

# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
#                        AES_KEY *key)
#
# Derives a decryption key schedule in place, in three visible steps:
#   1. Call _x86_64_AES_set_encrypt_key. If it leaves a non-zero value in
#      %eax, that value is returned unchanged through .Labort; this happens
#      before any callee-saved register has been pushed.
#   2. .Linvert: exchange 16-byte chunks pairwise from both ends of the
#      schedule (offset 0 <-> 16*rounds, 16 <-> 16*rounds-16, ...), which
#      reverses the chunk order.
#   3. .Lpermute: apply &dectransform to every chunk at offsets
#      16 .. 16*(rounds-1); the first and the last chunk are not touched.
# On success %rax is cleared, i.e. the function returns 0.
#
# The round count is read as a 32-bit value at offset 240 of the schedule.
# NOTE(review): step 2 ends only when both pointers become equal, which
# requires an even round count -- confirm against the values stored by
# _x86_64_AES_set_encrypt_key.
# NOTE(review): &dectransform is defined outside this excerpt; judging by
# the loads and stores around it, it presumably consumes %rax/%rcx and
# leaves its result in %eax/%ebx/%ecx/%edx -- confirm.
$code.=<<___;
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,\@function,3
.align	16
AES_set_decrypt_key:
	push	%rdx			# save key schedule
	call	_x86_64_AES_set_encrypt_key
	cmp	\$0,%eax
	pop	%r8			# restore key schedule
	jne	.Labort			# flags still come from cmp above; pop does not alter them

	# everything pushed here is popped again right before .Labort
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	mov	240(%r8),%r14d		# pull number of rounds
	xor	%rdi,%rdi
	# %rcx = 4*rounds (%rdi is zero here)
	# NOTE(review): 32-bit index paired with a 64-bit base; presumably
	# normalized by the perlasm translator -- confirm
	lea	(%rdi,%r14d,4),%rcx
	mov	%r8,%rsi
	lea	(%r8,%rcx,4),%rdi	# pointer to last chunk
.align	4
.Linvert:
	# swap the 16 bytes at %rsi with the 16 bytes at %rdi
	mov	0(%rsi),%rax
	mov	8(%rsi),%rbx
	mov	0(%rdi),%rcx
	mov	8(%rdi),%rdx
	mov	%rax,0(%rdi)
	mov	%rbx,8(%rdi)
	mov	%rcx,0(%rsi)
	mov	%rdx,8(%rsi)
	lea	16(%rsi),%rsi		# lea steps the pointers without touching flags
	lea	-16(%rdi),%rdi
	cmp	%rsi,%rdi
	jne	.Linvert		# loop until both pointers meet

	.picmeup	%rax
	lea	AES_Te+2048+1024-.(%rax),%rax	# rcon
	# constants located 40, 48 and 56 bytes past the address computed above;
	# presumably the masks &dectransform works with -- confirm
	mov	40(%rax),$mask80
	mov	48(%rax),$maskfe
	mov	56(%rax),$mask1b

	mov	%r8,$key
	sub	\$1,%r14d		# rounds-1 chunks get transformed
.align	4
.Lpermute:
	lea	16($key),$key		# advance first, so the chunk at offset 0 is skipped
	mov	0($key),%rax
	mov	8($key),%rcx
___
	&dectransform ();
$code.=<<___;
	# store the four 32-bit words back over the chunk just read
	mov	%eax,0($key)
	mov	%ebx,4($key)
	mov	%ecx,8($key)
	mov	%edx,12($key)
	sub	\$1,%r14d
	jnz	.Lpermute

	xor	%rax,%rax		# success: return 0
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
.Labort:
	ret
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

# void AES_cbc_encrypt (const 
void char *inp, unsigned char *out,# size_t length, const AES_KEY *key,# unsigned char *ivp,const int enc);{# stack frame layout# -8(%rsp) return addressmy $keyp="0(%rsp)"; # one to pass as $keymy $keyend="8(%rsp)"; # &(keyp->rd_key[4*keyp->rounds])my $_rsp="16(%rsp)"; # saved %rspmy $_inp="24(%rsp)"; # copy of 1st parameter, inpmy $_out="32(%rsp)"; # copy of 2nd parameter, outmy $_len="40(%rsp)"; # copy of 3rd parameter, lengthmy $_key="48(%rsp)"; # copy of 4th parameter, keymy $_ivp="56(%rsp)"; # copy of 5th parameter, ivpmy $ivec="64(%rsp)"; # ivec[16]my $aes_key="80(%rsp)"; # copy of aes_keymy $mark="80+240(%rsp)"; # copy of aes_key->rounds$code.=<<___;.globl AES_cbc_encrypt.type AES_cbc_encrypt,\@function,6.align 16.extern OPENSSL_ia32cap_PAES_cbc_encrypt: cmp \$0,%rdx # check length je .Lcbc_just_ret push %rbx push %rbp push %r12 push %r13 push %r14 push %r15 pushfq cld .picmeup $sbox lea AES_Te-.($sbox),$sbox cmp \$0,%r9 jne .Lcbc_picked_te lea AES_Td-AES_Te($sbox),$sbox.Lcbc_picked_te: mov OPENSSL_ia32cap_P(%rip),%eax cmp \$$speed_limit,%rdx jb .Lcbc_slow_way test \$15,%rdx jnz .Lcbc_slow_way bt \$28,%eax jc .Lcbc_slow_way # allocate aligned stack frame... lea -88-248(%rsp),$key and \$-64,$key # ... and make sure it doesn't alias with AES_T[ed] modulo 4096 mov $sbox,%r10 lea 2304($sbox),%r11 mov $key,%r12 and \$0xFFF,%r10 # s = $sbox&0xfff and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff and \$0xFFF,%r12 # p = %rsp&0xfff cmp %r11,%r12 # if (p=>e) %rsp =- (p-e); jb .Lcbc_te_break_out sub %r11,%r12 sub %r12,$key jmp .Lcbc_te_ok.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz sub %r10,%r12 and \$0xFFF,%r12 add \$320,%r12 sub %r12,$key.align 4.Lcbc_te_ok: xchg %rsp,$key add \$8,%rsp # reserve for return address! 
mov $key,$_rsp # save %rsp mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out mov %rdx,$_len # save copy of len mov %rcx,$_key # save copy of key mov %r8,$_ivp # save copy of ivp movl \$0,$mark # copy of aes_key->rounds = 0; mov %r8,%rbp # rearrange input arguments mov %r9,%rbx mov %rsi,$out mov %rdi,$inp mov %rcx,$key mov 240($key),%eax # key->rounds # do we copy key schedule to stack? mov $key,%r10 sub $sbox,%r10 and \$0xfff,%r10 cmp \$2304,%r10 jb .Lcbc_do_ecopy cmp \$4096-248,%r10 jb .Lcbc_skip_ecopy.align 4.Lcbc_do_ecopy: mov $key,%rsi lea $aes_key,%rdi lea $aes_key,$key mov \$240/8,%ecx .long 0x90A548F3 # rep movsq mov %eax,(%rdi) # copy aes_key->rounds.Lcbc_skip_ecopy: mov $key,$keyp # save key pointer mov \$18,%ecx.align 4.Lcbc_prefetch_te: mov 0($sbox),%r10 mov 32($sbox),%r11 mov 64($sbox),%r12 mov 96($sbox),%r13 lea 128($sbox),$sbox sub \$1,%ecx jnz .Lcbc_prefetch_te lea -2304($sbox),$sbox cmp \$0,%rbx je .LFAST_DECRYPT#----------------------------- ENCRYPT -----------------------------# mov 0(%rbp),$s0 # load iv mov 4(%rbp),$s1 mov 8(%rbp),$s2 mov 12(%rbp),$s3.align 4.Lcbc_fast_enc_loop: xor 0($inp),$s0 xor 4($inp),$s1 xor 8($inp),$s2 xor 12($inp),$s3 mov $keyp,$key # restore key mov $inp,$_inp # if ($verticalspin) save inp call _x86_64_AES_encrypt mov $_inp,$inp # if ($verticalspin) restore inp mov $_len,%r10 mov $s0,0($out) mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) lea 16($inp),$inp lea 16($out),$out sub \$16,%r10 test \$-16,%r10 mov %r10,$_len jnz .Lcbc_fast_enc_loop mov $_ivp,%rbp # restore ivp mov $s0,0(%rbp) # save ivec mov $s1,4(%rbp) mov $s2,8(%rbp) mov $s3,12(%rbp).align 4.Lcbc_cleanup: cmpl \$0,$mark # was the key schedule copied? 
lea $aes_key,%rdi je .Lcbc_exit mov \$240/8,%ecx xor %rax,%rax .long 0x90AB48F3 # rep stosq.Lcbc_exit: mov $_rsp,%rsp popfq pop %r15 pop %r14 pop %r13 pop %r12 pop %rbp pop %rbx.Lcbc_just_ret: ret#----------------------------- DECRYPT -----------------------------#.align 16.LFAST_DECRYPT: cmp $inp,$out je .Lcbc_fast_dec_in_place mov %rbp,$ivec.align 4.Lcbc_fast_dec_loop: mov 0($inp),$s0 # read input mov 4($inp),$s1 mov 8($inp),$s2 mov 12($inp),$s3 mov $keyp,$key # restore key mov $inp,$_inp # if ($verticalspin) save inp call _x86_64_AES_decrypt mov $ivec,%rbp # load ivp mov $_inp,$inp # if ($verticalspin) restore inp mov $_len,%r10 # load len xor 0(%rbp),$s0 # xor iv xor 4(%rbp),$s1 xor 8(%rbp),$s2 xor 12(%rbp),$s3 mov $inp,%rbp # current input, next iv sub \$16,%r10 mov %r10,$_len # update len mov %rbp,$ivec # update ivp mov $s0,0($out) # write output mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) lea 16($inp),$inp lea 16($out),$out jnz .Lcbc_fast_dec_loop mov $_ivp,%r12 # load user ivp mov 0(%rbp),%r10 # load iv mov 8(%rbp),%r11 mov %r10,0(%r12) # copy back to user mov %r11,8(%r12) jmp .Lcbc_cleanup.align 16.Lcbc_fast_dec_in_place: mov 0(%rbp),%r10 # copy iv to stack mov 8(%rbp),%r11 mov %r10,0+$ivec mov %r11,8+$ivec.align 4.Lcbc_fast_dec_in_place_loop: mov 0($inp),$s0 # load input mov 4($inp),$s1 mov 8($inp),$s2 mov 12($inp),$s3 mov $keyp,$key # restore key mov $inp,$_inp # if ($verticalspin) save inp call _x86_64_AES_decrypt mov $_inp,$inp # if ($verticalspin) restore inp mov $_len,%r10 xor 0+$ivec,$s0 xor 4+$ivec,$s1 xor 8+$ivec,$s2 xor 12+$ivec,$s3 mov 0($inp),%r11 # load input mov 8($inp),%r12 sub \$16,%r10 jz .Lcbc_fast_dec_in_place_done mov %r11,0+$ivec # copy input to iv mov %r12,8+$ivec mov $s0,0($out) # save output [zaps input] mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) lea 16($inp),$inp lea 16($out),$out mov %r10,$_len jmp .Lcbc_fast_dec_in_place_loop.Lcbc_fast_dec_in_place_done: mov $_ivp,%rdi mov %r11,0(%rdi) # copy iv back to user mov 
%r12,8(%rdi) mov $s0,0($out) # save output [zaps input] mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) jmp .Lcbc_cleanup#--------------------------- SLOW ROUTINE ---------------------------#.align 16.Lcbc_slow_way: # allocate aligned stack frame... lea -88(%rsp),%rbp and \$-64,%rbp # ... just "above" key schedule lea -88-63(%rcx),%rax sub %rbp,%rax neg %rax and \$0x3c0,%rax sub %rax,%rbp xchg %rsp,%rbp add \$8,%rsp # reserve for return address! mov %rbp,$_rsp # save %rsp #mov %rdi,$_inp # save copy of inp #mov %rsi,$_out # save copy of out #mov %rdx,$_len # save copy of len #mov %rcx,$_key # save copy of key mov %r8,$_ivp # save copy of ivp mov %r8,%rbp # rearrange input arguments mov %r9,%rbx mov %rsi,$out mov %rdi,$inp mov %rcx,$key mov %rdx,%r10 mov 240($key),%eax mov $key,$keyp # save key pointer shl \$4,%eax lea ($key,%rax),%rax mov %rax,$keyend # pick Te4 copy which can't "overlap" with stack frame or key scdedule lea 2048($sbox),$sbox lea 768-8(%rsp),%rax sub $sbox,%rax and \$0x300,%rax lea ($sbox,%rax),$sbox cmp \$0,%rbx je .LSLOW_DECRYPT#--------------------------- SLOW ENCRYPT ---------------------------# test \$-16,%r10 # check upon length mov 0(%rbp),$s0 # load iv mov 4(%rbp),$s1 mov 8(%rbp),$s2 mov 12(%rbp),$s3 jz .Lcbc_slow_enc_tail # short input....align 4.Lcbc_slow_enc_loop: xor 0($inp),$s0 xor 4($inp),$s1 xor 8($inp),$s2 xor 12($inp),$s3 mov $keyp,$key # restore key mov $inp,$_inp # save inp mov $out,$_out # save out mov %r10,$_len # save len call _x86_64_AES_encrypt_compact mov $_inp,$inp # restore inp mov $_out,$out # restore out mov $_len,%r10 # restore len mov $s0,0($out) mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) lea 16($inp),$inp lea 16($out),$out sub \$16,%r10 test \$-16,%r10 jnz .Lcbc_slow_enc_loop test \$15,%r10 jnz .Lcbc_slow_enc_tail mov $_ivp,%rbp # restore ivp mov $s0,0(%rbp) # save ivec mov $s1,4(%rbp) mov $s2,8(%rbp) mov $s3,12(%rbp) jmp .Lcbc_exit.align 4.Lcbc_slow_enc_tail: cmp $inp,$out je .Lcbc_slow_enc_in_place mov 
%r10,%rcx mov $inp,%rsi mov $out,%rdi
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -