📄 aes-ia64.s
字号:
// ---------------------------------------------------------------------
// Tail of AES_encrypt (the procedure starts before this point): the
// last two byte-store bundles of its output path, restore of ar.lc
// from r3, and return through b0.
// ---------------------------------------------------------------------
{ .mmi;	st1	[out3]=r28
	st1	[out2]=r29
	mov	ar.lc=r3		}//;;
{ .mmb;	st1	[out1]=r30
	st1	[out0]=r31
	br.ret.sptk.many	b0	};;
.endp	AES_encrypt#

// *AES_decrypt are autogenerated by the following script:
#if 0
#!/usr/bin/env perl
print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
print "#endif\n";
while(<>) {
	$process=1	if (/\.proc\s+_ia64_AES_encrypt/);
	next		if (!$process);

	#s/te00=s0/td00=s0/;		s/te00/td00/g;
	s/te11=s1/td13=s3/;		s/te11/td13/g;
	#s/te22=s2/td22=s2/;		s/te22/td22/g;
	s/te33=s3/td31=s1/;		s/te33/td31/g;

	#s/te01=s1/td01=s1/;		s/te01/td01/g;
	s/te12=s2/td10=s0/;		s/te12/td10/g;
	#s/te23=s3/td23=s3/;		s/te23/td23/g;
	s/te30=s0/td32=s2/;		s/te30/td32/g;

	#s/te02=s2/td02=s2/;		s/te02/td02/g;
	s/te13=s3/td11=s1/;		s/te13/td11/g;
	#s/te20=s0/td20=s0/;		s/te20/td20/g;
	s/te31=s1/td33=s3/;		s/te31/td33/g;

	#s/te03=s3/td03=s3/;		s/te03/td03/g;
	s/te10=s0/td12=s2/;		s/te10/td12/g;
	#s/te21=s1/td21=s1/;		s/te21/td21/g;
	s/te32=s2/td30=s0/;		s/te32/td30/g;

	s/td/te/g;

	s/AES_encrypt/AES_decrypt/g;
	s/\.Le_/.Ld_/g;
	s/AES_Te#/AES_Td#/g;

	print;

	exit		if (/\.endp\s+AES_decrypt/);
}
#endif

// ---------------------------------------------------------------------
// _ia64_AES_decrypt: internal round loop, entered with
// "br.call ... b6=_ia64_AES_decrypt" and left with "br.ret.sptk b6".
//
// Register set-up performed below, before the loop:
//   te1 = te0+1024, te2 = te0+2048, te3 = te0+3072 (te3 is first used
//   as the round count: ar.lc = te3-3, then it is rebound to a table
//   pointer), ar.ec = 3, pr.rot = 1<<16, and s0-s3 are XORed with the
//   first four key words loaded through rk0/rk1.
//
// NOTE(review): the per-instruction "N/M:" comments inside the loop
// are carried over by the generator script above and do not always
// name the operand the instruction actually reads (e.g. the
// instruction "and te31=s1,maskff" is annotated "s3&0xff"). Trust the
// operands, not the annotations.
// NOTE(review): when p17 is set, te0-te3 are each advanced by 4096;
// presumably this selects the final-round tables -- confirm against
// the AES_Td# table layout, which is not visible here.
// ---------------------------------------------------------------------
.proc	_ia64_AES_decrypt#
// Input:	rk0-rk1
//		te0
//		te3	as AES_KEY->rounds!!!
//		s0-s3
//		maskff,twenty4,sixteen
// Output:	r16,r20,r24,r28 as s0-s3
// Clobber:	r16-r31,rk0-rk1,r32-r43
.align	32
_ia64_AES_decrypt:
{ .mmi;	alloc	r16=ar.pfs,12,0,0,8
	LDKEY	t0=[rk0],2*KSZ
	mov	pr.rot=1<<16	}
{ .mmi;	LDKEY	t1=[rk1],2*KSZ
	add	te1=1024,te0
	add	te3=-3,te3	};;
{ .mib;	LDKEY	t2=[rk0],2*KSZ
	mov	ar.ec=3		}
{ .mib;	LDKEY	t3=[rk1],2*KSZ
	add	te2=2048,te0
	brp.loop.imp	.Ld_top,.Ld_end-16	};;

{ .mmi;	xor	s0=s0,t0
	xor	s1=s1,t1
	mov	ar.lc=te3	}
{ .mmi;	xor	s2=s2,t2
	xor	s3=s3,t3
	add	te3=3072,te0	};;

.align	32
.Ld_top:
{ .mmi;	(p0)	LDKEY	t0=[rk0],2*KSZ		// 0/0:rk[0]
	(p0)	and	te31=s1,maskff		// 0/0:s3&0xff
	(p0)	extr.u	te22=s2,8,8	}	// 0/0:s2>>8&0xff
{ .mmi;	(p0)	LDKEY	t1=[rk1],2*KSZ		// 0/1:rk[1]
	(p0)	and	te32=s2,maskff		// 0/1:s0&0xff
	(p0)	shr.u	te00=s0,twenty4	};;	// 0/0:s0>>24
{ .mmi;	(p0)	LDKEY	t2=[rk0],2*KSZ		// 1/2:rk[2]
	(p0)	shladd	te31=te31,2,te3		// 1/0:te0+s0>>24
	(p0)	extr.u	te23=s3,8,8	}	// 1/1:s3>>8&0xff
{ .mmi;	(p0)	LDKEY	t3=[rk1],2*KSZ		// 1/3:rk[3]
	(p0)	shladd	te32=te32,2,te3		// 1/1:te3+s0
	(p0)	shr.u	te01=s1,twenty4	};;	// 1/1:s1>>24
{ .mmi;	(p0)	ld4	te31=[te31]		// 2/0:te3[s3&0xff]
	(p0)	shladd	te22=te22,2,te2		// 2/0:te2+s2>>8&0xff
	(p0)	extr.u	te20=s0,8,8	}	// 2/2:s0>>8&0xff
{ .mmi;	(p0)	ld4	te32=[te32]		// 2/1:te3[s0]
	(p0)	shladd	te23=te23,2,te2		// 2/1:te2+s3>>8
	(p0)	shr.u	te02=s2,twenty4	};;	// 2/2:s2>>24
{ .mmi;	(p0)	ld4	te22=[te22]		// 3/0:te2[s2>>8]
	(p0)	shladd	te20=te20,2,te2		// 3/2:te2+s0>>8
	(p0)	extr.u	te21=s1,8,8	}	// 3/3:s1>>8&0xff
{ .mmi;	(p0)	ld4	te23=[te23]		// 3/1:te2[s3>>8]
	(p0)	shladd	te00=te00,2,te0		// 3/0:te0+s0>>24
	(p0)	shr.u	te03=s3,twenty4	};;	// 3/3:s3>>24
{ .mmi;	(p0)	ld4	te20=[te20]		// 4/2:te2[s0>>8]
	(p0)	shladd	te21=te21,2,te2		// 4/3:te3+s2
	(p0)	extr.u	te13=s3,16,8	}	// 4/0:s1>>16&0xff
{ .mmi;	(p0)	ld4	te00=[te00]		// 4/0:te0[s0>>24]
	(p0)	shladd	te01=te01,2,te0		// 4/1:te0+s1>>24
	(p0)	shr.u	te11=s1,sixteen	};;	// 4/2:s3>>16
{ .mmi;	(p0)	ld4	te21=[te21]		// 5/3:te2[s1>>8]
	(p0)	shladd	te13=te13,2,te1		// 5/0:te1+s1>>16
	(p0)	extr.u	te10=s0,16,8	}	// 5/1:s2>>16&0xff
{ .mmi;	(p0)	ld4	te01=[te01]		// 5/1:te0[s1>>24]
	(p0)	shladd	te02=te02,2,te0		// 5/2:te0+s2>>24
	(p0)	and	te33=s3,maskff	};;	// 5/2:s1&0xff
{ .mmi;	(p0)	ld4	te13=[te13]		// 6/0:te1[s1>>16]
	(p0)	shladd	te10=te10,2,te1		// 6/1:te1+s2>>16
	(p0)	extr.u	te12=s2,16,8	}	// 6/3:s0>>16&0xff
{ .mmi;	(p0)	ld4	te02=[te02]		// 6/2:te0[s2>>24]
	(p0)	shladd	te03=te03,2,te0		// 6/3:te1+s0>>16
	(p0)	and	te30=s0,maskff	};;	// 6/3:s2&0xff
{ .mmi;	(p0)	ld4	te10=[te10]		// 7/1:te1[s2>>16]
	(p0)	shladd	te33=te33,2,te3		// 7/2:te3+s1&0xff
	(p0)	and	te11=te11,maskff}	// 7/2:s3>>16&0xff
{ .mmi;	(p0)	ld4	te03=[te03]		// 7/3:te0[s3>>24]
	(p0)	shladd	te30=te30,2,te3		// 7/3:te3+s2
	(p0)	xor	t0=t0,te31	};;	// 7/0:
{ .mmi;	(p0)	ld4	te33=[te33]		// 8/2:te3[s1]
	(p0)	shladd	te11=te11,2,te1		// 8/2:te1+s3>>16
	(p0)	xor	t0=t0,te22	}	// 8/0:
{ .mmi;	(p0)	ld4	te30=[te30]		// 8/3:te3[s2]
	(p0)	shladd	te12=te12,2,te1		// 8/3:te1+s0>>16
	(p0)	xor	t1=t1,te32	};;	// 8/1:
{ .mmi;	(p0)	ld4	te11=[te11]		// 9/2:te1[s3>>16]
	(p0)	xor	t0=t0,te00		// 9/0:
	(p0)	xor	t1=t1,te23	}	// 9/1:
{ .mmi;	(p0)	ld4	te12=[te12]		// 9/3:te1[s0>>16]
	(p0)	xor	t2=t2,te20		// 9/2:
	(p0)	xor	t3=t3,te21	};;	// 9/3:
{ .mmi;	(p0)	xor	t0=t0,te13		// 10/0:done!
	(p0)	xor	t1=t1,te01		// 10/1:
	(p0)	xor	t2=t2,te02	}	// 10/2:
{ .mmi;	(p0)	xor	t3=t3,te03		// 10/3:
	(p16)	cmp.eq	p0,p17=r0,r0	};;	// 10/clear (p17)
{ .mmi;	(p0)	xor	t1=t1,te10		// 11/1:done!
	(p0)	xor	t2=t2,te33		// 11/2:
	(p0)	xor	t3=t3,te30	}	// 11/3:
{ .mmi;	(p17)	add	te0=4096,te0		// 11/
	(p17)	add	te1=4096,te1	};;	// 11/
{ .mib;	(p0)	xor	t2=t2,te11		// 12/2:done!
	(p0)	xor	t3=t3,te12	}	// 12/3:done!
{ .mib;	(p17)	add	te2=4096,te2		// 12/
	(p17)	add	te3=4096,te3		// 12/
		br.ctop.sptk	.Ld_top	};;
.Ld_end:
	// Hand the four result words back in the fixed registers named in
	// the "Output:" line of the header above.
{ .mib;	mov	r16=s0
	mov	r20=s1		}
{ .mib;	mov	r24=s2
	mov	r28=s3
	br.ret.sptk	b6	};;
.endp	_ia64_AES_decrypt#

// ---------------------------------------------------------------------
// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public entry point. Saves ar.pfs in r2, ar.lc in r3 and the
// predicates in prsave, loads the AES_Td# address (via its @ltoff
// entry) into out8 and the 32-bit word at key+KSZ*60 (AES_KEY->rounds)
// into out11, loads the 16 input bytes into out1/out3/out5/out7, calls
// _ia64_AES_decrypt through b6 and stores r16/r20/r24/r28 to out.
//
// Two I/O strategies:
//   * #if defined(_HPUX_SOURCE): when (in & 3) == 0 the input is read
//     with four ld4, and when (out & 3) == 0 the result is written
//     with four st4; otherwise control goes to the byte-wise paths.
//   * .Ld_i_unaligned / .Ld_o_unaligned: sixteen ld1 / st1. Input
//     bytes are merged with dep so that the byte at the lowest address
//     lands in bits 24..31 of each word; output bytes are split the
//     same way with extr.u/shr.u. Without _HPUX_SOURCE the code falls
//     straight through from .body into .Ld_i_unaligned.
//
// The "//;;" markers are commented-out instruction-group stops kept
// from the original text; they are comments, not active stops.
// ---------------------------------------------------------------------
.global	AES_decrypt#
.proc	AES_decrypt#
.align	32
.skip	16
AES_decrypt:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
{ .mmi;	alloc	r2=ar.pfs,3,0,12,0
	addl	out8=@ltoff(AES_Td#),gp
	mov	r3=ar.lc		}
{ .mmi;	and	out0=3,in0
	ADDP	in0=0,in0
	ADDP	out11=KSZ*60,in2	};;	// &AES_KEY->rounds

	.body
{ .mmi;	ld8	out8=[out8]		// Te0
	ld4	out11=[out11]		// AES_KEY->rounds
	mov	prsave=pr		}

#if defined(_HPUX_SOURCE)	// HPUX is big-endian, cut 15+15 cycles...
{ .mib;	cmp.ne	p6,p0=out0,r0
	add	out0=4,in0
(p6)	br.dpnt.many	.Ld_i_unaligned	};;

{ .mmi;	ld4	out1=[in0],8		// s0
	and	out9=3,in1
	mov	twenty4=24		}
{ .mmi;	ld4	out3=[out0],8		// s1
	ADDP	rk0=0,in2
	mov	sixteen=16		};;
{ .mmi;	ld4	out5=[in0]		// s2
	cmp.ne	p6,p0=out9,r0
	mov	maskff=0xff		}
{ .mmb;	ld4	out7=[out0]		// s3
	ADDP	rk1=KSZ,in2
	br.call.sptk.many	b6=_ia64_AES_decrypt	};;

{ .mib;	ADDP	in0=4,in1
	ADDP	in1=0,in1
(p6)	br.spnt	.Ld_o_unaligned		};;

{ .mii;	mov	ar.pfs=r2
	mov	ar.lc=r3		}
{ .mmi;	st4	[in1]=r16,8		// s0
	st4	[in0]=r20,8		// s1
	mov	pr=prsave,0x1ffff	};;
{ .mmb;	st4	[in1]=r24		// s2
	st4	[in0]=r28		// s3
	br.ret.sptk.many	b0	};;
#endif

.align	32
.Ld_i_unaligned:
	// out0/out2/out4 = in+1/in+2/in+3; together with in0 they walk the
	// input as four interleaved byte streams, each stepping by 4.
{ .mmi;	add	out0=1,in0
	add	out2=2,in0
	add	out4=3,in0	};;
{ .mmi;	ld1	r16=[in0],4
	ld1	r17=[out0],4	}//;;
{ .mmi;	ld1	r18=[out2],4
	ld1	out1=[out4],4	};;	// s0
{ .mmi;	ld1	r20=[in0],4
	ld1	r21=[out0],4	}//;;
{ .mmi;	ld1	r22=[out2],4
	ld1	out3=[out4],4	};;	// s1
{ .mmi;	ld1	r24=[in0],4
	ld1	r25=[out0],4	}//;;
{ .mmi;	ld1	r26=[out2],4
	ld1	out5=[out4],4	};;	// s2
{ .mmi;	ld1	r28=[in0]
	ld1	r29=[out0]	}//;;
{ .mmi;	ld1	r30=[out2]
	ld1	out7=[out4]	};;	// s3

	// Merge the bytes into words (byte 0 -> bits 24..31, byte 1 ->
	// 16..23, byte 2 -> 8..15, byte 3 already in bits 0..7) while
	// setting up rk0/rk1 and the shift/mask constants for the callee.
{ .mii;
	dep	out1=r16,out1,24,8	//;;
	dep	out3=r20,out3,24,8	}//;;
{ .mii;	ADDP	rk0=0,in2
	dep	out5=r24,out5,24,8	//;;
	dep	out7=r28,out7,24,8	};;
{ .mii;	ADDP	rk1=KSZ,in2
	dep	out1=r17,out1,16,8	//;;
	dep	out3=r21,out3,16,8	}//;;
{ .mii;	mov	twenty4=24
	dep	out5=r25,out5,16,8	//;;
	dep	out7=r29,out7,16,8	};;
{ .mii;	mov	sixteen=16
	dep	out1=r18,out1,8,8	//;;
	dep	out3=r22,out3,8,8	}//;;
{ .mii;	mov	maskff=0xff
	dep	out5=r26,out5,8,8	//;;
	dep	out7=r30,out7,8,8	};;

{ .mib;	br.call.sptk.many	b6=_ia64_AES_decrypt	};;

.Ld_o_unaligned:
	// out0..out3 = out+0..out+3. Each result word is split into four
	// bytes; the unshifted word is stored (low byte only, via st1) at
	// offset 3 and the >>24 byte at offset 0.
{ .mii;	ADDP	out0=0,in1
	extr.u	r17=r16,8,8			// s0
	shr.u	r19=r16,twenty4		}//;;
{ .mii;	ADDP	out1=1,in1
	extr.u	r18=r16,16,8
	shr.u	r23=r20,twenty4		}//;;	// s1
{ .mii;	ADDP	out2=2,in1
	extr.u	r21=r20,8,8
	shr.u	r22=r20,sixteen		}//;;
{ .mii;	ADDP	out3=3,in1
	extr.u	r25=r24,8,8			// s2
	shr.u	r27=r24,twenty4		};;
{ .mii;	st1	[out3]=r16,4
	extr.u	r26=r24,16,8
	shr.u	r31=r28,twenty4		}//;;	// s3
{ .mii;	st1	[out2]=r17,4
	extr.u	r29=r28,8,8
	shr.u	r30=r28,sixteen		}//;;

{ .mmi;	st1	[out1]=r18,4
	st1	[out0]=r19,4		};;
{ .mmi;	st1	[out3]=r20,4
	st1	[out2]=r21,4		}//;;
{ .mmi;	st1	[out1]=r22,4
	st1	[out0]=r23,4		};;
{ .mmi;	st1	[out3]=r24,4
	st1	[out2]=r25,4
	mov	pr=prsave,0x1ffff	}//;;
{ .mmi;	st1	[out1]=r26,4
	st1	[out0]=r27,4
	mov	ar.pfs=r2		};;
{ .mmi;	st1	[out3]=r28
	st1	[out2]=r29
	mov	ar.lc=r3		}//;;
{ .mmb;	st1	[out1]=r30
	st1	[out0]=r31
	br.ret.sptk.many	b0	};;
.endp	AES_decrypt#

// leave it in .text segment...
.align	64
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -