📄 des_enc.m4
字号:
! des_enc.m4! des_enc.S (generated from des_enc.m4)!! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.!! Version 1.0. 32-bit version.!! June 8, 2000.!! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation! by Andy Polyakov.!! January 1, 2003.!! Assembler version: Copyright Svend Olaf Mikkelsen.!! Original C code: Copyright Eric A. Young.!! This code can be freely used by LibDES/SSLeay/OpenSSL users.!! The LibDES/SSLeay/OpenSSL copyright notices must be respected.!! This version can be redistributed.!! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S!! Global registers 1 to 5 are used. This is the same as done by the! cc compiler. The UltraSPARC load/store little endian feature is used.!! Instruction grouping often refers to one CPU cycle.!! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S!! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S!! Performance improvement according to './apps/openssl speed des'!! 32-bit build:! 23% faster than cc-5.2 -xarch=v8plus -xO5! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5! 64-bit build:! 50% faster than cc-5.2 -xarch=v9 -xO5! 
!	100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
!

.ident "des_enc.m4 2.1"

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

! ABI dependent settings: frame size, stack bias, pointer sized load/store
! mnemonics and argument slot layout. The 64-bit branch also declares
! %g2 and %g3 as scratch registers.

#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
# define	LDPTR	ldx
# define	STPTR	stx
# define	ARG0	128
# define	ARGSZ	8
# ifndef OPENSSL_SYSNAME_ULTRASPARC
# define OPENSSL_SYSNAME_ULTRASPARC
# endif
#else
# define	FRAME	-96
# define	BIAS	0
# define	LDPTR	ld
# define	STPTR	st
# define	ARG0	68
# define	ARGSZ	4
#endif

#define LOOPS 7

! Symbolic register names used by all macros below.

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
! NOTE(review): local7 maps to %l6 and local6 maps to %l7 (crossed).
! Kept exactly as found - confirm this is intended.
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

#define stub stb

changequote({,})


! Macro definitions:

! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift, xor, and
! technique.
!
! The macro also loads address sbox 1 to 5 to global 1 to 5, address
! sbox 6 to local6, and address sbox 8 to out3.
!
! NOTE(review): the sethi/or pair that loaded the sbox 1 address into
! global1 is commented out in the body (replaced by nop), and sbox 2..8
! addresses are computed as offsets from global1. global1 is therefore
! presumably set up by the caller before this macro runs - confirm.
! The masks, the loop counter and the 0x0000FC00 constant are read
! relative to out2 (offsets 256 to 284), so out2 is presumably a
! constants table pointer prepared by the caller - confirm.
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
! implements the possibility to keep the halves in the original registers.
!
! parameter 1  left
! parameter 2  right
! parameter 3  result left (modify in first round)
! parameter 4  result right (use in first round)
! parameter 5  key address
! parameter 6  1/2 for include encryption/decryption
! parameter 7  1 for move in1 to in3
! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
! parameter 9  1 for load ks3 and ks2 to in4 and in3

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4
	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})
	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})
	ld	[out2+280], out4	! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1
	ld	[out2+264], local3
	srl	$1, 16, local4
	xor	$2, local1, $2
	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1	! sbox addr
	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1	! sbox addr
	sll	local4, 16, local1
	xor	$2, local4, $2
	srl	$2, 2, local4
	xor	$1, local1, $1
	sethi	%hi(16711680), local5	! local5 becomes 0x00ff00ff after the or 255 below
	xor	local4, $1, local4
	and	local4, local3, local4
	or	local5, 255, local5
	sll	local4, 2, local2
	xor	$1, local4, $1
	srl	$1, 8, local4
	xor	$2, local2, $2
	xor	local4, $2, local4
	add	global1, 768, global4	! address sbox 4
	and	local4, local5, local4
	add	global1, 1024, global5	! address sbox 5
	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2
	srl	$2, 1, local4
	xor	$1, local1, $1
	ld	[$5], out0	! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2	! address sbox 2
	ld	[$5+4], out1	! key 8642
	and	local4, local7, local4
	add	global1, 512, global3	! address sbox 3
	sll	local4, 1, local1
	xor	$1, local4, $1
	sll	$1, 3, local3
	xor	$2, local1, $2
	sll	$2, 3, local2
	add	global1, 1280, local6	! address sbox 6
	srl	$1, 29, local4
	add	global1, 1792, out3	! address sbox 8
	srl	$2, 29, local1
	or	local4, local3, $4
	or	local2, local1, $3

	ifelse($6, 1, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1
		call .des_enc.1
		and	local1, 252, local1	! call delay slot

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1
		call .des_dec.1
		and	local1, 252, local1	! call delay slot

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1
	ld	[out2+284], local5	! 0x0000FC00
	ba	$4
	and	local1, 252, local1	! branch delay slot

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1	! 8642
	xor	$2, out0, out0	! 7531
	fmovs	%f0, %f0	! fmovs filler used for alignment
	srl	out1, 4, local0	! rotate 4 right
	and	out0, local5, local3	! 3
	fmovs	%f0, %f0
	ld	[$5+$3*8], local7	! key 7531 next round
	srl	local3, 8, local3	! 3
	and	local0, 252, local2	! 2
	fmovs	%f0, %f0
	ld	[global3+local3],local3	! 3
	sll	out1, 28, out1	! rotate
	xor	$1, local1, $1	! 1 finished, local1 now sbox 7
	ld	[global2+local2], local2	! 2
	srl	out0, 24, local1	! 7
	or	out1, local0, out1	! rotate
	ldub	[out2+local1], local1	! 7 (and 0xFC)
	srl	out1, 24, local0	! 8
	and	out1, local5, local4	! 4
	ldub	[out2+local0], local0	! 8 (and 0xFC)
	srl	local4, 8, local4	! 4
	xor	$1, local2, $1	! 2 finished local2 now sbox 6
	ld	[global4+local4],local4	! 4
	srl	out1, 16, local2	! 6
	xor	$1, local3, $1	! 3 finished local3 now sbox 5
	ld	[out3+local0],local0	! 8
	and	local2, 252, local2	! 6
	add	global1, 1536, local5	! address sbox 7
	ld	[local6+local2], local2	! 6
	srl	out0, 16, local3	! 5
	xor	$1, local4, $1	! 4 finished
	ld	[local5+local1],local1	! 7
	and	local3, 252, local3	! 5
	xor	$1, local0, $1	! 8 finished
	ld	[global5+local3],local3	! 5
	xor	$1, local2, $1	! 6 finished
	subcc	out4, 1, out4	! count down, sets the flags tested by bne below
	ld	[$5+$3*8+4], out0	! key 8642 next round
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1	! 7 finished
	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1	! 5 finished
	ld	[$5+$3*16+4], out1	! key 8642 next round again
	and	local2, 252, local2	! sbox5 next round

! next round

	xor	$1, local7, local7	! 7531
	ld	[global5+local2], local2	! 5
	srl	local7, 24, local3	! 7
	xor	$1, out0, out0	! 8642
	ldub	[out2+local3], local3	! 7 (and 0xFC)
	srl	out0, 4, local0	! rotate 4 right
	and	local7, 252, local1	! 1
	sll	out0, 28, out0	! rotate
	xor	$2, local2, $2	! 5 finished local2 used
	srl	local0, 8, local4	! 4
	and	local0, 252, local2	! 2
	ld	[local5+local3], local3	! 7
	srl	local0, 16, local5	! 6
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2	! 2
	srl	out0, 24, local0
	ld	[$5+$3*16], out0	! key 7531 next round
	and	local4, 252, local4	! 4
	and	local5, 252, local5	! 6
	ld	[global4+local4], local4	! 4
	xor	$2, local3, $2	! 7 finished local3 used
	and	local0, 252, local0	! 8
	ld	[local6+local5], local5	! 6
	xor	$2, local2, $2	! 2 finished local2 now sbox 3
	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0	! 8
	xor	$2, local4, $2	! 4 finished
	and	local2, 252, local2	! 3
	ld	[global1+local1], local1	! 1
	xor	$2, local5, $2	! 6 finished local5 used
	ld	[global3+local2], local2	! 3
	xor	$2, local0, $2	! 8 finished
	add	$5, $3*16, $5	! enc add 16, dec add -16 to key pointer
	ld	[out2+284], local5	! 0x0000FC00
	xor	$2, out0, local4	! sbox 1 next round
	xor	$2, local1, $2	! 1 finished
	xor	$2, local2, $2	! 3 finished
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bne,pt	%icc, $4
#else
	bne	$4
#endif
	and	local4, 252, local1	! sbox 1 next round (branch delay slot)

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0
	srl	out1, 4, local0	! rotate
	and	out0, local5, local3
	ld	[$5+$3*8], local7	! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2
	ld	[global3+local3],local3
	sll	out1, 28, out1	! rotate
	xor	$1, local1, $1	! 1 finished, local1 now sbox 7
	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1	! rotate
	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4
	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1	! 2 finished local2 now sbox 6
	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1	! 3 finished local3 now sbox 5
	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5	! address sbox 7
	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1	! 4 finished
	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1
	ld	[global5+local3],local3
	xor	$1, local2, $1	! 6 finished
	cmp	in2, 8
	ifelse($6,{}, {}, {ld	[out2+280], out4})	! loop counter
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1	! 7 finished
	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1	! 5 finished
	and	local2, 252, local2

! next round (two rounds more)

	xor	$1, local7, local7	! 7531
	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0	! 8642
	ldub	[out2+local3], local3
	srl	out0, 4, local0	! rotate
	and	local7, 252, local1
	sll	out0, 28, out0	! rotate
	xor	$2, local2, $2	! 5 finished local2 used
	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3
	srl	local0, 16, local5
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2
	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld	[$6], out0})	! key next encryption/decryption
	and	local4, 252, local4
	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2	! 7 finished local3 used
	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2	! 2 finished local2 now sbox 3
	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2
	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2	! 6 finished local5 used
	ld	[global3+local2], local2
	srl	$1, 3, local3	! $1 shifted 3 right, kept in local3
	xor	$2, local0, $2
	ifelse($6,{}, {}, {ld	[$6+4], out1})	! key next encryption/decryption
	sll	$1, 29, local4	! $1 shifted 29 left, kept in local4
	xor	$2, local1, $2
	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2	! 3 finished (retl delay slot when parameter 7 is given)
})


! {fp_macro}
!
!  parameter 1   right (original left)
!  parameter 2   left (original right)
!  parameter 3   1 for optional store to [in0]
!  parameter 4   1 for load input/output address to local5/7
!
!  The final permutation logic switches the halves, meaning that
!  left and right end up in the registers originally used.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1
	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2
	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2
	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2
	xor	$1, local3, $1
	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2
	xor	$2, local3, $2
	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2
	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1
	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2
	xor	$2, local3, local1
	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2
	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})
	xor	local1, local2, $2
	ifelse($3,1, {st $2, [in0+4]})
})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1   original left
! parameter 2   original right
! parameter 3   left ip
! parameter 4   right ip
! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
!                2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -