📄 x64masm.asm
字号:
movdqa xmm4, [rsp + 4*16 + 256]paddd xmm4, [rsp + 4*16]movdqa xmm5, [rsp + 1*16 + 256]paddd xmm5, [rsp + 1*16]movdqa xmm6, [rsp + 14*16 + 256]paddd xmm6, [rsp + 14*16]movdqa xmm7, [rsp + 11*16 + 256]paddd xmm7, [rsp + 11*16]call labelSSE2_Salsa_Outputmovdqa xmm4, [rsp + 8*16 + 256]paddd xmm4, [rsp + 8*16]movdqa xmm5, [rsp + 5*16 + 256]paddd xmm5, [rsp + 5*16]movdqa xmm6, [rsp + 2*16 + 256]paddd xmm6, [rsp + 2*16]movdqa xmm7, [rsp + 15*16 + 256]paddd xmm7, [rsp + 15*16]call labelSSE2_Salsa_Outputmovdqa xmm4, [rsp + 12*16 + 256]paddd xmm4, [rsp + 12*16]movdqa xmm5, [rsp + 9*16 + 256]paddd xmm5, [rsp + 9*16]movdqa xmm6, [rsp + 6*16 + 256]paddd xmm6, [rsp + 6*16]movdqa xmm7, [rsp + 3*16 + 256]paddd xmm7, [rsp + 3*16]call labelSSE2_Salsa_Outputtest rdx, rdxjz label9add rdx, 12*16label9:add rcx, 12*16sub r8, 4cmp r8, 4jge label1label5:sub r8, 1jl label4movdqa xmm0, [r10 + 0*16]movdqa xmm1, [r10 + 1*16]movdqa xmm2, [r10 + 2*16]movdqa xmm3, [r10 + 3*16]mov rax, r9label0:movdqa xmm4, xmm3paddd xmm4, xmm0movdqa xmm5, xmm4pslld xmm4, 7psrld xmm5, 32-7pxor xmm1, xmm4pxor xmm1, xmm5movdqa xmm4, xmm0paddd xmm4, xmm1movdqa xmm5, xmm4pslld xmm4, 9psrld xmm5, 32-9pxor xmm2, xmm4pxor xmm2, xmm5movdqa xmm4, xmm1paddd xmm4, xmm2movdqa xmm5, xmm4pslld xmm4, 13psrld xmm5, 32-13pxor xmm3, xmm4pxor xmm3, xmm5movdqa xmm4, xmm2paddd xmm4, xmm3movdqa xmm5, xmm4pslld xmm4, 18psrld xmm5, 32-18pxor xmm0, xmm4pxor xmm0, xmm5pshufd xmm1, xmm1, 2*64+1*16+0*4+3pshufd xmm2, xmm2, 1*64+0*16+3*4+2pshufd xmm3, xmm3, 0*64+3*16+2*4+1movdqa xmm4, xmm1paddd xmm4, xmm0movdqa xmm5, xmm4pslld xmm4, 7psrld xmm5, 32-7pxor xmm3, xmm4pxor xmm3, xmm5movdqa xmm4, xmm0paddd xmm4, xmm3movdqa xmm5, xmm4pslld xmm4, 9psrld xmm5, 32-9pxor xmm2, xmm4pxor xmm2, xmm5movdqa xmm4, xmm3paddd xmm4, xmm2movdqa xmm5, xmm4pslld xmm4, 13psrld xmm5, 32-13pxor xmm1, xmm4pxor xmm1, xmm5movdqa xmm4, xmm2paddd xmm4, xmm1movdqa xmm5, xmm4pslld xmm4, 18psrld xmm5, 32-18pxor xmm0, xmm4pxor xmm0, xmm5pshufd xmm1, xmm1, 0*64+3*16+2*4+1pshufd 
xmm2, xmm2, 1*64+0*16+3*4+2pshufd xmm3, xmm3, 2*64+1*16+0*4+3sub eax, 2jnz label0paddd xmm0, [r10 + 0*16]paddd xmm1, [r10 + 1*16]paddd xmm2, [r10 + 2*16]paddd xmm3, [r10 + 3*16]add dword ptr [r10 + 8*4], 1adc dword ptr [r10 + 5*4], 0pcmpeqb xmm6, xmm6psrlq xmm6, 32pshufd xmm7, xmm6, 0*64+1*16+2*4+3movdqa xmm4, xmm0movdqa xmm5, xmm3pand xmm0, xmm7pand xmm4, xmm6pand xmm3, xmm6pand xmm5, xmm7por xmm4, xmm5movdqa xmm5, xmm1pand xmm1, xmm7pand xmm5, xmm6por xmm0, xmm5pand xmm6, xmm2pand xmm2, xmm7por xmm1, xmm6por xmm2, xmm3movdqa xmm5, xmm4movdqa xmm6, xmm0shufpd xmm4, xmm1, 2shufpd xmm0, xmm2, 2shufpd xmm1, xmm5, 2shufpd xmm2, xmm6, 2test rdx, rdxjz labelSSE2_Salsa_Output_B3test rdx, 15jnz labelSSE2_Salsa_Output_B7pxor xmm4, [rdx+0*16]pxor xmm0, [rdx+1*16]pxor xmm1, [rdx+2*16]pxor xmm2, [rdx+3*16]add rdx, 4*16jmp labelSSE2_Salsa_Output_B3labelSSE2_Salsa_Output_B7:movdqu xmm3, [rdx+0*16]pxor xmm4, xmm3movdqu xmm3, [rdx+1*16]pxor xmm0, xmm3movdqu xmm3, [rdx+2*16]pxor xmm1, xmm3movdqu xmm3, [rdx+3*16]pxor xmm2, xmm3add rdx, 4*16labelSSE2_Salsa_Output_B3:test rcx, 15jnz labelSSE2_Salsa_Output_B8movdqa [rcx+0*16], xmm4movdqa [rcx+1*16], xmm0movdqa [rcx+2*16], xmm1movdqa [rcx+3*16], xmm2jmp labelSSE2_Salsa_Output_B9labelSSE2_Salsa_Output_B8:movdqu [rcx+0*16], xmm4movdqu [rcx+1*16], xmm0movdqu [rcx+2*16], xmm1movdqu [rcx+3*16], xmm2labelSSE2_Salsa_Output_B9:add rcx, 4*16jmp label5label4:movdqa xmm6, [rsp + 0200h]movdqa xmm7, [rsp + 0210h]movdqa xmm8, [rsp + 0220h]movdqa xmm9, [rsp + 0230h]movdqa xmm10, [rsp + 0240h]movdqa xmm11, [rsp + 0250h]movdqa xmm12, [rsp + 0260h]movdqa xmm13, [rsp + 0270h]movdqa xmm14, [rsp + 0280h]movdqa xmm15, [rsp + 0290h]add rsp, 10*16 + 32*16 + 8retSalsa20_OperateKeystream ENDPALIGN 8Sosemanuk_OperateKeystream PROC FRAMErex_push_reg rsipush_reg rdialloc_stack(80*4*2+12*4+8*8 + 2*16+8)save_xmm128 xmm6, 02f0hsave_xmm128 xmm7, 0300h.endprologmov rdi, r8mov rax, r9mov QWORD PTR [rsp+1*8], rdimov QWORD PTR [rsp+2*8], rdxmov QWORD PTR [rsp+6*8], raxlea 
rcx, [4*rcx+rcx]lea rsi, [4*rcx]mov QWORD PTR [rsp+3*8], rsimovdqa xmm0, [rax+0*16]movdqa [rsp + 8*8+0*16], xmm0movdqa xmm0, [rax+1*16]movdqa [rsp + 8*8+1*16], xmm0movq xmm0, QWORD PTR [rax+2*16]movq QWORD PTR [rsp + 8*8+2*16], xmm0psrlq xmm0, 32movd r10d, xmm0mov ecx, [rax+10*4]mov edx, [rax+11*4]pcmpeqb xmm7, xmm7label2:lea rdi, [rsp + 8*8 + 12*4]mov rax, 80cmp rsi, 80cmovg rsi, raxmov QWORD PTR [rsp+7*8], rsilea rsi, [rdi+rsi]mov QWORD PTR [rsp+4*8], rsilea rsi, s_sosemanukMulTableslabel0:mov eax, [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4]mov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((0+3)-((0+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((0+2)-((0+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4]mov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((1+3)-((1+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((1+2)-((1+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4]mov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((2+3)-((2+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + 
((2+2)-((2+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4]mov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((3+3)-((3+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((3+2)-((3+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4]mov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((4+3)-((4+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((4+2)-((4+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4]mov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((5+3)-((5+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((5+2)-((5+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4]mov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + 
edx]xor r11d, ecxmov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((6+3)-((6+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((6+2)-((6+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4]mov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((7+3)-((7+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((7+2)-((7+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((8+0)-((8+0)/(10))*(10))*4]mov [rdi + (((8)-((8)/(4))*(4))*20 + (8/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((8)-((8)/(4))*(4))*20 + (8/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((8+3)-((8+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((8+2)-((8+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((8+0)-((8+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((9+0)-((9+0)/(10))*(10))*4]mov [rdi + (((9)-((9)/(4))*(4))*20 + (9/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((9)-((9)/(4))*(4))*20 + (9/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((9+3)-((9+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((9+2)-((9+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, 
[rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((9+0)-((9+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((10+0)-((10+0)/(10))*(10))*4]mov [rdi + (((10)-((10)/(4))*(4))*20 + (10/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((10)-((10)/(4))*(4))*20 + (10/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((10+3)-((10+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((10+2)-((10+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul edx, 54655307hrol edx, 7mov [rsp + 8*8 + ((10+0)-((10+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((11+0)-((11+0)/(10))*(10))*4]mov [rdi + (((11)-((11)/(4))*(4))*20 + (11/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + ecx]xor r11d, edxmov [rdi + (((11)-((11)/(4))*(4))*20 + (11/4)) * 4], r11dmov r11d, 1and r11d, ecxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((11+3)-((11+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((11+2)-((11+2)/(10))*(10))*4]add edx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eaximul ecx, 54655307hrol ecx, 7mov [rsp + 8*8 + ((11+0)-((11+0)/(10))*(10))*4], r10dmov eax, [rsp + 8*8 + ((12+0)-((12+0)/(10))*(10))*4]mov [rdi + (((12)-((12)/(4))*(4))*20 + (12/4)) * 4 + 80*4], eaxrol eax, 8lea r11d, [r10d + edx]xor r11d, ecxmov [rdi + (((12)-((12)/(4))*(4))*20 + (12/4)) * 4], r11dmov r11d, 1and r11d, edxneg r11dand r11d, r10dxor r10d, eaxmovzx eax, alxor r10d, [rsi+rax*4]mov eax, [rsp + 8*8 + ((12+3)-((12+3)/(10))*(10))*4]xor r11d, [rsp + 8*8 + ((12+2)-((12+2)/(10))*(10))*4]add ecx, r11dmovzx r11d, alshr eax, 8xor r10d, [rsi+1024+r11*4]xor r10d, eax
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
切换主题:Ctrl + Shift + D
显示快捷键:?
增大字号:Ctrl + =
减小字号:Ctrl + -