📄 scalar.nas
字号:
; ---------------------------------------------------------------------------
; NOTE(review): the instructions up to the first `endproc` below are the tail
; of a procedure whose beginning lies before this excerpt.  They are kept
; instruction-for-instruction; only the layout (one statement per line) was
; restored.
; ---------------------------------------------------------------------------
        fmul    dword [edx]
        faddp   st1,st0
        fld     dword [eax + 4]
        fmul    dword [edx + 4]
        faddp   st1,st0
        fld     dword [eax + 8]
        fmul    dword [edx + 8]
        faddp   st1,st0
        fld     dword [eax + 12]
        fmul    dword [edx + 12]
        faddp   st1,st0
        add     eax,byte 16
        add     edx,byte 16
        dec     ecx
        jnz     .lbl4
.ret2
endproc

; ===========================================================================
; 3DNow! variants
;
; Conventions shared by the routines in this group (as far as this excerpt
; shows):
;   * `proc`/`endproc`, `arg`, `sp()` and `pmov` are macros defined outside
;     this excerpt.  Presumably `proc`/`endproc` emit the entry label and the
;     return, `sp(x)` yields the stack address of argument x, and `pmov` is a
;     64-bit MMX move -- TODO confirm against the macro definitions.
;   * p and q are pointers to 32-bit floats; each routine accumulates the
;     products p[i]*q[i] and leaves the sum on the x87 stack in st0.
;   * Each MMX register holds two floats, so two accumulators (mm0, mm1)
;     carry four partial sums that are folded together at the end.
;   * The two remaining partial sums are stored as 8 bytes at [sp(%$p)],
;     i.e. over the incoming argument slot(s), and then added with x87
;     instructions.  `femms` is issued first so that the x87 unit is usable
;     again after MMX/3DNow! code.
;   * Registers written: eax, edx (ecx in the loop version), mm0-mm3, flags.
; ===========================================================================

; scalar04_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..3, result in st0.
proc    scalar04_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]         ; mm0 = p[0..1] * q[0..1]
        pfmul   mm1,qword [edx+8]       ; mm1 = p[2..3] * q[2..3]
        pfadd   mm0,mm1                 ; two partial sums remain
        pmov    qword [sp(%$p)],mm0     ; spill them over the argument slot
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar08_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..7, result in st0.
proc    scalar08_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar12_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..11, result in st0.
proc    scalar12_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+32]
        pmov    mm3,qword [eax+40]
        pfmul   mm2,qword [edx+32]
        pfmul   mm3,qword [edx+40]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar16_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..15, result in st0.
proc    scalar16_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+32]
        pmov    mm3,qword [eax+40]
        pfmul   mm2,qword [edx+32]
        pfmul   mm3,qword [edx+40]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+48]
        pmov    mm3,qword [eax+56]
        pfmul   mm2,qword [edx+48]
        pfmul   mm3,qword [edx+56]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar20_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..19, result in st0.
proc    scalar20_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+32]
        pmov    mm3,qword [eax+40]
        pfmul   mm2,qword [edx+32]
        pfmul   mm3,qword [edx+40]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+48]
        pmov    mm3,qword [eax+56]
        pfmul   mm2,qword [edx+48]
        pfmul   mm3,qword [edx+56]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+64]
        pmov    mm3,qword [eax+72]
        pfmul   mm2,qword [edx+64]
        pfmul   mm3,qword [edx+72]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar24_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..23, result in st0.
proc    scalar24_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+32]
        pmov    mm3,qword [eax+40]
        pfmul   mm2,qword [edx+32]
        pfmul   mm3,qword [edx+40]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+48]
        pmov    mm3,qword [eax+56]
        pfmul   mm2,qword [edx+48]
        pfmul   mm3,qword [edx+56]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+64]
        pmov    mm3,qword [eax+72]
        pfmul   mm2,qword [edx+64]
        pfmul   mm3,qword [edx+72]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+80]
        pmov    mm3,qword [eax+88]
        pfmul   mm2,qword [edx+80]
        pfmul   mm3,qword [edx+88]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar32_float32_3DNow(p, q): sum of p[i]*q[i] for i = 0..31, result in st0.
proc    scalar32_float32_3DNow
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        pmov    mm0,qword [eax]
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]

        pmov    mm2,qword [eax+16]
        pmov    mm3,qword [eax+24]
        pfmul   mm2,qword [edx+16]
        pfmul   mm3,qword [edx+24]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+32]
        pmov    mm3,qword [eax+40]
        pfmul   mm2,qword [edx+32]
        pfmul   mm3,qword [edx+40]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+48]
        pmov    mm3,qword [eax+56]
        pfmul   mm2,qword [edx+48]
        pfmul   mm3,qword [edx+56]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+64]
        pmov    mm3,qword [eax+72]
        pfmul   mm2,qword [edx+64]
        pfmul   mm3,qword [edx+72]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+80]
        pmov    mm3,qword [eax+88]
        pfmul   mm2,qword [edx+80]
        pfmul   mm3,qword [edx+88]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+96]
        pmov    mm3,qword [eax+104]
        pfmul   mm2,qword [edx+96]
        pfmul   mm3,qword [edx+104]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pmov    mm2,qword [eax+112]
        pmov    mm3,qword [eax+120]
        pfmul   mm2,qword [edx+112]
        pfmul   mm3,qword [edx+120]
        pfadd   mm0,mm2
        pfadd   mm1,mm3

        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar4n_float32_3DNow(p, q, len): sum of p[i]*q[i] over len groups of four
; floats (16 bytes per group), result in st0.
; The first group is processed unconditionally and the counter is decremented
; before it is tested, so len must be >= 1; len == 0 would wrap the counter.
proc    scalar4n_float32_3DNow
%$p     arg     4
%$q     arg     4
%$len   arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        mov     ecx,[sp(%$len)]         ; ecx = groups of 4 floats remaining
        pmov    mm0,qword [eax]         ; first group seeds both accumulators
        pmov    mm1,qword [eax+8]
        pfmul   mm0,qword [edx]
        pfmul   mm1,qword [edx+8]
        dec     ecx
        jz      .ret4                   ; only one group requested
        add     eax,byte 16
        add     edx,byte 16
.lbl4:
        pmov    mm2,qword [eax]
        pmov    mm3,qword [eax+8]
        pfmul   mm2,qword [edx]
        pfmul   mm3,qword [edx+8]
        add     eax,byte 16             ; advance both pointers by 4 floats
        add     edx,byte 16
        pfadd   mm0,mm2
        pfadd   mm1,mm3
        dec     ecx
        jnz     .lbl4
.ret4:
        pfadd   mm0,mm1                 ; fold the two accumulators
        pmov    qword [sp(%$p)],mm0
        femms
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar1n_float32_3DNow: no dedicated 3DNow! implementation; tail-jumps to
; the i387 routine of the same shape.
; FIX: the jump target used to be scalar24_float32_i387, a fixed-size routine,
; which does not match this entry point's name or the pattern used by every
; other fallback stub in this file (compare scalar1n_float32_SIMD).
proc    scalar1n_float32_3DNow
        jmp     scalar1n_float32_i387
endproc

; ===========================================================================
; SIMD (SSE) variants
;
; Each xmm register holds four floats; xmm0 and xmm1 are the accumulators.
; Sizes that are not a multiple of eight floats, and the variable-length
; entry points, simply tail-jump to the matching i387 routine.
;
; NOTE(review): the data at p is loaded with `movups` (no alignment
; requirement), but q is used directly as a `mulps` memory operand, which
; requires a 16-byte aligned address in legacy SSE encoding.  Presumably all
; callers pass an aligned q -- TODO confirm.
; ===========================================================================

; scalar04_float32_SIMD: forwards to the i387 implementation.
proc    scalar04_float32_SIMD
        jmp     scalar04_float32_i387
endproc

; scalar08_float32_SIMD(p, q): sum of p[i]*q[i] for i = 0..7, result in st0.
proc    scalar08_float32_SIMD
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        movups  xmm0, [eax]
        movups  xmm1, [eax+16]
        mulps   xmm0, [edx]
        mulps   xmm1, [edx+16]
        addps   xmm0,xmm1               ; four partial sums remain

        sub     esp,16                  ; temporary 16-byte spill area
        movups  [esp],xmm0
        fld     dword [esp+ 0]
        fadd    dword [esp+ 4]
        fadd    dword [esp+ 8]
        fadd    dword [esp+12]          ; st0 = final sum
        add     esp,16
endproc

; scalar12_float32_SIMD: forwards to the i387 implementation.
proc    scalar12_float32_SIMD
        jmp     scalar12_float32_i387
endproc

; scalar16_float32_SIMD(p, q): sum of p[i]*q[i] for i = 0..15, result in st0.
proc    scalar16_float32_SIMD
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        movups  xmm0, [eax]
        movups  xmm1, [eax+16]
        mulps   xmm0, [edx]
        mulps   xmm1, [edx+16]

        movups  xmm2, [eax+32]
        movups  xmm3, [eax+48]
        mulps   xmm2, [edx+32]
        mulps   xmm3, [edx+48]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        addps   xmm0,xmm1               ; four partial sums remain

        sub     esp,16                  ; temporary 16-byte spill area
        movups  [esp],xmm0
        fld     dword [esp+ 0]
        fadd    dword [esp+ 4]
        fadd    dword [esp+ 8]
        fadd    dword [esp+12]          ; st0 = final sum
        add     esp,16
endproc

; scalar20_float32_SIMD: forwards to the i387 implementation.
proc    scalar20_float32_SIMD
        jmp     scalar20_float32_i387
endproc

; scalar24_float32_SIMD(p, q): sum of p[i]*q[i] for i = 0..23, result in st0.
proc    scalar24_float32_SIMD
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        movups  xmm0, [eax]
        movups  xmm1, [eax+16]
        mulps   xmm0, [edx]
        mulps   xmm1, [edx+16]

        movups  xmm2, [eax+32]
        movups  xmm3, [eax+48]
        mulps   xmm2, [edx+32]
        mulps   xmm3, [edx+48]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        movups  xmm2, [eax+64]
        movups  xmm3, [eax+80]
        mulps   xmm2, [edx+64]
        mulps   xmm3, [edx+80]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        addps   xmm0,xmm1               ; four partial sums remain

        sub     esp,16                  ; temporary 16-byte spill area
        movups  [esp],xmm0
        fld     dword [esp+ 0]
        fadd    dword [esp+ 4]
        fadd    dword [esp+ 8]
        fadd    dword [esp+12]          ; st0 = final sum
        add     esp,16
endproc

; scalar32_float32_SIMD(p, q): sum of p[i]*q[i] for i = 0..31, result in st0.
; Unlike the other SIMD routines, the final reduction halves the vector with
; movhlps/addps and spills only 8 bytes, over the argument slot at [sp(%$p)].
proc    scalar32_float32_SIMD
%$p     arg     4
%$q     arg     4
        mov     eax,[sp(%$p)]           ; eax = p
        mov     edx,[sp(%$q)]           ; edx = q
        movups  xmm0, [eax]
        movups  xmm1, [eax+16]
        mulps   xmm0, [edx]
        mulps   xmm1, [edx+16]

        movups  xmm2, [eax+32]
        movups  xmm3, [eax+48]
        mulps   xmm2, [edx+32]
        mulps   xmm3, [edx+48]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        movups  xmm2, [eax+64]
        movups  xmm3, [eax+80]
        mulps   xmm2, [edx+64]
        mulps   xmm3, [edx+80]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        movups  xmm2, [eax+96]
        movups  xmm3, [eax+112]
        mulps   xmm2, [edx+96]
        mulps   xmm3, [edx+112]
        addps   xmm0,xmm2
        addps   xmm1,xmm3

        addps   xmm0,xmm1               ; four partial sums remain

        ; Earlier reduction through a 16-byte stack spill, left disabled:
        ;sub    esp,16
        ;movups [esp],xmm0
        ;fld    dword [esp+ 0]
        ;fadd   dword [esp+ 4]
        ;fadd   dword [esp+ 8]
        ;fadd   dword [esp+12]
        ;add    esp,16

        movhlps xmm1,xmm0               ; xmm1[0..1] = xmm0[2..3]
        addps   xmm0,xmm1               ; low two lanes hold the partial sums
        movlps  [sp(%$p)],xmm0          ; spill low 8 bytes over the arg slot
        fld     dword [sp(%$p)]
        fadd    dword [sp(%$p)+4]       ; st0 = final sum
endproc

; scalar4n_float32_SIMD: forwards to the i387 implementation.
proc    scalar4n_float32_SIMD
        jmp     scalar4n_float32_i387
endproc

; scalar1n_float32_SIMD: forwards to the i387 implementation.
proc    scalar1n_float32_SIMD
        jmp     scalar1n_float32_i387
endproc

; end of scalar.nas
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -