📄 sseroutines_test.asm
字号:
format PE console
include '%fasminc%\win32axp.inc'
ITER equ 64 ; iteration budget per pixel group: the paint macros load it into ecx as the inner-loop counter
Julia equ 0 ; value of the paint macros' `type` argument that selects the Julia variant (constant c from cx2/cy2)
Mandel equ 1 ; value of the paint macros' `type` argument that selects the Mandelbrot variant (c = pixel coordinate)
; copyscr reg, from
;   Broadcasts the 32-bit scalar stored at [from] into all four lanes of
;   the SSE register `reg`.
;   Clobbers: `reg` only (no general-purpose registers, no flags).
macro copyscr reg, from
{
        movss   reg, dword [from]       ; lane 0 = [from], lanes 1-3 = 0
        shufps  reg, reg, 0             ; selector 0: every lane <- lane 0
}
; startm
;   Opens a timing interval: executes CPUID (leaf 0) ahead of RDTSC and
;   stores the low 32 bits of the time-stamp counter in [b].
;   Clobbers: eax, ebx, ecx, edx (all written by CPUID/RDTSC) and flags.
;   Note that ebx is destroyed here and is NOT restored by this macro.
macro startm
{
        xor     eax, eax                ; select CPUID leaf 0
        cpuid                           ; executed before the counter is read
        rdtsc                           ; edx:eax = time-stamp counter
        mov     [b], eax                ; keep only the low dword as start mark
}
; endm
;   Closes a timing interval opened by startm.
;   Out:      eax = (low 32 bits of the time-stamp counter now) - [b]
;   Clobbers: ebx, ecx, edx (written by CPUID/RDTSC) and flags.
;   Only the low dword is used, so the difference is taken modulo 2^32.
macro endm
{
        xor     eax, eax                ; select CPUID leaf 0
        cpuid                           ; executed before the counter is read
        rdtsc                           ; edx:eax = time-stamp counter
        sub     eax, [b]                ; elapsed ticks since startm (low dword)
}
; MUL
; JuliaMandelPaintMUL color, type
;   Inline escape-time renderer (z <- z^2 + c) that processes four
;   horizontally adjacent pixels per SSE register. In this scheduling variant
;   the three products zx^2, zx*zy and zy^2 are issued back to back.
;
;   Assembly-time arguments:
;     color  true  : each pixel is written as a dword fetched from the table
;                    at [a], addressed by the per-lane accumulator in xmm6
;            false : each pixel is written as maskbw if its lane failed the
;                    radius test, 0 otherwise
;     type   Julia  : c is the constant taken from cx2 / cy2
;            Mandel : c is the pixel's own coordinate (xmm4 / xmm5)
;
;   Reads  : [w] [h] [a] [bits] [dx2] [dy2] [LEFT] [TOP] [radius]
;            [mask] [mask1] [mask2]; [cx2] [cy2] for Julia; [maskbw] when
;            color is false
;   Writes : scratch vectors [dx1] [dy1] [left1] ([cx1] [cy1] for Julia) and
;            the pixel buffer that [bits] points to
;   Clobbers eax, ecx, edx, xmm0-xmm7, flags. ebx/esi/edi are saved/restored.
;
;   Requirements that follow from the code:
;     * [w] must be a positive multiple of 4 and [h] must be non-zero: both
;       counters are decremented with sub/jnz and stop only at exactly zero.
;     * all dqword memory operands must be 16-byte aligned; when color is
;       false the pixel buffer must be as well (it is written with MOVAPS).
;
;   Register roles inside the loops:
;     xmm0 = zx   xmm1 = zy   xmm2, xmm3 = temporaries
;     xmm4 = x coordinates of the four current pixels
;     xmm5 = y coordinate of the current row
;     xmm6 = per-lane accumulator   xmm7 = radius constant
;     eax = scratch   ebx = pixels left in row   ecx = iterations left
;     edx = rows left   edi = output pointer     esi = colour table base
macro JuliaMandelPaintMUL color, type
{
local next_row, first_row, next_quad, iterate, store_px
        push    ebx esi edi
        mov     edx, [h]                        ; row counter
        mov     esi, [a]                        ; colour table base
        mov     edi, [bits]                     ; output pointer
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0              ; broadcast c kept in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3              ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
        ; Build the per-lane x offsets in xmm0 from the broadcast dx2.
        ; According to the original note the result is 0 | dx2 | dx2*2 | dx2*3;
        ; that depends on mask / mask1 / mask2, which are defined elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0                      ; x of the first four pixels
        movaps  dqword [left1], xmm4            ; reloaded at every new row
        addps   xmm2, xmm2
        addps   xmm2, xmm2                      ; xmm2 = 4 * dx2
        movaps  dqword [dx1], xmm2              ; x step per group of four
        movaps  xmm7, dqword [radius]
        jmp     first_row                       ; xmm4 already holds the row start

next_row:
        movaps  xmm4, dqword [left1]            ; rewind x to the left edge
first_row:
        mov     ebx, [w]                        ; pixels left in this row
next_quad:
        movaps  xmm0, xmm4                      ; zx = x
        xorps   xmm6, xmm6                      ; accumulator = 0
        movaps  xmm1, xmm5                      ; zy = y
        mov     ecx, ITER
iterate:
        movaps  xmm2, xmm0                      ; xmm2 = zx
        mulps   xmm0, xmm0                      ; xmm0 = zx^2
        mulps   xmm2, xmm1                      ; xmm2 = zx*zy
        mulps   xmm1, xmm1                      ; xmm1 = zy^2
        movaps  xmm3, xmm1                      ; xmm3 = zy^2
        addps   xmm1, xmm0                      ; xmm1 = zx^2 + zy^2
        cmpleps xmm1, xmm7                      ; xmm1 = lane mask (sum <= radius)
        subps   xmm0, xmm3                      ; xmm0 = zx^2 - zy^2
        addps   xmm2, xmm2                      ; xmm2 = 2*zx*zy
        movmskps eax, xmm1                      ; eax = one bit per passing lane
    if color
    else
        ; xmm1 is about to be reused for the new zy, so keep the mask in
        ; xmm3 (free after the SUBPS above) for the two-valued store below.
        movaps  xmm3, xmm1
    end if
        test    eax, eax
        jz      store_px                        ; no lane passed: stop iterating
    if color
        andps   xmm1, xmm7                      ; radius value in passing lanes, else 0
        addps   xmm6, xmm1                      ; accumulate per lane
    end if
        movaps  xmm1, xmm2                      ; zy = 2*zx*zy
    if type = Julia
        addps   xmm1, dqword [cy1]              ; zy += cy
        addps   xmm0, dqword [cx1]              ; zx += cx
    else if type = Mandel
        addps   xmm1, xmm5                      ; zy += y
        addps   xmm0, xmm4                      ; zx += x
    end if
        sub     ecx, 1
        jnz     iterate
store_px:
    if color
        ; Original author's estimate for this sequence: 21-22 cycles.
        ; Each accumulator lane is converted to an integer and used as a
        ; byte offset into the table at esi. The original notes give the
        ; radius constant as 4.0, which would make the offset 4 * count.
        cvtss2si eax, xmm6                      ; lane 0
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        andnps  xmm3, dqword [maskbw]           ; (NOT mask) AND maskbw
        movaps  [edi], xmm3                     ; four pixels at once
    end if
        add     edi, 16                         ; four dword pixels written
        addps   xmm4, dqword [dx1]              ; advance x by four pixels
        sub     ebx, 4
        jnz     next_quad
        addps   xmm5, dqword [dy1]              ; advance y by one row
        sub     edx, 1
        jnz     next_row
        pop     edi esi ebx
}
; FFFF
; JuliaMandelPaintFFFF color, type
;   Inline escape-time renderer (z <- z^2 + c) that processes four
;   horizontally adjacent pixels per SSE register. In this scheduling variant
;   zx*zy is computed first, the radius test uses a strict "less than"
;   (CMPLTPS, the sibling variants use CMPLEPS), and zx^2 - zy^2 is formed
;   only after the exit test.
;
;   Assembly-time arguments:
;     color  true  : each pixel is written as a dword fetched from the table
;                    at [a], addressed by the per-lane accumulator in xmm6
;            false : each pixel is written as maskbw if its lane failed the
;                    radius test, 0 otherwise
;     type   Julia  : c is the constant taken from cx2 / cy2
;            Mandel : c is the pixel's own coordinate (xmm4 / xmm5)
;
;   Reads  : [w] [h] [a] [bits] [dx2] [dy2] [LEFT] [TOP] [radius]
;            [mask] [mask1] [mask2]; [cx2] [cy2] for Julia; [maskbw] when
;            color is false
;   Writes : scratch vectors [dx1] [dy1] [left1] ([cx1] [cy1] for Julia) and
;            the pixel buffer that [bits] points to
;   Clobbers eax, ecx, edx, xmm0-xmm7, flags. ebx/esi/edi are saved/restored.
;
;   Requirements that follow from the code:
;     * [w] must be a positive multiple of 4 and [h] must be non-zero: both
;       counters are decremented and tested with jnz, so they stop only at
;       exactly zero.
;     * all dqword memory operands must be 16-byte aligned; when color is
;       false the pixel buffer must be as well (it is written with MOVAPS).
;
;   Register roles inside the loops:
;     xmm0 = zx   xmm1 = zy   xmm2 = 2*zx*zy   xmm3 = |z|^2, then lane mask
;     xmm4 = x coordinates of the four current pixels
;     xmm5 = y coordinate of the current row
;     xmm6 = per-lane accumulator   xmm7 = radius constant
;     eax = scratch   ebx = pixels left in row   ecx = iterations left
;     edx = rows left   edi = output pointer     esi = colour table base
macro JuliaMandelPaintFFFF color, type
{
local next_row, first_row, next_quad, iterate, store_px
        push    ebx esi edi
        mov     edx, [h]                        ; row counter
        mov     esi, [a]                        ; colour table base
        mov     edi, [bits]                     ; output pointer
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0              ; broadcast c kept in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3              ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
        ; Build the per-lane x offsets in xmm0 from the broadcast dx2.
        ; According to the original note the result is 0 | dx2 | dx2*2 | dx2*3;
        ; that depends on mask / mask1 / mask2, which are defined elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0                      ; x of the first four pixels
        movaps  dqword [left1], xmm4            ; reloaded at every new row
        addps   xmm2, xmm2
        addps   xmm2, xmm2                      ; xmm2 = 4 * dx2
        movaps  dqword [dx1], xmm2              ; x step per group of four
        movaps  xmm7, dqword [radius]
        jmp     first_row                       ; xmm4 already holds the row start

next_row:
        movaps  xmm4, dqword [left1]            ; rewind x to the left edge
first_row:
        mov     ebx, [w]                        ; pixels left in this row
next_quad:
        movaps  xmm0, xmm4                      ; zx = x
        xorps   xmm6, xmm6                      ; accumulator = 0
        movaps  xmm1, xmm5                      ; zy = y
        mov     ecx, ITER
iterate:
        movaps  xmm2, xmm0                      ; xmm2 = zx
        mulps   xmm2, xmm1                      ; xmm2 = zx*zy
        mulps   xmm0, xmm0                      ; xmm0 = zx^2
        mulps   xmm1, xmm1                      ; xmm1 = zy^2
        addps   xmm2, xmm2                      ; xmm2 = 2*zx*zy
        movaps  xmm3, xmm0
        addps   xmm3, xmm1                      ; xmm3 = zx^2 + zy^2
        cmpltps xmm3, xmm7                      ; xmm3 = lane mask (sum < radius)
        movmskps eax, xmm3                      ; eax = one bit per passing lane
        test    eax, eax
        jz      store_px                        ; no lane passed: stop iterating
        subps   xmm0, xmm1                      ; xmm0 = zx^2 - zy^2
        movaps  xmm1, xmm2                      ; zy = 2*zx*zy
    if color
        andps   xmm3, xmm7                      ; radius value in passing lanes, else 0
        addps   xmm6, xmm3                      ; accumulate per lane
    end if
    if type = Julia
        addps   xmm0, dqword [cx1]              ; zx += cx
        addps   xmm1, dqword [cy1]              ; zy += cy
    else if type = Mandel
        addps   xmm0, xmm4                      ; zx += x
        addps   xmm1, xmm5                      ; zy += y
    end if
        dec     ecx
        jnz     iterate
store_px:
    if color
        ; Original author's estimate for this sequence: 21-22 cycles.
        ; Each accumulator lane is converted to an integer and used as a
        ; byte offset into the table at esi. The original notes give the
        ; radius constant as 4.0, which would make the offset 4 * count.
        cvtss2si eax, xmm6                      ; lane 0
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        ; When color is false xmm3 still holds the lane mask of the last
        ; radius test (xmm2 holds 2*zx*zy here and must not be used).
        andnps  xmm3, dqword [maskbw]           ; (NOT mask) AND maskbw
        movaps  [edi], xmm3                     ; four pixels at once
    end if
        add     edi, 16                         ; four dword pixels written
        addps   xmm4, dqword [dx1]              ; advance x by four pixels
        sub     ebx, 4
        jnz     next_quad
        addps   xmm5, dqword [dy1]              ; advance y by one row
        sub     edx, 1
        jnz     next_row
        pop     edi esi ebx
}
; MOVADD
; JuliaMandelPaintMOVADD color, type
;   Inline escape-time renderer (z <- z^2 + c) that processes four
;   horizontally adjacent pixels per SSE register. In this scheduling variant
;   the doubling of zy is done with an ADDPS before the cross product, and
;   the constant c is added after the exit test.
;
;   Assembly-time arguments:
;     color  true  : each pixel is written as a dword fetched from the table
;                    at [a], addressed by the per-lane accumulator in xmm6
;            false : each pixel is written as maskbw if its lane failed the
;                    radius test, 0 otherwise
;     type   Julia  : c is the constant taken from cx2 / cy2
;            Mandel : c is the pixel's own coordinate (xmm4 / xmm5)
;
;   Reads  : [w] [h] [a] [bits] [dx2] [dy2] [LEFT] [TOP] [radius]
;            [mask] [mask1] [mask2]; [cx2] [cy2] for Julia; [maskbw] when
;            color is false
;   Writes : scratch vectors [dx1] [dy1] [left1] ([cx1] [cy1] for Julia) and
;            the pixel buffer that [bits] points to
;   Clobbers eax, ecx, edx, xmm0-xmm7, flags. ebx/esi/edi are saved/restored.
;
;   Requirements that follow from the code:
;     * [w] must be a positive multiple of 4 and [h] must be non-zero: both
;       counters are decremented with sub/jnz and stop only at exactly zero.
;     * all dqword memory operands must be 16-byte aligned; when color is
;       false the pixel buffer must be as well (it is written with MOVAPS).
;
;   Register roles inside the loops:
;     xmm0 = zx   xmm1 = zy   xmm2 = temporary, then lane mask
;     xmm3 = zy^2
;     xmm4 = x coordinates of the four current pixels
;     xmm5 = y coordinate of the current row
;     xmm6 = per-lane accumulator   xmm7 = radius constant
;     eax = scratch   ebx = pixels left in row   ecx = iterations left
;     edx = rows left   edi = output pointer     esi = colour table base
macro JuliaMandelPaintMOVADD color, type
{
local next_row, first_row, next_quad, iterate, store_px
        push    ebx esi edi
        mov     edx, [h]                        ; row counter
        mov     esi, [a]                        ; colour table base
        mov     edi, [bits]                     ; output pointer
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0              ; broadcast c kept in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3              ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
        ; Build the per-lane x offsets in xmm0 from the broadcast dx2.
        ; According to the original note the result is 0 | dx2 | dx2*2 | dx2*3;
        ; that depends on mask / mask1 / mask2, which are defined elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0                      ; x of the first four pixels
        movaps  dqword [left1], xmm4            ; reloaded at every new row
        addps   xmm2, xmm2
        addps   xmm2, xmm2                      ; xmm2 = 4 * dx2
        movaps  dqword [dx1], xmm2              ; x step per group of four
        movaps  xmm7, dqword [radius]
        jmp     first_row                       ; xmm4 already holds the row start

next_row:
        movaps  xmm4, dqword [left1]            ; rewind x to the left edge
first_row:
        mov     ebx, [w]                        ; pixels left in this row
next_quad:
        movaps  xmm0, xmm4                      ; zx = x
        xorps   xmm6, xmm6                      ; accumulator = 0
        movaps  xmm1, xmm5                      ; zy = y
        mov     ecx, ITER
iterate:
        movaps  xmm2, xmm0                      ; xmm2 = zx
        mulps   xmm0, xmm0                      ; xmm0 = zx^2
        movaps  xmm3, xmm1                      ; xmm3 = zy
        addps   xmm1, xmm1                      ; xmm1 = 2*zy
        mulps   xmm3, xmm3                      ; xmm3 = zy^2
        mulps   xmm1, xmm2                      ; xmm1 = 2*zx*zy
        movaps  xmm2, xmm0
        addps   xmm2, xmm3                      ; xmm2 = zx^2 + zy^2
        cmpleps xmm2, xmm7                      ; xmm2 = lane mask (sum <= radius)
        subps   xmm0, xmm3                      ; xmm0 = zx^2 - zy^2
        movmskps eax, xmm2                      ; eax = one bit per passing lane
        test    eax, eax
        jz      store_px                        ; no lane passed: stop iterating
    if color
        andps   xmm2, xmm7                      ; radius value in passing lanes, else 0
        addps   xmm6, xmm2                      ; accumulate per lane
    end if
    if type = Julia
        addps   xmm1, dqword [cy1]              ; zy += cy
        addps   xmm0, dqword [cx1]              ; zx += cx
    else if type = Mandel
        addps   xmm1, xmm5                      ; zy += y
        addps   xmm0, xmm4                      ; zx += x
    end if
        sub     ecx, 1
        jnz     iterate
store_px:
    if color
        ; Original author's estimate for this sequence: 21-22 cycles.
        ; Each accumulator lane is converted to an integer and used as a
        ; byte offset into the table at esi. The original notes give the
        ; radius constant as 4.0, which would make the offset 4 * count.
        cvtss2si eax, xmm6                      ; lane 0
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        ; When color is false xmm2 still holds the lane mask of the last
        ; radius test.
        andnps  xmm2, dqword [maskbw]           ; (NOT mask) AND maskbw
        movaps  [edi], xmm2                     ; four pixels at once
    end if
        add     edi, 16                         ; four dword pixels written
        addps   xmm4, dqword [dx1]              ; advance x by four pixels
        sub     ebx, 4
        jnz     next_quad
        addps   xmm5, dqword [dy1]              ; advance y by one row
        sub     edx, 1
        jnz     next_row
        pop     edi esi ebx
}
;MOVADD2
; JuliaMandelPaintMOVADD2 color, type
;   Inline escape-time renderer (z <- z^2 + c) that processes four
;   horizontally adjacent pixels per SSE register. In this scheduling variant
;   the additions of c are interleaved with the radius test, so the new z is
;   complete before the exit branch is taken.
;   Bracketed numbers such as [0-6] are the original author's per-instruction
;   timing annotations, reproduced unchanged and not re-verified.
;
;   Assembly-time arguments:
;     color  true  : each pixel is written as a dword fetched from the table
;                    at [a], addressed by the per-lane accumulator in xmm6
;            false : each pixel is written as maskbw if its lane failed the
;                    radius test, 0 otherwise
;     type   Julia  : c is the constant taken from cx2 / cy2
;            Mandel : c is the pixel's own coordinate (xmm4 / xmm5)
;
;   Reads  : [w] [h] [a] [bits] [dx2] [dy2] [LEFT] [TOP] [radius]
;            [mask] [mask1] [mask2]; [cx2] [cy2] for Julia; [maskbw] when
;            color is false
;   Writes : scratch vectors [dx1] [dy1] [left1] ([cx1] [cy1] for Julia) and
;            the pixel buffer that [bits] points to
;   Clobbers eax, ecx, edx, xmm0-xmm7, flags. ebx/esi/edi are saved/restored.
;
;   Requirements that follow from the code:
;     * [w] must be a positive multiple of 4 and [h] must be non-zero: both
;       counters are decremented with sub/jnz and stop only at exactly zero.
;     * all dqword memory operands must be 16-byte aligned; when color is
;       false the pixel buffer must be as well (it is written with MOVAPS).
;
;   Register roles inside the loops:
;     xmm0 = zx   xmm1 = zy   xmm2 = temporary, then lane mask
;     xmm3 = zy^2
;     xmm4 = x coordinates of the four current pixels
;     xmm5 = y coordinate of the current row
;     xmm6 = per-lane accumulator   xmm7 = radius constant
;     eax = scratch   ebx = pixels left in row   ecx = iterations left
;     edx = rows left   edi = output pointer     esi = colour table base
macro JuliaMandelPaintMOVADD2 color, type
{
local next_row, first_row, next_quad, iterate, store_px
        push    ebx esi edi
        mov     edx, [h]                        ; row counter
        mov     esi, [a]                        ; colour table base
        mov     edi, [bits]                     ; output pointer
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0              ; broadcast c kept in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3              ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
        ; Build the per-lane x offsets in xmm0 from the broadcast dx2.
        ; According to the original note the result is 0 | dx2 | dx2*2 | dx2*3;
        ; that depends on mask / mask1 / mask2, which are defined elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0                      ; x of the first four pixels
        movaps  dqword [left1], xmm4            ; reloaded at every new row
        addps   xmm2, xmm2
        addps   xmm2, xmm2                      ; xmm2 = 4 * dx2
        movaps  dqword [dx1], xmm2              ; x step per group of four
        movaps  xmm7, dqword [radius]
        jmp     first_row                       ; xmm4 already holds the row start

next_row:
        movaps  xmm4, dqword [left1]            ; rewind x to the left edge
first_row:
        mov     ebx, [w]                        ; pixels left in this row
next_quad:
        movaps  xmm0, xmm4                      ; zx = x
        xorps   xmm6, xmm6                      ; accumulator = 0
        movaps  xmm1, xmm5                      ; zy = y
        mov     ecx, ITER
iterate:
        movaps  xmm2, xmm0                      ; [0-6]   xmm2 = zx
        mulps   xmm0, xmm0                      ; [0-6]   xmm0 = zx^2
        movaps  xmm3, xmm1                      ; [1-7]   xmm3 = zy
        addps   xmm1, xmm1                      ; [1-5]   xmm1 = 2*zy
        mulps   xmm1, xmm2                      ; [6-12]  xmm1 = 2*zx*zy
        movaps  xmm2, xmm0                      ; [7-13]  xmm2 = zx^2
        mulps   xmm3, xmm3                      ; [8-14]  xmm3 = zy^2
    if type = Julia
        addps   xmm1, dqword [cy1]              ;         zy = 2*zx*zy + cy
    else if type = Mandel
        addps   xmm1, xmm5                      ; [12-16] zy = 2*zx*zy + y
    end if
        addps   xmm2, xmm3                      ; [14-18] xmm2 = zx^2 + zy^2
        subps   xmm0, xmm3                      ; [16-20] xmm0 = zx^2 - zy^2
        cmpleps xmm2, xmm7                      ; [18-22] xmm2 = lane mask (sum <= radius)
    if type = Julia
        addps   xmm0, dqword [cx1]              ;         zx = zx^2 - zy^2 + cx
    else if type = Mandel
        addps   xmm0, xmm4                      ; [20-24] zx = zx^2 - zy^2 + x
    end if
        movmskps eax, xmm2                      ; [22-28] eax = one bit per passing lane
        test    eax, eax
        jz      store_px                        ; no lane passed: stop iterating
    if color
        andps   xmm2, xmm7                      ; [23-25] radius value in passing lanes, else 0
        addps   xmm6, xmm2                      ; [26-30] accumulate per lane
    end if
        sub     ecx, 1
        jnz     iterate
store_px:
    if color
        ; Original author's estimate for this sequence: 21-22 cycles.
        ; Each accumulator lane is converted to an integer and used as a
        ; byte offset into the table at esi. The original notes give the
        ; radius constant as 4.0, which would make the offset 4 * count.
        cvtss2si eax, xmm6                      ; lane 0
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        ; When color is false xmm2 still holds the lane mask of the last
        ; radius test.
        andnps  xmm2, dqword [maskbw]           ; (NOT mask) AND maskbw
        movaps  [edi], xmm2                     ; four pixels at once
    end if
        add     edi, 16                         ; four dword pixels written
        addps   xmm4, dqword [dx1]              ; advance x by four pixels
        sub     ebx, 4
        jnz     next_quad
        addps   xmm5, dqword [dy1]              ; advance y by one row
        sub     edx, 1
        jnz     next_row
        pop     edi esi ebx
}
;MOVADD2ps2dq
macro JuliaMandelPaintMOVADD2ps2dq color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
MOVAPS xmm0, xmm2 ; xmm2 = 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = zx2 xmm5 = zy2 xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -