📄 sseroutines_test.asm
字号:
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0 ; 0 - 6
MULPS xmm0, xmm0 ; 0 - 6
MOVAPS xmm3, xmm1 ; 1 - 7
ADDPS xmm1, xmm1 ; 1 - 5
MULPS xmm1, xmm2 ; 6 - 12
MOVAPS xmm2, xmm0 ; 7 - 13
MULPS xmm3, xmm3 ; 8 - 14
; xmm0 = zx^2 xmm1 = 2 * zy xmm2 = zx xmm3 = zy^2
if type = Julia
ADDPS xmm1, dqword[cy1]
else if type = Mandel
ADDPS xmm1, xmm5 ; 12 - 16
end if
ADDPS xmm2, xmm3 ; 14 - 18
SUBPS xmm0, xmm3 ; 16 - 20
; xmm0 = zx^2 - zy^2 xmm1 = 2*zx*zy xmm2 = zx^2 + zy^2 xmm3 = zy^2
CMPLEPS xmm2, xmm7 ; 18 - 22
if type = Julia
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm0, xmm4 ; 20 - 24
end if
MOVMSKPS eax, xmm2 ; 22 - 28
test eax, eax
jz EXIT
if color
ANDPS xmm2, xmm7 ; 23 - 25 ; xmm6 += (xmm2 < radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2 ; 26 - 30
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
CVTPS2DQ xmm6, xmm6
MOVAPS dqword[scratch], xmm6
mov eax, [scratch]
mov ecx, [scratch+4]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi], eax
mov [edi+4], ecx
mov eax, [scratch+8]
mov ecx, [scratch+12]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi+8], eax
mov [edi+12], ecx
else
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
;MOVADD3
;-----------------------------------------------------------------------
; JuliaMandelPaintMOVADD3 color, type
;
; Expands to an SSE escape-time renderer that iterates z = z^2 + c on
; four horizontally adjacent points at once.
;
;   color : assembly-time flag. True  -> each point's accumulated count is
;           used as an offset from [a] to fetch a dword that is written
;           to the output. False -> a mask derived from [maskbw] is
;           written instead.
;   type  : Julia  -> the constant added each iteration is cx1 / cy1.
;           Mandel -> the constant is the point's own coordinate
;                     (xmm4 / xmm5).
;
;   Reads   : h, w, a, bits, radius, mask, mask1, mask2, maskbw, ITER,
;             plus cx2, cy2, dx2, dy2, LEFT, TOP through copyscr.
;   Writes  : cx1, cy1 (Julia only), dx1, dy1, left1, buffer at [bits].
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags (and anything copyscr
;             touches). ebx, esi, edi are saved and restored.
;
; Register roles while the loops run:
;   xmm0 / xmm1 = zx / zy          xmm2, xmm3 = temporaries
;   xmm4 = x of the current four points, xmm5 = y of the current row
;   xmm6 = per-lane accumulator    xmm7 = contents of [radius]
;   eax = temp, ebx = points left in the row, ecx = iterations left,
;   edx = rows left, edi = output cursor, esi = look-up base
;
; Constraints visible in the code:
;   * w must be a positive multiple of 4 and h nonzero, because both
;     outer loops stop only when their counter lands exactly on zero.
;   * every dqword memory operand, and [edi] in the non-colour path,
;     is accessed with aligned SSE forms and must be 16-byte aligned.
;   * the colour path uses the accumulator directly as a byte offset,
;     so the per-iteration increment (the value held in [radius]) has to
;     match the table entry size. The original notes say 4.0 - confirm.
;
; copyscr is defined elsewhere; it presumably broadcasts a scalar into
; all four lanes of the register - TODO confirm.
; Bracketed numbers in trailing comments are the original author's
; start-end cycle estimates, carried over unverified.
; NOTE(review): those estimates suggest the instruction order in the
; inner loop is deliberate; keep it when editing.
;-----------------------------------------------------------------------
macro JuliaMandelPaintMOVADD3 color, type
{
    local row_loop, quad_loop, iter_loop, iter_done, row_entry

        push    ebx esi edi
        mov     edx, [h]                ; edx = rows left
        mov     esi, [a]                ; esi = base for the colour look-ups
        mov     edi, [bits]             ; edi = output cursor

    ; ---- one-time setup ----------------------------------------------
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0      ; keep the Julia constant in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3      ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP

        ; Per-lane x offsets: (dx and mask1) + (dx and mask2) + dx, then
        ; filtered by mask. The original note gives the intended result
        ; as 0 | dx2 | dx2*2 | dx2*3; the mask constants live elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0              ; xmm4 = x of the first four points
        movaps  dqword [left1], xmm4    ; reloaded at the start of every later row

        addps   xmm2, xmm2
        addps   xmm2, xmm2              ; xmm2 = 4 * dx
        movaps  dqword [dx1], xmm2      ; x step between groups of four
        movaps  xmm7, dqword [radius]
        jmp     row_entry               ; first row already has xmm4 set

    ; ---- per row -----------------------------------------------------
row_loop:
        movaps  xmm4, dqword [left1]    ; rewind x to the left edge
row_entry:
        mov     ebx, [w]                ; ebx = points left in this row

    ; ---- per group of four points ------------------------------------
quad_loop:
        movaps  xmm0, xmm4              ; zx starts at the point's x
        xorps   xmm6, xmm6              ; clear the accumulator
        movaps  xmm1, xmm5              ; zy starts at the row's y
        mov     ecx, ITER

    ; ---- per iteration -----------------------------------------------
iter_loop:
        movaps  xmm2, xmm0              ; xmm2 = zx                      [0-6]
        mulps   xmm0, xmm0              ; xmm0 = zx^2                    [0-6]
        movaps  xmm3, xmm1              ; xmm3 = zy                      [1-7]
        addps   xmm1, xmm1              ; xmm1 = 2*zy                    [1-5]
        mulps   xmm1, xmm2              ; xmm1 = 2*zx*zy                 [6-12]
        movaps  xmm2, xmm0              ; xmm2 = zx^2                    [7-13]
        mulps   xmm3, xmm3              ; xmm3 = zy^2                    [8-14]
    if type = Julia
        addps   xmm1, dqword [cy1]      ; next zy = 2*zx*zy + cy1
    else if type = Mandel
        addps   xmm1, xmm5              ; next zy = 2*zx*zy + row y      [12-16]
    end if
        subps   xmm0, xmm3              ; xmm0 = zx^2 - zy^2             [14-18]
        addps   xmm2, xmm3              ; xmm2 = zx^2 + zy^2             [16-20]
        cmpleps xmm2, xmm7              ; lane = all ones where sum <= xmm7  [20-24]
    if type = Julia
        addps   xmm0, dqword [cx1]      ; next zx = zx^2 - zy^2 + cx1
    else if type = Mandel
        addps   xmm0, xmm4              ; next zx = zx^2 - zy^2 + point x  [18-22]
    end if
        movmskps eax, xmm2              ; one bit per lane               [24-30]
        test    eax, eax
        jz      iter_done               ; no lane passed the test any more
    if color
        andps   xmm2, xmm7              ; passing lanes = xmm7, others 0 [25-27]
        addps   xmm6, xmm2              ; accumulate per lane            [28-32]
    end if
        sub     ecx, 1
        jnz     iter_loop

    ; ---- write the four results --------------------------------------
iter_done:
    if color
        ; original estimate for this sequence: 21-22 cycles
        cvtss2si eax, xmm6              ; lane 0 as an integer byte offset
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h        ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h        ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h        ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        andnps  xmm2, dqword [maskbw]   ; (not last test mask) and maskbw
        movaps  [edi], xmm2             ; aligned 16-byte store
    end if
        add     edi, 16                 ; four dwords written
        addps   xmm4, dqword [dx1]      ; advance x by 4 * dx
        sub     ebx, 4
        jnz     quad_loop

        addps   xmm5, dqword [dy1]      ; advance y to the next row
        sub     edx, 1
        jnz     row_loop

        pop     edi esi ebx
}
;MOVADD4
;-----------------------------------------------------------------------
; JuliaMandelPaintMOVADD4 color, type
;
; Expands to an SSE escape-time renderer that iterates z = z^2 + c on
; four horizontally adjacent points at once. In this variant the radius
; compare is issued after the real-part constant has been added.
;
;   color : assembly-time flag. True  -> each point's accumulated count is
;           used as an offset from [a] to fetch a dword that is written
;           to the output. False -> a mask derived from [maskbw] is
;           written instead.
;   type  : Julia  -> the constant added each iteration is cx1 / cy1.
;           Mandel -> the constant is the point's own coordinate
;                     (xmm4 / xmm5).
;
;   Reads   : h, w, a, bits, radius, mask, mask1, mask2, maskbw, ITER,
;             plus cx2, cy2, dx2, dy2, LEFT, TOP through copyscr.
;   Writes  : cx1, cy1 (Julia only), dx1, dy1, left1, buffer at [bits].
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags (and anything copyscr
;             touches). ebx, esi, edi are saved and restored.
;
; Register roles while the loops run:
;   xmm0 / xmm1 = zx / zy          xmm2, xmm3 = temporaries
;   xmm4 = x of the current four points, xmm5 = y of the current row
;   xmm6 = per-lane accumulator    xmm7 = contents of [radius]
;   eax = temp, ebx = points left in the row, ecx = iterations left,
;   edx = rows left, edi = output cursor, esi = look-up base
;
; Constraints visible in the code:
;   * w must be a positive multiple of 4 and h nonzero, because both
;     outer loops stop only when their counter lands exactly on zero.
;   * every dqword memory operand, and [edi] in the non-colour path,
;     is accessed with aligned SSE forms and must be 16-byte aligned.
;   * the colour path uses the accumulator directly as a byte offset,
;     so the per-iteration increment (the value held in [radius]) has to
;     match the table entry size. The original notes say 4.0 - confirm.
;
; copyscr is defined elsewhere; it presumably broadcasts a scalar into
; all four lanes of the register - TODO confirm.
; Bracketed numbers in trailing comments are the original author's
; start-end cycle estimates, carried over unverified.
; NOTE(review): those estimates suggest the instruction order in the
; inner loop is deliberate; keep it when editing.
;-----------------------------------------------------------------------
macro JuliaMandelPaintMOVADD4 color, type
{
    local row_loop, quad_loop, iter_loop, iter_done, row_entry

        push    ebx esi edi
        mov     edx, [h]                ; edx = rows left
        mov     esi, [a]                ; esi = base for the colour look-ups
        mov     edi, [bits]             ; edi = output cursor

    ; ---- one-time setup ----------------------------------------------
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0      ; keep the Julia constant in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3      ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP

        ; Per-lane x offsets: (dx and mask1) + (dx and mask2) + dx, then
        ; filtered by mask. The original note gives the intended result
        ; as 0 | dx2 | dx2*2 | dx2*3; the mask constants live elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0              ; xmm4 = x of the first four points
        movaps  dqword [left1], xmm4    ; reloaded at the start of every later row

        addps   xmm2, xmm2
        addps   xmm2, xmm2              ; xmm2 = 4 * dx
        movaps  dqword [dx1], xmm2      ; x step between groups of four
        movaps  xmm7, dqword [radius]
        jmp     row_entry               ; first row already has xmm4 set

    ; ---- per row -----------------------------------------------------
row_loop:
        movaps  xmm4, dqword [left1]    ; rewind x to the left edge
row_entry:
        mov     ebx, [w]                ; ebx = points left in this row

    ; ---- per group of four points ------------------------------------
quad_loop:
        movaps  xmm0, xmm4              ; zx starts at the point's x
        xorps   xmm6, xmm6              ; clear the accumulator
        movaps  xmm1, xmm5              ; zy starts at the row's y
        mov     ecx, ITER

    ; ---- per iteration -----------------------------------------------
iter_loop:
        movaps  xmm2, xmm0              ; xmm2 = zx                      [0-6]
        mulps   xmm0, xmm0              ; xmm0 = zx^2                    [0-6]
        movaps  xmm3, xmm1              ; xmm3 = zy                      [1-7]
        addps   xmm1, xmm1              ; xmm1 = 2*zy                    [1-5]
        mulps   xmm1, xmm2              ; xmm1 = 2*zx*zy                 [6-12]
        movaps  xmm2, xmm0              ; xmm2 = zx^2                    [7-13]
        mulps   xmm3, xmm3              ; xmm3 = zy^2                    [8-14]
    if type = Julia
        addps   xmm1, dqword [cy1]      ; next zy = 2*zx*zy + cy1
    else if type = Mandel
        addps   xmm1, xmm5              ; next zy = 2*zx*zy + row y      [12-16]
    end if
        subps   xmm0, xmm3              ; xmm0 = zx^2 - zy^2             [14-18]
        addps   xmm2, xmm3              ; xmm2 = zx^2 + zy^2             [16-20]
    if type = Julia
        addps   xmm0, dqword [cx1]      ; next zx = zx^2 - zy^2 + cx1
    else if type = Mandel
        addps   xmm0, xmm4              ; next zx = zx^2 - zy^2 + point x  [18-22]
    end if
        cmpleps xmm2, xmm7              ; lane = all ones where sum <= xmm7  [20-24]
        movmskps eax, xmm2              ; one bit per lane               [24-30]
        test    eax, eax
        jz      iter_done               ; no lane passed the test any more
    if color
        andps   xmm2, xmm7              ; passing lanes = xmm7, others 0 [25-27]
        addps   xmm6, xmm2              ; accumulate per lane            [28-32]
    end if
        sub     ecx, 1
        jnz     iter_loop

    ; ---- write the four results --------------------------------------
iter_done:
    if color
        ; original estimate for this sequence: 21-22 cycles
        cvtss2si eax, xmm6              ; lane 0 as an integer byte offset
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h        ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h        ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h        ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
    else
        andnps  xmm2, dqword [maskbw]   ; (not last test mask) and maskbw
        movaps  [edi], xmm2             ; aligned 16-byte store
    end if
        add     edi, 16                 ; four dwords written
        addps   xmm4, dqword [dx1]      ; advance x by 4 * dx
        sub     ebx, 4
        jnz     quad_loop

        addps   xmm5, dqword [dy1]      ; advance y to the next row
        sub     edx, 1
        jnz     row_loop

        pop     edi esi ebx
}
;vod
;-----------------------------------------------------------------------
; JuliaMandelPaintvod color, type
;
; Expands to an SSE escape-time renderer that iterates z = z^2 + c on
; four horizontally adjacent points at once. In this variant zy is
; squared first and the radius compare is issued before zx*zy is
; doubled.
;
;   color : assembly-time flag. True  -> each point's accumulated count is
;           used as an offset from [a] to fetch a dword that is written
;           to the output. False -> a mask derived from [maskbw] is
;           written instead.
;   type  : Julia  -> the constant added each iteration is cx1 / cy1.
;           Mandel -> the constant is the point's own coordinate
;                     (xmm4 / xmm5).
;
;   Reads   : h, w, a, bits, radius, mask, mask1, mask2, maskbw, ITER,
;             plus cx2, cy2, dx2, dy2, LEFT, TOP through copyscr.
;   Writes  : cx1, cy1 (Julia only), dx1, dy1, left1, buffer at [bits].
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags (and anything copyscr
;             touches). ebx, esi, edi are saved and restored.
;
; Register roles while the loops run:
;   xmm0 / xmm1 = zx / zy          xmm2, xmm3 = temporaries
;   xmm4 = x of the current four points, xmm5 = y of the current row
;   xmm6 = per-lane accumulator    xmm7 = contents of [radius]
;   eax = temp, ebx = points left in the row, ecx = iterations left,
;   edx = rows left, edi = output cursor, esi = look-up base
;
; Constraints visible in the code:
;   * w must be a positive multiple of 4 and h nonzero, because both
;     outer loops stop only when their counter lands exactly on zero.
;   * every dqword memory operand, and [edi] in the non-colour path,
;     is accessed with aligned SSE forms and must be 16-byte aligned.
;   * the colour path uses the accumulator directly as a byte offset,
;     so the per-iteration increment (the value held in [radius]) has to
;     match the table entry size. The original notes say 4.0 - confirm.
;
; copyscr is defined elsewhere; it presumably broadcasts a scalar into
; all four lanes of the register - TODO confirm.
; Bracketed numbers and unit names in trailing comments are the original
; author's start-end cycle / execution-unit estimates, carried over
; unverified.
; NOTE(review): those estimates suggest the instruction order in the
; inner loop is deliberate; keep it when editing.
;-----------------------------------------------------------------------
macro JuliaMandelPaintvod color, type
{
    local row_loop, quad_loop, iter_loop, iter_done, row_entry

        push    ebx esi edi
        mov     edx, [h]                ; edx = rows left
        mov     esi, [a]                ; esi = base for the colour look-ups
        mov     edi, [bits]             ; edi = output cursor

    ; ---- one-time setup ----------------------------------------------
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0      ; keep the Julia constant in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3      ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP

        ; Per-lane x offsets: (dx and mask1) + (dx and mask2) + dx, then
        ; filtered by mask. The original note gives the intended result
        ; as 0 | dx2 | dx2*2 | dx2*3; the mask constants live elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0              ; xmm4 = x of the first four points
        movaps  dqword [left1], xmm4    ; reloaded at the start of every later row

        addps   xmm2, xmm2
        addps   xmm2, xmm2              ; xmm2 = 4 * dx
        movaps  dqword [dx1], xmm2      ; x step between groups of four
        movaps  xmm7, dqword [radius]
        jmp     row_entry               ; first row already has xmm4 set

    ; ---- per row -----------------------------------------------------
row_loop:
        movaps  xmm4, dqword [left1]    ; rewind x to the left edge
row_entry:
        mov     ebx, [w]                ; ebx = points left in this row

    ; ---- per group of four points ------------------------------------
quad_loop:
        movaps  xmm0, xmm4              ; zx starts at the point's x
        xorps   xmm6, xmm6              ; clear the accumulator
        movaps  xmm1, xmm5              ; zy starts at the row's y
        mov     ecx, ITER

    ; ---- per iteration -----------------------------------------------
iter_loop:
        movaps  xmm2, xmm1              ; xmm2 = zy                [0-6]   mov
        mulps   xmm2, xmm2              ; xmm2 = zy^2              [6-12]  fp:mul
        mulps   xmm1, xmm0              ; xmm1 = zx*zy             [0-6]   fp:mul
        movaps  xmm3, xmm2              ; xmm3 = zy^2              [13-19] mov
        mulps   xmm0, xmm0              ; xmm0 = zx^2              [2-8]   fp:mul
        addps   xmm2, xmm0              ; xmm2 = zx^2 + zy^2       [12-16] fp:add
        cmpleps xmm2, xmm7              ; all ones where sum <= xmm7  [16-20] fp:add
        addps   xmm1, xmm1              ; xmm1 = 2*zx*zy           [6-10]  fp:add
        subps   xmm0, xmm3              ; xmm0 = zx^2 - zy^2       [19-23] fp:add
    if type = Julia
        addps   xmm1, dqword [cy1]      ; next zy                  [10-14] fp:add
        addps   xmm0, dqword [cx1]      ; next zx                  [23-27] fp:add
    else if type = Mandel
        addps   xmm1, xmm5              ; next zy = 2*zx*zy + row y
        addps   xmm0, xmm4              ; next zx = zx^2 - zy^2 + point x
    end if
        movmskps eax, xmm2              ; one bit per lane         [20-26] fp
        test    eax, eax                ;                          [26-27] alu0/1
        jz      iter_done               ; no lane passed the test  [26-27] alu0/1
    if color
        andps   xmm2, xmm7              ; passing lanes = xmm7, others 0  [21-23] mmx:alu
        addps   xmm6, xmm2              ; accumulate per lane      [24-28] fp:add
    end if
        sub     ecx, 1
        jnz     iter_loop

    ; ---- write the four results --------------------------------------
iter_done:
    if color
        cvtss2si eax, xmm6              ; lane 0 as an integer byte offset
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h        ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h        ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h        ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
        ; disabled alternative left by the original author, which stores
        ; the converted counts without the table look-up:
        ;   cvtps2dq xmm6, xmm6
        ;   movaps   [edi], xmm6
    else
        andnps  xmm2, dqword [maskbw]   ; (not last test mask) and maskbw
        movaps  [edi], xmm2             ; aligned 16-byte store
    end if
        add     edi, 16                 ; four dwords written
        addps   xmm4, dqword [dx1]      ; advance x by 4 * dx
        sub     ebx, 4
        jnz     quad_loop

        addps   xmm5, dqword [dy1]      ; advance y to the next row
        sub     edx, 1
        jnz     row_loop

        pop     edi esi ebx
}
;vod2
;-----------------------------------------------------------------------
; JuliaMandelPaintvod2 color, type
;
; Expands to an SSE escape-time renderer that iterates z = z^2 + c on
; four horizontally adjacent points at once. In this variant zy is
; squared first and zx*zy is doubled before the radius compare is
; issued.
;
;   color : assembly-time flag. True  -> each point's accumulated count is
;           used as an offset from [a] to fetch a dword that is written
;           to the output. False -> a mask derived from [maskbw] is
;           written instead.
;   type  : Julia  -> the constant added each iteration is cx1 / cy1.
;           Mandel -> the constant is the point's own coordinate
;                     (xmm4 / xmm5).
;
;   Reads   : h, w, a, bits, radius, mask, mask1, mask2, maskbw, ITER,
;             plus cx2, cy2, dx2, dy2, LEFT, TOP through copyscr.
;   Writes  : cx1, cy1 (Julia only), dx1, dy1, left1, buffer at [bits].
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags (and anything copyscr
;             touches). ebx, esi, edi are saved and restored.
;
; Register roles while the loops run:
;   xmm0 / xmm1 = zx / zy          xmm2, xmm3 = temporaries
;   xmm4 = x of the current four points, xmm5 = y of the current row
;   xmm6 = per-lane accumulator    xmm7 = contents of [radius]
;   eax = temp, ebx = points left in the row, ecx = iterations left,
;   edx = rows left, edi = output cursor, esi = look-up base
;
; Constraints visible in the code:
;   * w must be a positive multiple of 4 and h nonzero, because both
;     outer loops stop only when their counter lands exactly on zero.
;   * every dqword memory operand, and [edi] in the non-colour path,
;     is accessed with aligned SSE forms and must be 16-byte aligned.
;   * the colour path uses the accumulator directly as a byte offset,
;     so the per-iteration increment (the value held in [radius]) has to
;     match the table entry size. The original notes say 4.0 - confirm.
;
; copyscr is defined elsewhere; it presumably broadcasts a scalar into
; all four lanes of the register - TODO confirm.
; Bracketed numbers and unit names in trailing comments are the original
; author's start-end cycle / execution-unit estimates, carried over
; unverified.
; NOTE(review): those estimates suggest the instruction order in the
; inner loop is deliberate; keep it when editing.
;-----------------------------------------------------------------------
macro JuliaMandelPaintvod2 color, type
{
    local row_loop, quad_loop, iter_loop, iter_done, row_entry

        push    ebx esi edi
        mov     edx, [h]                ; edx = rows left
        mov     esi, [a]                ; esi = base for the colour look-ups
        mov     edi, [bits]             ; edi = output cursor

    ; ---- one-time setup ----------------------------------------------
    if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
    end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
    if type = Julia
        movaps  dqword [cx1], xmm0      ; keep the Julia constant in memory
        movaps  dqword [cy1], xmm1
    end if
        movaps  dqword [dy1], xmm3      ; per-row y step
        copyscr xmm4, LEFT
        copyscr xmm5, TOP

        ; Per-lane x offsets: (dx and mask1) + (dx and mask2) + dx, then
        ; filtered by mask. The original note gives the intended result
        ; as 0 | dx2 | dx2*2 | dx2*3; the mask constants live elsewhere.
        movaps  xmm0, xmm2
        andps   xmm0, dqword [mask1]
        movaps  xmm1, xmm2
        andps   xmm1, dqword [mask2]
        addps   xmm0, xmm1
        addps   xmm0, xmm2
        andps   xmm0, dqword [mask]
        addps   xmm4, xmm0              ; xmm4 = x of the first four points
        movaps  dqword [left1], xmm4    ; reloaded at the start of every later row

        addps   xmm2, xmm2
        addps   xmm2, xmm2              ; xmm2 = 4 * dx
        movaps  dqword [dx1], xmm2      ; x step between groups of four
        movaps  xmm7, dqword [radius]
        jmp     row_entry               ; first row already has xmm4 set

    ; ---- per row -----------------------------------------------------
row_loop:
        movaps  xmm4, dqword [left1]    ; rewind x to the left edge
row_entry:
        mov     ebx, [w]                ; ebx = points left in this row

    ; ---- per group of four points ------------------------------------
quad_loop:
        movaps  xmm0, xmm4              ; zx starts at the point's x
        xorps   xmm6, xmm6              ; clear the accumulator
        movaps  xmm1, xmm5              ; zy starts at the row's y
        mov     ecx, ITER

    ; ---- per iteration -----------------------------------------------
iter_loop:
        movaps  xmm2, xmm1              ; xmm2 = zy                [0-6]   mov
        mulps   xmm2, xmm2              ; xmm2 = zy^2              [6-12]  fp:mul
        mulps   xmm1, xmm0              ; xmm1 = zx*zy             [0-6]   fp:mul
        movaps  xmm3, xmm2              ; xmm3 = zy^2              [13-19] mov
        mulps   xmm0, xmm0              ; xmm0 = zx^2              [2-8]   fp:mul
        addps   xmm2, xmm0              ; xmm2 = zx^2 + zy^2       [12-16] fp:add
        addps   xmm1, xmm1              ; xmm1 = 2*zx*zy           [6-10]  fp:add
        cmpleps xmm2, xmm7              ; all ones where sum <= xmm7  [16-20] fp:add
        subps   xmm0, xmm3              ; xmm0 = zx^2 - zy^2       [19-23] fp:add
    if type = Julia
        addps   xmm1, dqword [cy1]      ; next zy                  [10-14] fp:add
        addps   xmm0, dqword [cx1]      ; next zx                  [23-27] fp:add
    else if type = Mandel
        addps   xmm1, xmm5              ; next zy = 2*zx*zy + row y
        addps   xmm0, xmm4              ; next zx = zx^2 - zy^2 + point x
    end if
        movmskps eax, xmm2              ; one bit per lane         [20-26] fp
        test    eax, eax                ;                          [26-27] alu0/1
        jz      iter_done               ; no lane passed the test  [26-27] alu0/1
    if color
        andps   xmm2, xmm7              ; passing lanes = xmm7, others 0  [21-23] mmx:alu
        addps   xmm6, xmm2              ; accumulate per lane      [24-28] fp:add
    end if
        sub     ecx, 1
        jnz     iter_loop

    ; ---- write the four results --------------------------------------
iter_done:
    if color
        cvtss2si eax, xmm6              ; lane 0 as an integer byte offset
        mov     eax, [esi + eax]
        mov     [edi], eax
        shufps  xmm6, xmm6, 0E5h        ; lane 1 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        shufps  xmm6, xmm6, 0E6h        ; lane 2 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        shufps  xmm6, xmm6, 0E7h        ; lane 3 -> lane 0
        cvtss2si eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
        ; disabled alternative left by the original author, which stores
        ; the converted counts without the table look-up:
        ;   cvtps2dq xmm6, xmm6
        ;   movaps   [edi], xmm6
    else
        andnps  xmm2, dqword [maskbw]   ; (not last test mask) and maskbw
        movaps  [edi], xmm2             ; aligned 16-byte store
    end if
        add     edi, 16                 ; four dwords written
        addps   xmm4, dqword [dx1]      ; advance x by 4 * dx
        sub     ebx, 4
        jnz     quad_loop

        addps   xmm5, dqword [dy1]      ; advance y to the next row
        sub     edx, 1
        jnz     row_loop

        pop     edi esi ebx
}
;vodps2dq
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -