⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sseroutines_test.asm

📁 Generating Fractals with SSE/SSE2: You have probably heard about fractals before. They are beautiful…
💻 ASM
📖 第 1 页 / 共 3 页
字号:
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0	;  0 -  6
	MULPS  xmm0, xmm0	;  0 -  6
	MOVAPS xmm3, xmm1	;  1 -  7
	ADDPS  xmm1, xmm1	;  1 -  5
	MULPS  xmm1, xmm2	;  6 - 12
	MOVAPS xmm2, xmm0	;  7 - 13
	MULPS  xmm3, xmm3	;  8 - 14
	; xmm0 = zx^2           xmm1 = 2 * zy      xmm2 = zx           xmm3 = zy^2
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
     else if type = Mandel
	ADDPS  xmm1, xmm5	; 12 - 16
     end if
	ADDPS  xmm2, xmm3	; 14 - 18
	SUBPS  xmm0, xmm3	; 16 - 20
	; xmm0 = zx^2 - zy^2    xmm1 = 2*zx*zy     xmm2 = zx^2 + zy^2  xmm3 = zy^2
	CMPLEPS  xmm2, xmm7	; 18 - 22
     if type = Julia
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm0, xmm4	; 20 - 24
     end if
	MOVMSKPS eax, xmm2	; 22 - 28
	test eax, eax
	jz EXIT
     if color
	ANDPS  xmm2, xmm7	; 23 - 25 ; xmm6 += (xmm2 < radius) ? 4.0 : 0.0;
	ADDPS  xmm6, xmm2	; 26 - 30
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	CVTPS2DQ xmm6, xmm6
	MOVAPS dqword[scratch], xmm6
	mov eax, [scratch]
	mov ecx, [scratch+4]
	mov eax, [esi + eax]
	mov ecx, [esi + ecx]
	mov [edi], eax
	mov [edi+4], ecx
	mov eax, [scratch+8]
	mov ecx, [scratch+12]
	mov eax, [esi + eax]
	mov ecx, [esi + ecx]
	mov [edi+8], eax
	mov [edi+12], ecx
     else
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16
	ADDPS  xmm4, dqword[dx1]
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}


;MOVADD3
; JuliaMandelPaintMOVADD3 color, type
;
; Expands in place to a complete render loop that iterates z = z^2 + c for
; four horizontally adjacent pixels at a time using packed single-precision
; SSE, and writes 16 bytes of output per group of four pixels.
;
; Assembly-time arguments:
;   color - tested with "if color". When true, each lane's accumulator (xmm6)
;           is converted to an integer and used as a byte offset from esi to
;           fetch the dword stored for that pixel. When false, the inverted
;           final compare mask ANDed with [maskbw] is stored instead.
;   type  - compared against the symbols Julia and Mandel (defined elsewhere).
;           Julia adds the constants saved in [cx1]/[cy1] on every iteration;
;           Mandel adds the pixel's own starting point (xmm4/xmm5). Any other
;           value assembles an iteration with no constant added.
;
; Reads:   [h], [w], [a], [bits], [radius], [mask1], [mask2], [mask],
;          [maskbw], ITER, and cx2/cy2/dx2/dy2/LEFT/TOP through copyscr.
; Writes:  [cx1], [cy1] (Julia only), [dx1], [dy1], [left1], memory at edi.
; Regs:    ebx/esi/edi are pushed and popped; eax, ecx, edx, xmm0-xmm7 and
;          the flags are overwritten.
;
; NOTE(review): copyscr is not defined in this part of the file; it
;   presumably loads a scalar into all four lanes of the register - confirm.
; NOTE(review): the row loop ends on "sub ebx, 4 / jnz", so [w] has to be a
;   non-zero multiple of 4, and [h] non-zero, for the loops to terminate as
;   intended - confirm callers guarantee this.
; NOTE(review): MOVAPS faults on memory operands that are not 16-byte
;   aligned, so the dqword variables (and [bits] when color is false) must be
;   16-byte aligned.
; The "n - m" figures in trailing comments are the original author's cycle
; annotations and have not been re-verified.
;
; Variant note: in this version CMPLEPS is issued before the final ADDPS into
; xmm0; JuliaMandelPaintMOVADD4 issues the same instructions with CMPLEPS
; after that ADDPS.
macro JuliaMandelPaintMOVADD3 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi	; restored in reverse order at the end of the macro
	mov edx, [h]		; edx = rows left to render
	mov esi, [a]		; esi = base address for the color lookups at EXIT
	mov edi, [bits] 	; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	; keep the Julia constant in memory; xmm0/xmm1 are reused below
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; per-row y step
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 (the result is in xmm0, not xmm2).
	; Presumably 0 | dx2 | dx2 * 2 | dx2 * 3, given suitable lane masks in
	; mask1/mask2/mask - those are defined elsewhere, confirm.
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; x coordinates of the first group, reloaded per row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; x step per group = 4 * dx2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 is already set, skip the reload
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm6 = result
	; xmm4 = x coordinates of the current group   xmm5 = y coordinate of the current row
	; xmm7 = [radius] (the original comment gives it as 4.0)
	; eax = tmp    ebx = pixels left in row    ecx = iterations left    edx = rows left
	; edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	; start a new group: z = starting point, accumulator = 0
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0	;  0 -  6
	MULPS  xmm0, xmm0	;  0 -  6
	MOVAPS xmm3, xmm1	;  1 -  7
	ADDPS  xmm1, xmm1	;  1 -  5
	MULPS  xmm1, xmm2	;  6 - 12
	MOVAPS xmm2, xmm0	;  7 - 13
	MULPS  xmm3, xmm3	;  8 - 14
	; xmm0 = zx^2           xmm1 = 2*zx*zy     xmm2 = zx^2         xmm3 = zy^2
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
     else if type = Mandel
	ADDPS  xmm1, xmm5	; 12 - 16
     end if
	SUBPS  xmm0, xmm3	; 14 - 18
	ADDPS  xmm2, xmm3	; 16 - 20
	; xmm0 = zx^2 - zy^2    xmm1 = new zy (constant already added)
	; xmm2 = zx^2 + zy^2    xmm3 = zy^2
	CMPLEPS  xmm2, xmm7	; 20 - 24 ; lane = all ones where zx^2 + zy^2 <= radius
     if type = Julia
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm0, xmm4	; 18 - 22
     end if
	MOVMSKPS eax, xmm2	; 24 - 30
	test eax, eax
	jz EXIT 		; no lane passed the compare: stop iterating this group
     if color
	; mask AND radius: lanes that passed the compare add the value in xmm7, the rest add 0.0
	ANDPS  xmm2, xmm7	; 25 - 27 ; xmm6 += (xmm2 <= radius) ? radius : 0.0;
	ADDPS  xmm6, xmm2	; 28 - 32
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Each lane of xmm6 is converted to an integer and used directly as a
	; byte offset from esi. NOTE(review): with radius = 4.0 that is one dword
	; entry per counted iteration - confirm against the table layout.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h ; lane 1 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h ; lane 2 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h ; lane 3 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm2 = (NOT last compare mask) AND maskbw: lanes that failed the
	; last compare get the maskbw value, the others get zero
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16		; four dwords written per group
	ADDPS  xmm4, dqword[dx1]	; advance x by one group
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; advance y by one row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

;MOVADD4
; JuliaMandelPaintMOVADD4 color, type
;
; Expands in place to a complete render loop that iterates z = z^2 + c for
; four horizontally adjacent pixels at a time using packed single-precision
; SSE, and writes 16 bytes of output per group of four pixels.
;
; Assembly-time arguments:
;   color - tested with "if color". When true, each lane's accumulator (xmm6)
;           is converted to an integer and used as a byte offset from esi to
;           fetch the dword stored for that pixel. When false, the inverted
;           final compare mask ANDed with [maskbw] is stored instead.
;   type  - compared against the symbols Julia and Mandel (defined elsewhere).
;           Julia adds the constants saved in [cx1]/[cy1] on every iteration;
;           Mandel adds the pixel's own starting point (xmm4/xmm5). Any other
;           value assembles an iteration with no constant added.
;
; Reads:   [h], [w], [a], [bits], [radius], [mask1], [mask2], [mask],
;          [maskbw], ITER, and cx2/cy2/dx2/dy2/LEFT/TOP through copyscr.
; Writes:  [cx1], [cy1] (Julia only), [dx1], [dy1], [left1], memory at edi.
; Regs:    ebx/esi/edi are pushed and popped; eax, ecx, edx, xmm0-xmm7 and
;          the flags are overwritten.
;
; NOTE(review): copyscr is not defined in this part of the file; it
;   presumably loads a scalar into all four lanes of the register - confirm.
; NOTE(review): the row loop ends on "sub ebx, 4 / jnz", so [w] has to be a
;   non-zero multiple of 4, and [h] non-zero, for the loops to terminate as
;   intended - confirm callers guarantee this.
; NOTE(review): MOVAPS faults on memory operands that are not 16-byte
;   aligned, so the dqword variables (and [bits] when color is false) must be
;   16-byte aligned.
; The "n - m" figures in trailing comments are the original author's cycle
; annotations and have not been re-verified.
;
; Variant note: in this version CMPLEPS is issued after the final ADDPS into
; xmm0; JuliaMandelPaintMOVADD3 issues the same instructions with CMPLEPS
; before that ADDPS.
macro JuliaMandelPaintMOVADD4 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi	; restored in reverse order at the end of the macro
	mov edx, [h]		; edx = rows left to render
	mov esi, [a]		; esi = base address for the color lookups at EXIT
	mov edi, [bits] 	; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	; keep the Julia constant in memory; xmm0/xmm1 are reused below
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; per-row y step
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 (the result is in xmm0, not xmm2).
	; Presumably 0 | dx2 | dx2 * 2 | dx2 * 3, given suitable lane masks in
	; mask1/mask2/mask - those are defined elsewhere, confirm.
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; x coordinates of the first group, reloaded per row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; x step per group = 4 * dx2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 is already set, skip the reload
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm6 = result
	; xmm4 = x coordinates of the current group   xmm5 = y coordinate of the current row
	; xmm7 = [radius] (the original comment gives it as 4.0)
	; eax = tmp    ebx = pixels left in row    ecx = iterations left    edx = rows left
	; edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	; start a new group: z = starting point, accumulator = 0
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0	;  0 -  6
	MULPS  xmm0, xmm0	;  0 -  6
	MOVAPS xmm3, xmm1	;  1 -  7
	ADDPS  xmm1, xmm1	;  1 -  5
	MULPS  xmm1, xmm2	;  6 - 12
	MOVAPS xmm2, xmm0	;  7 - 13
	MULPS  xmm3, xmm3	;  8 - 14
	; xmm0 = zx^2           xmm1 = 2*zx*zy     xmm2 = zx^2         xmm3 = zy^2
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
     else if type = Mandel
	ADDPS  xmm1, xmm5	; 12 - 16
     end if
	SUBPS  xmm0, xmm3	; 14 - 18
	ADDPS  xmm2, xmm3	; 16 - 20
	; xmm0 = zx^2 - zy^2    xmm1 = new zy (constant already added)
	; xmm2 = zx^2 + zy^2    xmm3 = zy^2
     if type = Julia
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm0, xmm4	; 18 - 22
     end if
	CMPLEPS  xmm2, xmm7	; 20 - 24 ; lane = all ones where zx^2 + zy^2 <= radius
	MOVMSKPS eax, xmm2	; 24 - 30
	test eax, eax
	jz EXIT 		; no lane passed the compare: stop iterating this group
     if color
	; mask AND radius: lanes that passed the compare add the value in xmm7, the rest add 0.0
	ANDPS  xmm2, xmm7	; 25 - 27 ; xmm6 += (xmm2 <= radius) ? radius : 0.0;
	ADDPS  xmm6, xmm2	; 28 - 32
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Each lane of xmm6 is converted to an integer and used directly as a
	; byte offset from esi. NOTE(review): with radius = 4.0 that is one dword
	; entry per counted iteration - confirm against the table layout.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h ; lane 1 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h ; lane 2 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h ; lane 3 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm2 = (NOT last compare mask) AND maskbw: lanes that failed the
	; last compare get the maskbw value, the others get zero
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16		; four dwords written per group
	ADDPS  xmm4, dqword[dx1]	; advance x by one group
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; advance y by one row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

;vod
; JuliaMandelPaintvod color, type
;
; Expands in place to a complete render loop that iterates z = z^2 + c for
; four horizontally adjacent pixels at a time using packed single-precision
; SSE, and writes 16 bytes of output per group of four pixels.
;
; Assembly-time arguments:
;   color - tested with "if color". When true, each lane's accumulator (xmm6)
;           is converted to an integer and used as a byte offset from esi to
;           fetch the dword stored for that pixel. When false, the inverted
;           final compare mask ANDed with [maskbw] is stored instead.
;   type  - compared against the symbols Julia and Mandel (defined elsewhere).
;           Julia adds the constants saved in [cx1]/[cy1] on every iteration;
;           Mandel adds the pixel's own starting point (xmm4/xmm5). Any other
;           value assembles an iteration with no constant added.
;
; Reads:   [h], [w], [a], [bits], [radius], [mask1], [mask2], [mask],
;          [maskbw], ITER, and cx2/cy2/dx2/dy2/LEFT/TOP through copyscr.
; Writes:  [cx1], [cy1] (Julia only), [dx1], [dy1], [left1], memory at edi.
; Regs:    ebx/esi/edi are pushed and popped; eax, ecx, edx, xmm0-xmm7 and
;          the flags are overwritten.
;
; NOTE(review): copyscr is not defined in this part of the file; it
;   presumably loads a scalar into all four lanes of the register - confirm.
; NOTE(review): the row loop ends on "sub ebx, 4 / jnz", so [w] has to be a
;   non-zero multiple of 4, and [h] non-zero, for the loops to terminate as
;   intended - confirm callers guarantee this.
; NOTE(review): MOVAPS faults on memory operands that are not 16-byte
;   aligned, so the dqword variables (and [bits] when color is false) must be
;   16-byte aligned.
; The "n - m" figures and unit names in trailing comments are the original
; author's annotations and have not been re-verified.
;
; Variant note: in this version CMPLEPS is issued before "ADDPS xmm1, xmm1";
; JuliaMandelPaintvod2 issues the same instructions with those two swapped.
macro JuliaMandelPaintvod color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi	; restored in reverse order at the end of the macro
	mov edx, [h]		; edx = rows left to render
	mov esi, [a]		; esi = base address for the color lookups at EXIT
	mov edi, [bits] 	; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	; keep the Julia constant in memory; xmm0/xmm1 are reused below
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; per-row y step
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 (the result is in xmm0, not xmm2).
	; Presumably 0 | dx2 | dx2 * 2 | dx2 * 3, given suitable lane masks in
	; mask1/mask2/mask - those are defined elsewhere, confirm.
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; x coordinates of the first group, reloaded per row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; x step per group = 4 * dx2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 is already set, skip the reload
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm6 = result
	; xmm4 = x coordinates of the current group   xmm5 = y coordinate of the current row
	; xmm7 = [radius] (the original comment gives it as 4.0)
	; eax = tmp    ebx = pixels left in row    ecx = iterations left    edx = rows left
	; edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	; start a new group: z = starting point, accumulator = 0
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	; after this group: xmm2 = zy^2, xmm1 = zx*zy
	MOVAPS xmm2, xmm1	  ; 0  - 6       mov
	MULPS  xmm2, xmm2	  ; 6  - 12      fp:mul
	MULPS  xmm1, xmm0	  ; 0  - 6       fp:mul

	; after this group: xmm3 = zy^2, xmm0 = zx^2, xmm2 = zx^2 + zy^2
	MOVAPS xmm3, xmm2	  ; 13 - 19      mov
	MULPS  xmm0, xmm0	  ; 2  - 8       fp:mul
	ADDPS  xmm2, xmm0	  ; 12 - 16      fp:add

	; after this group: xmm2 = compare mask (zx^2 + zy^2 <= radius),
	; xmm1 = 2*zx*zy, xmm0 = zx^2 - zy^2
	CMPLEPS  xmm2, xmm7	  ; 16 - 20      fp:add
	ADDPS  xmm1, xmm1	  ; 6  - 10      fp:add
	SUBPS  xmm0, xmm3	  ; 19 - 23      fp:add
     if type = Julia
	ADDPS  xmm1, dqword[cy1]  ; 10 - 14      fp:add
	ADDPS  xmm0, dqword[cx1]  ; 23 - 27      fp:add
     else if type = Mandel
	ADDPS  xmm1, xmm5
	ADDPS  xmm0, xmm4
     end if
	MOVMSKPS eax, xmm2	  ; 20 - 26      fp
	test eax, eax		  ; 26 - 27      alu0/1
	jz EXIT 		  ; 26 - 27      alu0/1
     if color
	; mask AND radius: lanes that passed the compare add the value in xmm7, the rest add 0.0
	ANDPS  xmm2, xmm7	  ; 21 - 23      mmx:alu
	ADDPS  xmm6, xmm2	  ; 24 - 28      fp:add
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	; Each lane of xmm6 is converted to an integer and used directly as a
	; byte offset from esi. NOTE(review): with radius = 4.0 that is one dword
	; entry per counted iteration - confirm against the table layout.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h ; lane 1 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h ; lane 2 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h ; lane 3 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
	; disabled alternative left by the original author:
	;CVTPS2DQ xmm6, xmm6
	;MOVAPS  [edi], xmm6
     else
	; xmm2 = (NOT last compare mask) AND maskbw: lanes that failed the
	; last compare get the maskbw value, the others get zero
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16		; four dwords written per group
	ADDPS  xmm4, dqword[dx1]	; advance x by one group
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; advance y by one row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

;vod2
; JuliaMandelPaintvod2 color, type
;
; Expands in place to a complete render loop that iterates z = z^2 + c for
; four horizontally adjacent pixels at a time using packed single-precision
; SSE, and writes 16 bytes of output per group of four pixels.
;
; Assembly-time arguments:
;   color - tested with "if color". When true, each lane's accumulator (xmm6)
;           is converted to an integer and used as a byte offset from esi to
;           fetch the dword stored for that pixel. When false, the inverted
;           final compare mask ANDed with [maskbw] is stored instead.
;   type  - compared against the symbols Julia and Mandel (defined elsewhere).
;           Julia adds the constants saved in [cx1]/[cy1] on every iteration;
;           Mandel adds the pixel's own starting point (xmm4/xmm5). Any other
;           value assembles an iteration with no constant added.
;
; Reads:   [h], [w], [a], [bits], [radius], [mask1], [mask2], [mask],
;          [maskbw], ITER, and cx2/cy2/dx2/dy2/LEFT/TOP through copyscr.
; Writes:  [cx1], [cy1] (Julia only), [dx1], [dy1], [left1], memory at edi.
; Regs:    ebx/esi/edi are pushed and popped; eax, ecx, edx, xmm0-xmm7 and
;          the flags are overwritten.
;
; NOTE(review): copyscr is not defined in this part of the file; it
;   presumably loads a scalar into all four lanes of the register - confirm.
; NOTE(review): the row loop ends on "sub ebx, 4 / jnz", so [w] has to be a
;   non-zero multiple of 4, and [h] non-zero, for the loops to terminate as
;   intended - confirm callers guarantee this.
; NOTE(review): MOVAPS faults on memory operands that are not 16-byte
;   aligned, so the dqword variables (and [bits] when color is false) must be
;   16-byte aligned.
; The "n - m" figures and unit names in trailing comments are the original
; author's annotations and have not been re-verified.
;
; Variant note: in this version "ADDPS xmm1, xmm1" is issued before CMPLEPS;
; JuliaMandelPaintvod issues the same instructions with those two swapped.
macro JuliaMandelPaintvod2 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi	; restored in reverse order at the end of the macro
	mov edx, [h]		; edx = rows left to render
	mov esi, [a]		; esi = base address for the color lookups at EXIT
	mov edi, [bits] 	; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	; keep the Julia constant in memory; xmm0/xmm1 are reused below
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; per-row y step
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 (the result is in xmm0, not xmm2).
	; Presumably 0 | dx2 | dx2 * 2 | dx2 * 3, given suitable lane masks in
	; mask1/mask2/mask - those are defined elsewhere, confirm.
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; x coordinates of the first group, reloaded per row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; x step per group = 4 * dx2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 is already set, skip the reload
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm6 = result
	; xmm4 = x coordinates of the current group   xmm5 = y coordinate of the current row
	; xmm7 = [radius] (the original comment gives it as 4.0)
	; eax = tmp    ebx = pixels left in row    ecx = iterations left    edx = rows left
	; edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	; start a new group: z = starting point, accumulator = 0
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	; after this group: xmm2 = zy^2, xmm1 = zx*zy
	MOVAPS xmm2, xmm1	  ; 0  - 6       mov
	MULPS  xmm2, xmm2	  ; 6  - 12      fp:mul
	MULPS  xmm1, xmm0	  ; 0  - 6       fp:mul

	; after this group: xmm3 = zy^2, xmm0 = zx^2, xmm2 = zx^2 + zy^2
	MOVAPS xmm3, xmm2	  ; 13 - 19      mov
	MULPS  xmm0, xmm0	  ; 2  - 8       fp:mul
	ADDPS  xmm2, xmm0	  ; 12 - 16      fp:add

	; after this group: xmm1 = 2*zx*zy,
	; xmm2 = compare mask (zx^2 + zy^2 <= radius), xmm0 = zx^2 - zy^2
	ADDPS  xmm1, xmm1	  ; 6  - 10      fp:add
	CMPLEPS  xmm2, xmm7	  ; 16 - 20      fp:add
	SUBPS  xmm0, xmm3	  ; 19 - 23      fp:add
     if type = Julia
	ADDPS  xmm1, dqword[cy1]  ; 10 - 14      fp:add
	ADDPS  xmm0, dqword[cx1]  ; 23 - 27      fp:add
     else if type = Mandel
	ADDPS  xmm1, xmm5
	ADDPS  xmm0, xmm4
     end if
	MOVMSKPS eax, xmm2	  ; 20 - 26      fp
	test eax, eax		  ; 26 - 27      alu0/1
	jz EXIT 		  ; 26 - 27      alu0/1
     if color
	; mask AND radius: lanes that passed the compare add the value in xmm7, the rest add 0.0
	ANDPS  xmm2, xmm7	  ; 21 - 23      mmx:alu
	ADDPS  xmm6, xmm2	  ; 24 - 28      fp:add
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	; Each lane of xmm6 is converted to an integer and used directly as a
	; byte offset from esi. NOTE(review): with radius = 4.0 that is one dword
	; entry per counted iteration - confirm against the table layout.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h ; lane 1 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h ; lane 2 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h ; lane 3 -> lane 0
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
	; disabled alternative left by the original author:
	;CVTPS2DQ xmm6, xmm6
	;MOVAPS  [edi], xmm6
     else
	; xmm2 = (NOT last compare mask) AND maskbw: lanes that failed the
	; last compare get the maskbw value, the others get zero
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16		; four dwords written per group
	ADDPS  xmm4, dqword[dx1]	; advance x by one group
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; advance y by one row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

;vodps2dq

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -