⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sseroutines_test.asm

📁 Generating Fractals with SSE/SSE2: You probably have heard about fractals before. They are beautiful…
💻 ASM
📖 第 1 页 / 共 3 页
字号:
format PE console

include '%fasminc%\win32axp.inc'

; Iteration cap: every paint macro loads ITER into ecx as the inner-loop counter.
ITER equ 64 ; The number of iterations
; Values accepted by the "type" argument of the JuliaMandelPaint* macros.
; Julia : the constant added each iteration comes from cx1/cy1 (broadcast of cx2/cy2).
; Mandel: the constant added each iteration is the pixel's own coordinate (xmm4/xmm5).
Julia equ 0
Mandel equ 1

; copyscr reg, from
;   Broadcast the 32-bit value stored at label "from" into all four
;   dword lanes of the XMM register "reg".
macro copyscr reg, from
{
	movss	reg, dword [from]	; lane 0 = [from], lanes 1..3 cleared
	shufps	reg, reg, 0		; selector 0: copy lane 0 into every lane
}
; startm - take the starting time stamp of a measurement.
;   Executes CPUID with eax = 0 and then RDTSC, and stores the low dword of
;   the time-stamp counter in [b] (the high dword in edx is discarded).
;   CPUID is a serialising instruction; presumably it is here so that earlier
;   instructions have completed before the counter is read.
;   Clobbers: eax, ebx, ecx, edx (CPUID writes all four, RDTSC writes edx:eax)
;   and the flags (xor).
macro startm
{
	xor eax, eax
	cpuid
	rdtsc
	mov [b], eax
}
; endm - take the ending time stamp of a measurement.
;   Executes CPUID with eax = 0 and then RDTSC, and subtracts the value that
;   was saved in [b]; the result is left in eax.
;   Only the low dwords of the counter are compared, so the result is the
;   elapsed count modulo 2^32.
;   Clobbers: eax, ebx, ecx, edx (CPUID / RDTSC) and the flags.
macro endm
{
	xor eax, eax
	cpuid
	rdtsc
	sub eax, [b]
}

; MUL
; JuliaMandelPaintMUL color, type
;   Fractal painter, "MUL" instruction schedule. Iterates z = z^2 + c on four
;   horizontally adjacent pixels at once using packed single-precision SSE.
;
;   color : non-zero -> each pixel is written as a dword fetched from the
;                       table at [a], indexed by the lane's accumulated count
;           zero     -> each pixel is written as (NOT inside-mask) AND maskbw
;   type  : Julia  -> c = (cx2, cy2) for every pixel
;           Mandel -> c = the pixel's own coordinate
;
;   Reads  : [h], [w], [a], [bits], cx2, cy2, dx2, dy2, LEFT, TOP,
;            mask, mask1, mask2, radius, maskbw
;   Writes : cx1, cy1 (Julia only), dx1, dy1, left1, and the buffer at [bits]
;   Saves  : ebx, esi, edi (push/pop)
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags
;
;   Assumes [w] is a non-zero multiple of 4 and [h] is non-zero (both loops
;   count down with sub/jnz). MOVAPS faults on operands that are not 16-byte
;   aligned, so the dqword variables and, when color is zero, the output
;   buffer must be aligned accordingly.
;
;   Fix relative to the earlier revision: when color is zero the EXIT code
;   used xmm2, which in this schedule holds 2*zx*zy and not the compare mask,
;   so the stored pixels were meaningless bit patterns. The mask is now kept
;   in xmm6 (unused when color is zero) and that register is used at EXIT.
macro JuliaMandelPaintMUL color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi
	mov edx, [h]			; edx = rows left
	mov esi, [a]			; esi = colour table base
	mov edi, [bits]			; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	MOVAPS dqword[cx1], xmm0	; cx1 / cy1 = Julia constant in all lanes
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; dy1 = per-row step in all lanes
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 from dx2 and the mask constants.
	; The original note gives the intended result as
	;   0 | dx2 | dx2 * 2 | dx2 * 3
	; (the mask constants are defined elsewhere - TODO confirm).
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; left1 = x coordinates of the first 4 pixels of a row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; dx1 = 4 * dx2, the step between pixel groups
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 already holds left1
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]	; rewind x to the start of the row
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAPS xmm0,xmm4		; z starts at the pixel coordinate
	XORPS  xmm6,xmm6		; clear the per-lane accumulator
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0
	MULPS  xmm0, xmm0		; xmm0 = zx^2
	MULPS  xmm2, xmm1		; xmm2 = zx*zy
	MULPS  xmm1, xmm1		; xmm1 = zy^2
	MOVAPS xmm3, xmm1		; xmm3 = zy^2
	ADDPS  xmm1, xmm0		; xmm1 = zx^2 + zy^2
	CMPLEPS  xmm1, xmm7		; xmm1 = lane mask: zx^2 + zy^2 <= radius
	SUBPS  xmm0, xmm3		; xmm0 = zx^2 - zy^2
	ADDPS  xmm2, xmm2		; xmm2 = 2*zx*zy
	MOVMSKPS eax, xmm1
     if ~ color
	MOVAPS xmm6, xmm1		; keep the mask for EXIT; xmm1 is reused below
     end if
	test eax, eax
	jz EXIT				; every lane failed the test: stop early
     if color
	ANDPS  xmm1, xmm7 ; xmm6 += (zx^2 + zy^2 <= radius) ? radius : 0.0;
	ADDPS  xmm6, xmm1
     end if
	MOVAPS xmm1, xmm2
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm1, xmm5
	ADDPS  xmm0, xmm4
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Convert each lane of xmm6 to an integer and use it as a byte offset
	; into the colour table; the shuffles bring lanes 1, 2, 3 down to lane 0.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm6 = mask from the last compare executed: lanes that failed the
	; test receive maskbw, lanes that passed receive 0.
	ANDNPS	xmm6, dqword[maskbw]
	MOVAPS	[edi], xmm6
     end if
	add edi, 16			; four dword pixels written
	ADDPS  xmm4, dqword[dx1]
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; next row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

; FFFF
; JuliaMandelPaintFFFF color, type
;   Fractal painter, "FFFF" instruction schedule. Iterates z = z^2 + c on four
;   horizontally adjacent pixels at once using packed single-precision SSE.
;
;   color : non-zero -> each pixel is written as a dword fetched from the
;                       table at [a], indexed by the lane's accumulated count
;           zero     -> each pixel is written as (NOT inside-mask) AND maskbw
;   type  : Julia  -> c = (cx2, cy2) for every pixel
;           Mandel -> c = the pixel's own coordinate
;
;   Reads  : [h], [w], [a], [bits], cx2, cy2, dx2, dy2, LEFT, TOP,
;            mask, mask1, mask2, radius, maskbw
;   Writes : cx1, cy1 (Julia only), dx1, dy1, left1, and the buffer at [bits]
;   Saves  : ebx, esi, edi (push/pop)
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags
;
;   Assumes [w] is a non-zero multiple of 4 and [h] is non-zero (both loops
;   count down until exactly zero). MOVAPS faults on operands that are not
;   16-byte aligned, so the dqword variables and, when color is zero, the
;   output buffer must be aligned accordingly.
;
;   NOTE(review): the escape test here is a strict less-than (cmpltps); the
;   MUL and MOVADD variants use less-or-equal. Confirm this is intentional.
;
;   Fix relative to the earlier revision: when color is zero the EXIT code
;   used xmm2, which in this schedule holds 2*zx*zy. The compare mask lives in
;   xmm3 and is not modified when color is zero, so xmm3 is used instead.
macro JuliaMandelPaintFFFF color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi
	mov edx, [h]			; edx = rows left
	mov esi, [a]			; esi = colour table base
	mov edi, [bits]			; edi = output pointer
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	MOVAPS dqword[cx1], xmm0	; cx1 / cy1 = Julia constant in all lanes
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3	; dy1 = per-row step in all lanes
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0 from dx2 and the mask constants.
	; The original note gives the intended result as
	;   0 | dx2 | dx2 * 2 | dx2 * 3
	; (the mask constants are defined elsewhere - TODO confirm).
	MOVAPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4	; left1 = x coordinates of the first 4 pixels of a row
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2	; dx1 = 4 * dx2, the step between pixel groups
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART			; first row: xmm4 already holds left1
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]	; rewind x to the start of the row
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAPS xmm0,xmm4		; z starts at the pixel coordinate
	XORPS  xmm6,xmm6		; clear the per-lane accumulator
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0
	MULPS  xmm2, xmm1		; xmm2 = zx*zy
	MULPS  xmm0, xmm0		; xmm0 = zx^2
	MULPS  xmm1, xmm1		; xmm1 = zy^2
	addps xmm2, xmm2		; xmm2 = 2*zx*zy
	movaps xmm3, xmm0
	addps xmm3, xmm1		; xmm3 = zx^2 + zy^2
	cmpltps xmm3, xmm7		; xmm3 = lane mask: zx^2 + zy^2 < radius
	movmskps eax, xmm3
	test eax, eax
	jz EXIT				; every lane failed the test: stop early
	subps xmm0, xmm1		; xmm0 = zx^2 - zy^2
	movaps xmm1, xmm2
     if color
	ANDPS  xmm3, xmm7 ; xmm6 += (zx^2 + zy^2 < radius) ? radius : 0.0;
	ADDPS  xmm6, xmm3
     end if
     if type = Julia
	ADDPS  xmm0, dqword[cx1]
	ADDPS  xmm1, dqword[cy1]
     else if type = Mandel
	ADDPS  xmm0, xmm4
	ADDPS  xmm1, xmm5
     end if
	dec ecx
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Convert each lane of xmm6 to an integer and use it as a byte offset
	; into the colour table; the shuffles bring lanes 1, 2, 3 down to lane 0.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm3 = mask from the last compare executed (untouched when color is
	; zero): lanes that failed the test receive maskbw, the others 0.
	ANDNPS	xmm3, dqword[maskbw]
	MOVAPS	[edi], xmm3
     end if
	add edi, 16			; four dword pixels written
	ADDPS  xmm4, dqword[dx1]
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]	; next row
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

; MOVADD
; JuliaMandelPaintMOVADD color, type
;   Fractal painter, "MOVADD" instruction schedule. Iterates z = z^2 + c on
;   four horizontally adjacent pixels at once using packed single-precision SSE.
;
;   color : non-zero -> each pixel is written as a dword fetched from the
;                       table at [a], indexed by the lane's accumulated count
;           zero     -> each pixel is written as (NOT inside-mask) AND maskbw
;   type  : Julia  -> c = (cx2, cy2) for every pixel
;           Mandel -> c = the pixel's own coordinate
;
;   Reads  : [h], [w], [a], [bits], cx2, cy2, dx2, dy2, LEFT, TOP,
;            mask, mask1, mask2, radius, maskbw
;   Writes : cx1, cy1 (Julia only), dx1, dy1, left1, and the buffer at [bits]
;   Saves  : ebx, esi, edi (push/pop)
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags
;
;   Assumes [w] is a non-zero multiple of 4 and [h] is non-zero (both loops
;   count down until exactly zero). MOVAPS faults on operands that are not
;   16-byte aligned.
macro JuliaMandelPaintMOVADD color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi
	mov edx, [h]
	mov esi, [a]
	mov edi, [bits]
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0; intended result per the original
	; note: 0 | dx2 | dx2 * 2 | dx2 * 3 (depends on mask constants defined
	; elsewhere - TODO confirm).
	MOVAPS	xmm0, xmm2	      ; xmm2 = dx2 in all four lanes
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0
	MULPS  xmm0, xmm0
	MOVAPS xmm3, xmm1
	ADDPS  xmm1, xmm1
	MULPS  xmm3, xmm3
	; xmm0 = zx^2           xmm1 = 2 * zy      xmm2 = zx           xmm3 = zy^2
	MULPS  xmm1, xmm2
	MOVAPS xmm2, xmm0
	ADDPS  xmm2, xmm3
	; xmm0 = zx^2 (zx^2 - zy^2 after the SUBPS below)
	; xmm1 = 2*zx*zy     xmm2 = zx^2 + zy^2  xmm3 = zy^2
	CMPLEPS  xmm2, xmm7	; xmm2 = lane mask: zx^2 + zy^2 <= radius
	SUBPS  xmm0, xmm3
	MOVMSKPS eax, xmm2
	test eax, eax
	jz EXIT			; every lane failed the test: stop early
     if color
	ANDPS  xmm2, xmm7 ; xmm6 += (zx^2 + zy^2 <= radius) ? radius : 0.0;
	ADDPS  xmm6, xmm2
     end if
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm1, xmm5
	ADDPS  xmm0, xmm4
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Convert each lane of xmm6 to an integer and use it as a byte offset
	; into the colour table; the shuffles bring lanes 1, 2, 3 down to lane 0.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm2 = mask from the last compare executed (untouched when color is
	; zero): lanes that failed the test receive maskbw, the others 0.
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16
	ADDPS  xmm4, dqword[dx1]
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}


;MOVADD2
; JuliaMandelPaintMOVADD2 color, type
;   Fractal painter, "MOVADD2" instruction schedule: the addition of c is
;   interleaved with the magnitude test instead of following it. Iterates
;   z = z^2 + c on four horizontally adjacent pixels at once using packed
;   single-precision SSE. The "n - m" notes in the loop are the original
;   author's per-instruction cycle annotations.
;
;   color : non-zero -> each pixel is written as a dword fetched from the
;                       table at [a], indexed by the lane's accumulated count
;           zero     -> each pixel is written as (NOT inside-mask) AND maskbw
;   type  : Julia  -> c = (cx2, cy2) for every pixel
;           Mandel -> c = the pixel's own coordinate
;
;   Reads  : [h], [w], [a], [bits], cx2, cy2, dx2, dy2, LEFT, TOP,
;            mask, mask1, mask2, radius, maskbw
;   Writes : cx1, cy1 (Julia only), dx1, dy1, left1, and the buffer at [bits]
;   Saves  : ebx, esi, edi (push/pop)
;   Clobbers: eax, ecx, edx, xmm0-xmm7, flags
;
;   Assumes [w] is a non-zero multiple of 4 and [h] is non-zero (both loops
;   count down until exactly zero). MOVAPS faults on operands that are not
;   16-byte aligned.
macro JuliaMandelPaintMOVADD2 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi
	mov edx, [h]
	mov esi, [a]
	mov edi, [bits]
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	; Build the per-lane x offsets in xmm0; intended result per the original
	; note: 0 | dx2 | dx2 * 2 | dx2 * 3 (depends on mask constants defined
	; elsewhere - TODO confirm).
	MOVAPS	xmm0, xmm2	      ; xmm2 = dx2 in all four lanes
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAPS xmm2, xmm0	;  0 -  6
	MULPS  xmm0, xmm0	;  0 -  6
	MOVAPS xmm3, xmm1	;  1 -  7
	ADDPS  xmm1, xmm1	;  1 -  5
	MULPS  xmm1, xmm2	;  6 - 12
	MOVAPS xmm2, xmm0	;  7 - 13
	MULPS  xmm3, xmm3	;  8 - 14
	; xmm0 = zx^2           xmm1 = 2*zx*zy     xmm2 = zx^2         xmm3 = zy^2
     if type = Julia
	ADDPS  xmm1, dqword[cy1]
     else if type = Mandel
	ADDPS  xmm1, xmm5	; 12 - 16
     end if
	ADDPS  xmm2, xmm3	; 14 - 18
	SUBPS  xmm0, xmm3	; 16 - 20
	; xmm0 = zx^2 - zy^2    xmm1 = 2*zx*zy + cy  xmm2 = zx^2 + zy^2  xmm3 = zy^2
	CMPLEPS  xmm2, xmm7	; 18 - 22
     if type = Julia
	ADDPS  xmm0, dqword[cx1]
     else if type = Mandel
	ADDPS  xmm0, xmm4	; 20 - 24
     end if
	MOVMSKPS eax, xmm2	; 22 - 28
	test eax, eax
	jz EXIT
     if color
	ANDPS  xmm2, xmm7	; 23 - 25 ; xmm6 += (zx^2 + zy^2 <= radius) ? radius : 0.0;
	ADDPS  xmm6, xmm2	; 26 - 30
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
     if color
	;21-22 cycles
	; Convert each lane of xmm6 to an integer and use it as a byte offset
	; into the colour table; the shuffles bring lanes 1, 2, 3 down to lane 0.
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi], eax
	SHUFPS xmm6, xmm6, 0E5h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+4], eax
	SHUFPS xmm6, xmm6, 0E6h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+8], eax
	SHUFPS xmm6, xmm6, 0E7h
	CVTSS2SI eax, xmm6
	mov eax, [esi + eax]
	mov [edi+12], eax
     else
	; xmm2 = mask from the last compare executed (untouched when color is
	; zero): lanes that failed the test receive maskbw, the others 0.
	ANDNPS	xmm2, dqword[maskbw]
	MOVAPS	[edi], xmm2
     end if
	add edi, 16
	ADDPS  xmm4, dqword[dx1]
	sub ebx, 4
	jnz XLOOP

	ADDPS  xmm5, dqword[dy1]
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx

}

;MOVADD2ps2dq
macro JuliaMandelPaintMOVADD2ps2dq color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	push	ebx esi edi
	mov edx, [h]
	mov esi, [a]
	mov edi, [bits]
     if type = Julia
	copyscr xmm0, cx2
	copyscr xmm1, cy2
     end if
	copyscr xmm2, dx2
	copyscr xmm3, dy2
     if type = Julia
	MOVAPS dqword[cx1], xmm0
	MOVAPS dqword[cy1], xmm1
     end if
	MOVAPS dqword[dy1], xmm3
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	MOVAPS	xmm0, xmm2	      ; xmm2 = 0       | dx2     | dx2 * 2 | dx2 * 3
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2
	MOVAPS	xmm7, dqword[radius]
	JMP	BSTART
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAPS xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAPS xmm0,xmm4
	XORPS  xmm6,xmm6
	MOVAPS xmm1,xmm5
	mov ecx, ITER

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -