📄 mmxoctave.asm

📁 国外游戏开发者杂志1997年第九期配套代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
上一页 12
PAND	    MM3, MM7

PSRLW	    MM5, 2
PAND	    MM4, MM7

PAND	    MM5, MM7
PSUBW	    MM2, MM1	   ;MM2 = g_b##_1 for pixel #3

PSUBW	    MM3, MM1	   ;MM3 = g_b##_1 for pixel #1
PSUBW	    MM4, MM1	   ;MM4 = g_b##_0 for pixel #3

PSUBW	    MM5, MM1	   ;MM5 = g_b##_0 for pixel #1

;Take above data for g_b00_0, g_b01_0, g_b10_0, g_b11_0 for pix 1 and 3
;and rearrange the packed values in the MMX registers.
;Output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM2 = | g_b00_0 p3   | g_b00_1 p3   | g_b01_0 p3   | g_b01_1 p3   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM3 = | g_b00_0 p1   |	g_b00_1 p1   | g_b01_0 p1   | g_b01_1 p1   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM6 = | g_b10_0 p3   | g_b10_1 p3   | g_b11_0 p3   | g_b11_1 p3   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM7 = | g_b10_0 p1   |	g_b10_1 p1   | g_b11_0 p1   | g_b11_1 p1   |
;      +-----------------------------------------------------------+

MOVQ	    MM6, MM2

MOVQ	    MM7, MM3
PUNPCKHWD   MM2, MM4	   ;MM2 = g_b00_# and g_b01_# for pix #3

PUNPCKLWD   MM6, MM4	   ;MM6 = g_b10_# and g_b11_# for pix #3

PUNPCKHWD   MM3, MM5	   ;MM3 = g_b00_# and g_b01_# for pix #1
MOVQ	    MM4, MM0	   ;Preparing for rx1 and ry1 calculation

PUNPCKLWD   MM7, MM5	   ;MM7 = g_b10_# and g_b11_# for pix #1

;Calculation of the rx1, ry1 values for both pixels.  Final output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM4 = |       RX1 p1 |       RX1 p3 |	     RY1 p1 |	    RY1 p3 |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;rx1 = rx0 - 256;
;ry1 = ry0 - 256;

PSUBW	    MM4, MM1	   ;MM4 = rx1 and ry1 parameters


;Setup for the calculation of u1 and u2 for pix #1.  Final output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM1 = |       RX0 p1 |	     RY0 p1 |	     RX0 p1 |	    RY1 p1 |
;      +-----------------------------------------------------------+

MOVQ	    MM5, MM0
MOVQ	    MM1, MM4

PSRLD	    MM5, 16

PSRAD	    MM1, 16

PSLLQ	    MM1, 32

PUNPCKHDQ   MM1, MM5

PACKSSDW    MM1, MM1

PACKSSDW    MM5, MM5

PUNPCKLDQ   MM1, MM5

;Calculation for U1 and U2 for pixel #1 -> After multiplication... Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM3 = | U1 for pixel #1	  | U2 for pixel #1	     |
;      +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;u1 = rx0 * g_b00_0 + ry0 * g_b00_1;
;u2 = rx0 * g_b01_0 + ry1 * g_b01_1;
PMADDWD     MM3, MM1       ;43u, MM3 = u1 and u2 for pixel #1

;Setup for the calculation of v1 and v2 for pix #1.  Final output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM5 = |       RX1 p1 |	     RY0 p1 |	     RX1 p1 |	    RY1 p1 |
;      +-----------------------------------------------------------+

MOVQ	    MM5, MM4

PSRAD	    MM5, 16
MOVQ	    MM1, MM0

PSRLD	    MM1, 16

PSLLQ	    MM1, 32

PUNPCKHDQ   MM1, MM5

PACKSSDW    MM1, MM1

PACKSSDW    MM5, MM5

PUNPCKLDQ   MM5, MM1

;Calculation for V1 and V2 for pixel #1 -> After multiplication... Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM7 = | V1 for pixel #1	  | V2 for pixel #1	     |
;      +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;v1 = rx1 * g_b10_0 + ry0 * g_b10_1;
;v2 = rx1 * g_b11_0 + ry1 * g_b11_1;

PMADDWD     MM7, MM5	   ;MM7 = v1 and v2 for pixel #1

;Setup for the calculation of u1 and u2 for pix #3.  Final output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM1 = |       RX0 p3 |	     RY0 p3 |	     RX0 p3 |	    RY1 p3 |
;      +-----------------------------------------------------------+

MOVQ	    MM5, MM0

PSLLD	    MM5, 16

PSRLD	    MM5, 16
MOVQ	    MM1, MM4

PSLLD	    MM1, 16

PSRAD	    MM1, 16

PUNPCKLDQ   MM1, MM1

PUNPCKHDQ   MM1, MM5

PACKSSDW    MM1, MM1

PACKSSDW    MM5, MM5

PUNPCKLDQ   MM1, MM5

;Calculation for U1 and U2 for pixel #3 -> After multiplication... Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM2 = | U1 for pixel #3	  | U2 for pixel #3	     |
;      +-----------------------------------------------------+
PMADDWD     MM2, MM1	   ;MM2 = u1 and u2 for pixel #3


;Setup for the calculation of v1 and v2 for pix #3.  Final output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM4 = |       RX1 p3 |	     RY0 p3 |	     RX1 p3 |	    RY1 p3 |
;      +-----------------------------------------------------------+

PSLLD	    MM4, 16

PSRAD	    MM4, 16
MOVQ	    MM5, MM0

PSLLD	    MM5, 16

PSRAD	    MM5, 16

PUNPCKLDQ   MM5, MM5

PUNPCKHDQ   MM5, MM4

PACKSSDW    MM5, MM5

PACKSSDW    MM4, MM4

PUNPCKLDQ   MM4, MM5

;Calculation for V1 and V2 for pixel #3 -> After multiplication... Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM6 = | V1 for pixel #3	  | V2 for pixel #3	     |
;      +-----------------------------------------------------+
PMADDWD     MM6, MM4	   ;MM6 = v1 and v2 for pixel #3

;Calculation for SX and SY for pixels #1 and #3, Output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM1 = |       SX  p1 |	     SX  p3 |	     SY  p1 |	    SY	p3 |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;sx = (((rx0 * rx0) >> 1) * ((1536 - (rx0 << 2)))) >> 16;
;sy = (((ry0 * ry0) >> 1) * ((1536 - (ry0 << 2)))) >> 16;
MOVQ	    MM5, MM0

PMULLW	    MM5, MM5
MOVQ	    MM4, MM0

MOVQ	    MM1, DWORD PTR mask_quad_1536
PSLLW	    MM4, 2

PSUBD	    MM6, MM2	   ;V1 - U1 and V2 - U2 for P3
PSUBD	    MM7, MM3	   ;V1 - U1 and V2 - U2 for P1

PSUBW	    MM1, MM4
PSRLW	    MM5, 1

PMULHW	    MM1, MM5	   ;MM1 = sx and sy param for pix 1, 3

;Calculation of A and B for pixel #1 and #3. Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM7 = | A for pixel #1		  | B for pixel #1	     |
;      +-----------------------------------------------------+
;      +-----------------------------------------------------+
;MM6 = | A for pixel #3 	  | B for pixel #3	     |
;      +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;a = u1 + sx * ((v1 - u1) >> 8);
;b = u2 + sx * ((v2 - u2) >> 8);
PSRAD	    MM7, 8

PSRAD	    MM6, 8

MOVQ	    MM4, MM1
MOVQ	    MM5, MM1

PSRLQ	    MM4, 16

PUNPCKLWD   MM1, MM1

PUNPCKHDQ   MM4, MM4

PMADDWD     MM7, MM4
PSLLD	    MM5, 16

MOVQ	    MM4, DWORD PTR v	    ;Used for incrementing v for next 4 pix
PSRLD	    MM5, 16

PUNPCKHDQ   MM5, MM5
;PADDD	    MM4, MM0	    ;Used for incrementing v for next 4 pix
PADDD	    MM4, DWORD PTR dv	    ;Used for incrementing v for next 4 pix

PADDD	    MM7, MM3	    ;MM7 = a and b parameter for pix #1
PMADDWD     MM6, MM5

MOVQ	    MM3, DWORD PTR mask_double_65536
PSRLD	    MM1, 16

MOVQ	    DWORD PTR v, MM4	    ;Used for incrementing v for next 4 pix

;Calculation of color indexes for pixel #1 and #3. Output:
;      |--------- 32 bit ---------|
;      +-----------------------------------------------------+
;MM6 = | Color index for pixel #1 | Color index for pixel #3 |
;      +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;color = (a + 65536 + sy * ((b - a) >> 8)) >> 9;
PADDD	    MM6, MM2	    ;MM6 = a and b parameter for pix #3

MOVQ	    MM4, DWORD PTR mask_quad_510
MOVQ	    MM2, MM6

PUNPCKLDQ   MM6, MM7

MOVD	          MM0, ebx	    ;Move the last color written into MM0
PUNPCKHDQ   MM2, MM7

PADDD	    MM3, MM2
PSUBD	    MM6, MM2

PSRAD	    MM6, 8

PMADDWD     MM6, MM1

PADDD	    MM6, MM3

PSRLD	    MM6, 9	    ;MM6 = color for pix #1 and #3

;Since the color values have been calculated for pixels 1 and 3,
;pixels 0 and 2 still need to be determined.  Pixel 0 is calculated by
;(prev_pixel + pixel #1) / 2 and pixel 2 is calculated by (pixel #1 +
;pixel #3) / 2.  Output:
;      |--- 16 bit ----|
;      +-----------------------------------------------------------------+
;MM3 = |Color p0 index | Color p1 index | Color p2 index | Color p3 index|
;      +-----------------------------------------------------------------+

MOVD	    MM4, DWORD PTR mask_double_255
PACKSSDW    MM6, MM6

MOVQ	    MM7, MM6
MOVQ	    MM3, MM6

PSRLD	    MM7, 16

PUNPCKLWD   MM7, MM0

PADDW	    MM6, MM7

PSRLW	    MM6, 1

PUNPCKLWD   MM3, MM6
ADD	        EDI, 8

;Now that MM3 contains the 4 memory indexes in packed format, we need
;to unpack them in order to get the precomputed color values from the 256
;element color array.  Output:
;      |--- 16 bit ---|
;      +--------------------------------------------------------------+
;MM1 = | Color p3     | Color p2      | Color p1      | Color p0      |
;      +--------------------------------------------------------------+

;Write the 4 pixel colors to the backbuffer.
;Decrease the counter and loop back to draw four more pixels if necessary.
;The looping construct may look strange but it is done to allow for the
;calculation of the pixel colors at the end of the scan line.

;Alternatively: scale the four values down (right shift by the count in
;turbShift) and add them to the words already stored at [EDI].

MOVD	  EBX ,MM3

PSRLW    MM3,[turbShift]

PADDW    MM3,[EDI]      
MOVQ	  [EDI], MM3	   ;Write out the 4 pix to video memory.

DEC	   ECX
JNZ	   start_scan_line

INC    ESI
;MOV  prev_color, EBX       ;EBX is the color index of pixel #3. Store it.
INC     [turbShift]  
DEC    [octShift]  

CMP  ESI, num_octaves
JNZ	   start_octave

MOV  prev_color, EBX       ;EBX is the color index of pixel #3. Store it.

;end_scan_line:

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Reverse, in place, the order of the four 16-bit words inside every
;; quadword of the turb buffer:
;; buffer[i] = p0:p1:p2:p3 --> buffer[i] = p3:p2:p1:p0
;;
;; Notation: the four words of a register are listed most-significant
;; first; '*' marks a lane whose content is not used afterwards.
;; Registers: EDI = cursor into the buffer (advanced 8 bytes per pass),
;;            ECX = number of quadwords left, MM4-MM7 = scratch.
;; NOTE(review): ECX is loaded from num_pixels but is decremented once
;; per 8-byte quadword (four words) -- confirm num_pixels is meant as a
;; count of quadwords here. DEC/JNZ also assumes the count is non-zero
;; on entry; a zero count would wrap instead of skipping the loop.

MOV   EDI , turb_buffer      ; start of the data to rearrange
MOV	  ECX, num_pixels      ; loop counter (one step per quadword)

flipLoop: 
MOVQ       MM5, [EDI]      ; mm5 = p0:p1:p2:p3 (quadword to reverse)

MOVQ	         MM4, MM5      ; mm4 = p0:p1:p2:p3 (untouched copy)
PUNPCKHDQ   MM5,MM5  ; mm5 = p0:p1:p0:p1

MOVQ            MM7,MM5   ; mm7 = p0:p1:p0:p1
PSRLD            MM5,16      ; mm5 = 0:p0:0:p0 (p0 moved to the low word of each dword)

MOVQ	        MM6, MM4      ; mm6 = p0:p1:p2:p3
PUNPCKLWD  MM5,MM7  ; mm5  =  *:*:p1:p0

PSRLQ           MM6,16       ; mm6 =   0:p0:p1:p2

PUNPCKLWD  MM6,MM4  ; mm6  =  *:*:p3:p2

PUNPCKLDQ   MM5, MM6 ; mm5 = p3:p2:p1:p0 (low dword of mm6 above low dword of mm5)

MOVQ	  [EDI], MM5	   ; write the reversed quadword back over the original
ADD	               EDI, 8      ; advance to the next quadword
DEC         ECX
JNZ          flipLoop      ; repeat until the counter reaches zero
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

EMMS                       ; Clear out the MMX registers and set approp flags.

RET                        ; end of function

MMX_Octave ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

END
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -