📄 mmxoctave.asm

📁 国外游戏开发者杂志1997年第九期配套代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
12 下一页
;/**************************************************************************
;
;Mixed Rendering
;
; **************************************************************************/
;/***************************************************************
;*
;*       This program has been developed by Intel Corporation.  
;*		You have Intel's permission to incorporate this code 
;*       into your product, royalty free.  Intel has various 
;*	    intellectual property rights which it may assert under
;*       certain circumstances, such as if another manufacturer's
;*       processor mis-identifies itself as being "GenuineIntel"
;*		when the CPUID instruction is executed.
;*
;*       Intel specifically disclaims all warranties, express or
;*       implied, and all liability, including consequential and
;*		other indirect damages, for the use of this code, 
;*		including liability for infringement of any proprietary
;*		rights, and including the warranties of merchantability
;*		and fitness for a particular purpose.  Intel does not 
;*		assume any responsibility for any errors which may 
;*		appear in this code nor any responsibility to update it.
;*
;*  * Other brands and names are the property of their respective
;*    owners.
;*
;*  Copyright (c) 1995, Intel Corporation.  All rights reserved.
;***************************************************************/
TITLE Modified form of Perlin's Noise Basis function using MMX(tm) technology

;prevent listing of iammx.inc file
.nolist
INCLUDE iammx.inc
.list

.586
.model FLAT

;***********************************************************************
;     Data Segment Declarations
;***********************************************************************
;.DATA
DSEG SEGMENT PARA

;Working storage and constant masks for MMX_Octave.
;Every QWORD here is accessed with 64-bit MOVQ loads/stores, so each group
;of 64-bit operands is placed on an 8-byte boundary with ALIGN 8.

;KEY for comments
;P0, P1, P# = Pixel number 0, Pixel number 1, Pixel number # respectively.
;Pix        = Pixel
;DU         = Derivative of the variable U.
;DDU        = Derivative of the variable DU.
;Texel      = A point in the texture to be mapped onto the screen.  Given by U, V.

;Note: Even though the assembly writes four pixel values through each pass of the
;inner loop, only two of the pixels are directly calculated.  The other two pixels
;are averaged from neighboring pixels.  According to the current scheme, 
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MMX = | Pixel #0     | Pixel #1     | Pixel #2    | Pixel #3      |
;      +-----------------------------------------------------------+
;Pixels #1 and #3 are directly calculated.  Pixel #2 is averaged from Pixel #1 and
;pixel #3.  Pixel #0 is averaged from Pixel #1 and the previous pixel before #0.
;
;Also, the programmer realizes that the pixels are labeled 0, 1, 2, 3 instead
;of 3, 2, 1, 0, which would follow the conventional format of Intel Architecture.
;This was an oversight and was not realized until it was too late.

;Variables, u, v, du, dv, ddu, ddv each contain parameters for two
;texels.  Since u, v, ..., ddv are 64 bit, then each texel parameter is
;32 bit.  (32 bit per texel * two texels = 64 bits).  This enables us
;to work with two pixels at one time using MMX technology.
ALIGN 8
u		    QWORD ?
du		   QWORD ?
ddu		  QWORD ?

v		    QWORD ?
dv		   QWORD ?
ddv		  QWORD ?

;Starting u/v pair saved by the initialization code of MMX_Octave and copied
;back into u and v at the start_octave label.
firstU     QWORD ?
firstV     QWORD ?

   

;Since the program only calculates odd pixel values, the even pixel values
;must be averaged.  Therefore, for each pass through the inner loop, four
;pixels will be drawn.	In order to draw the first pixel, the pixel before
;it must be known for the averaging.  This pixel color is contained here.
prev_color        DWORD 255

;Shift counts.  Each is declared as two DWORDs (8 bytes): octShift is written
;with a 32-bit MOV and then loaded with a 64-bit MOVQ to serve as the count
;operand of PSRLD, so the upper DWORD must stay zero.  turbShift is laid out
;the same way.
;ALIGN 8 is required here: prev_color above is a single DWORD, so without it
;these 8-byte operands would start at offset 4 mod 8 and every MOVQ on them
;would be a misaligned access.  The padding inserted here is the same 4 bytes
;the ALIGN 8 before the masks previously inserted, so the masks do not move.
ALIGN 8
octShift			 DWORD  0,0
turbShift            DWORD  0,0

;Various masks.  Set up to filter out unwanted bits in MMX registers.
;Naming: mask_quad_N   = the value N replicated in all four 16-bit words,
;        mask_double_N = the value N replicated in both 32-bit doublewords.
ALIGN 8
mask_32_to_15	  QWORD 00007FFF00007FFFh   ;keeps the low 15 bits of each doubleword
mask_quad_1	  QWORD 0001000100010001h
mask_quad_255	  QWORD 00FF00FF00FF00FFh
mask_quad_256	  QWORD 0100010001000100h
mask_quad_510	  QWORD 01FE01FE01FE01FEh
mask_quad_511	  QWORD 01FF01FF01FF01FFh
mask_quad_1536	  QWORD 0600060006000600h
mask_double_255   QWORD 000000FF000000FFh
mask_double_FFFF  QWORD 0000FFFF0000FFFFh
mask_double_65536 QWORD 0001000000010000h
;NOTE(review): same bit pattern as mask_quad_255; kept as a separate symbol
;because code outside this view may reference it by name.
mask_four_255        QWORD 00FF00FF00FF00FFh


DSEG ENDS

;***********************************************************************
;     Constant Segment Declarations
;***********************************************************************
.const

;***********************************************************************
;     Code Segment Declarations
;***********************************************************************
.code

COMMENT^
void MMX_Octave(unsigned long u_init, unsigned long v_init, 
				            long du_init, long dv_init,
	                        unsigned long Num_Pix,
                            unsigned _int16* turb_buffer,
							unsigned long num_octaves);
^

MMX_Octave PROC NEAR C USES  ebx ecx edi esi,
			u_init:DWORD			, v_init:DWORD, 
			du_init:DWORD		   , dv_init:DWORD,
			num_pixels:DWORD    , turb_buffer:DWORD,
			num_octaves:DWORD 

;Initialization

MOVD	       MM0, u_init

MOVD		   MM1, v_init
PUNPCKLDQ   MM0, MM0	  ;U p1 = u, p3 = u

MOVD			 MM2, du_init
PUNPCKLDQ   MM1, MM1	  ;V p1 = v, p3 = v

MOVD	    MM3, dv_init
PADDD	    MM0, MM2	  ;U p1 = u, p3 = u + du

PADDD	    MM1, MM3	  ;V p1 = v, p3 = v + dv
PADDD	    MM0, MM2	  ;U p1 = u, p3 = u + 2du

PADDD	         MM1, MM3	  ;V p1 = v, p3 = v + 2dv
PUNPCKLDQ   MM2, MM2

PUNPCKLDQ   MM3, MM3
PADDD	         MM0, MM2	  ;U p1 = u + du, p3 = u + 3du

MOV        [turbShift]  , 0    ; turbShift is the octave number 0,1,2,....
XOR        ESI,ESI               

MOVQ	   DWORD PTR firstU , MM0
PADDD	   MM1, MM3	  ;V p1 = v + dv, p3 = v + 3dv

MOV          [octShift]   , 14  ; octshift is (14 - esi (octave number))
PSLLD	    MM2, 2	  ;DU p1 = 4du, p3 = 4du

MOVQ	   DWORD PTR firstV,  MM1
PSLLD	    MM3, 2   ;DV p1 = 4dv, p3 = 4dv

MOVQ	    DWORD PTR du, MM2

MOVQ	    DWORD PTR dv, MM3


start_octave : 
MOV	    EBX, prev_color
MOV	    EDI,  turb_buffer ;EDI will always be pointer to screen buffer

MOV	    ECX, num_pixels
SUB	     EDI, 8

;Get the UV parameters in MMX(tm) technology form.
;Note: UV texel values are stored in 10.22 fixed integer format.
;This sets up the U parameters for pixels 1 and 3 in MM0 register and
;V parameter in MM1 register.  After setup, the registers will contain:
;      |--------- 32 bit ------------|
;      +-------------------------------------------------------------------+
;MM0 = | U texel for pix #1 = u + du | U texel for pix #3 = u + 3du + 3ddu |
;      +-------------------------------------------------------------------+
;      +-------------------------------------------------------------------+
;MM1 = | V texel for pix #1 = v + dv | V texel for pix #3 = v + 3dv + 3ddv |
;      +-------------------------------------------------------------------+
;This is because the first four pixels drawn on the screen will have the
;U and V texel values of:
;Pixel #0 = u
;Pixel #1 = u + du
;Pixel #2 = u + 2du + ddu
;Pixel #3 = u + 3du + 3ddu
;We are only interested in pixels #1 and #3 because pixels #0 and #2 are averaged.

movq          mm0,     DWORD PTR firstU
movq          mm1,     DWORD PTR firstV
movq          DWORD PTR u , mm0     
movq          DWORD PTR v , mm1




;Get the du dv parameters in MMX(tm) technology form
;Note: du dv texel values are stored in 10.22 fixed integer format.
;This sets up the du parameters for pixels 1 and 3 in MM0 register and
;dv parameter in MM1 register.	After setup, the registers will contain:
;      |--------- 32 bit --------------|
;      +---------------------------------------------------------------+
;MM0 = | DU texel for p1 = 4du + 10ddu | DU texel for p3 = 4du + 18ddu |
;      +---------------------------------------------------------------+
;      +---------------------------------------------------------------+
;MM1 = | DV texel for p1 = 4dv + 10ddv | DV texel for p3 = 4dv + 18ddv |
;      +---------------------------------------------------------------+
;This is because after the first four pixels are drawn on the screen, the
;loop repeats to draw the next four pixels.  In order to get the next u, v
;texel coordinates, appropriate du, dv values need to be summed to u and v.
;The correct starting values of du and dv are:
;Pixel #0 = 4du	+  6ddu  ;Note: these have been mathematically proven.
;Pixel #1 = 4du + 10ddu
;Pixel #2 = 4du + 14ddu
;Pixel #3 = 4du + 18ddu
;We are only interested in pixels #1 and #3 because pixels #0 and #2 are averaged.

;nop
;nop
;nop
;nop
;nop
start_scan_line:
;First, the program converts the u and v texel coordinates
;from 10.22 format to 8.8 format.  10.22 format is used for
;decimal accuracy but only 16 of the 32 bits are actually used.
;Because the final format will fit in a 16 bit result, u and v
;values are converted from 4, 32 bit packed values
;to 4, 16 bit packed values that will fit in one MMX register.	Output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM0 = | U texel - p1 | U texel - p3 | V texel - p1 | V texel - p3 |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;u_16bit = u_init >> 14;
;v_16bit = v_init >> 14;

MOVQ	    MM1, DWORD PTR u;

MOVQ       MM3, DWORD PTR octShift

MOVQ	    MM0, DWORD PTR v;
PSRLD	    MM1,MM3							;Convert from 10.22 to 10.8

MOVQ	    MM2, DWORD PTR mask_32_to_15 ;Uses 15 instead of 16 because of signed saturation.
PSRLD	    MM0,MM3                           ;Convert from 10.22 to 10.8

;PSRLD	    MM1, [octShift] 	  
;PSRLD	    MM0, [octShift]	   

PAND	    MM1, MM2	   ;Convert from 10.8 to 7.8 integer format
PAND	    MM0, MM2	   ;Convert from 10.8 to 7.8 integer format

MOVQ	        MM3, DWORD PTR mask_quad_1
PACKSSDW    MM0, MM1	   ;Pack the result into one register

;Calculation of the bx0, by0, bx1, by1 values for both pixels.	Output:
;             |-8 bit-|
;      +-----------------------------------------------------------+
;MM2 = |      |BX0 p1 |      |BX0 p3 |      |BY0 p1 |      |BY0 p3 |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM3 = |      |BX1 p1 |      |BX1 p3 |      |BY1 p1 |      |BY1 p3 |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;bx0 = u_16bit >> 8;
;by0 = v_16bit >> 8;
;bx1 = bx0 + 1;
;by1 = by0 + 1;

MOVQ	    MM1, DWORD PTR u	    ;Used for incrementing u for next 4 pix.
MOVQ	    MM2, MM0

PSRLW	    MM2, 8

;PADDD	      MM1, MM4	    ;Used for incrementing u for next 4 pix.
PADDD	      MM1,  DWORD PTR du	    ;Used for incrementing u for next 4 pix.
PADDUSB     MM3, MM2   ;mm3 = 0:BX1(1):0:BX1(3):0:BY1(1):0:BY1(3)


;Calculation of the rx0, ry0 values for both pixels.  Final output:
;	      |-8 bit-|
;      +-----------------------------------------------------------+
;MM0 = |      |RX0 p1 |      |RX0 p3 |	    |RY0 p1 |	   |RY0 p3 |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;rx0 = u_16bit & 255;
;ry0 = v_16bit & 255;

PSLLW	    MM0, 8  
MOVQ	    MM4, MM3

MOVQ	    MM6, DWORD PTR mask_quad_1
PUNPCKHWD   MM4, MM2    ;mm4 = 0:BX0(1):0:BX1(1):0:BX0(3):0:BX1(3)

PUNPCKLWD   MM3, MM2    ;mm3  = 0:BY0(1):0:BY1(1):0:BY0(3):0:BY1(3)
PMULLW	    MM4, MM4       ;mm4  = BX0^2(1):BX1^2(1):BX0^2(3):BX1^2(3)
 
PSRLW	    MM0, 8	   ;MM0 = rx0 and ry0 param for pix 1, 3
;This section includes calculation of b00, b01, b10, b11.  Output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM4 = | b00 for p3   | b01 for p3   | b10 for p3   | b11 for p3   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM5 = | b00 for p1   | b01 for p1   | b10 for p1   | b11 for p1   |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;b00 = random1((random1(bx0) + by0));
;b01 = random1((random1(bx0) + by1));
;b10 = random1((random1(bx1) + by0));
;b11 = random1((random1(bx1) + by1));
MOVQ	    MM2, MM3        
 
PUNPCKLDQ   MM3, MM3    ;mm3  = 0:BY0(3):0:BY1(3):0:BY0(3):0:BY1(3)

PUNPCKHDQ   MM2, MM2    ;mm2  = 0:BY0(1):0:BY1(1):0:BY0(1):0:BY1(1)
MOVQ	         MM5, MM4

MOVQ	         DWORD PTR u, MM1	   ;Used for incrementing u for next 4 pix.
PUNPCKLWD   MM4, MM4  ;mm4  = BX0^2(3):BX0^2(3):BX1^2(3):BX1^2(3)

PUNPCKHWD   MM5, MM5  ;mm5  = BX0^2(1):BX0^2(1):BX1^2(1):BX1^2(1)
PADDW	    MM4, MM3


PADDW	    MM5, MM2
;This section calculates g_b00_0, g_b01_0, g_b10_0, g_b11_0 (and the matching
;g_b00_1 .. g_b11_1 values) for pix 1 and 3.
;Output:
;      |--- 16 bit ---|
;      +-----------------------------------------------------------+
;MM2 = | g_b00_1 p3   | g_b01_1 p3   | g_b10_1 p3   | g_b11_1 p3   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM3 = | g_b00_1 p1   |	g_b01_1 p1   | g_b10_1 p1   | g_b11_1 p1   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM4 = | g_b00_0 p3   | g_b01_0 p3   | g_b10_0 p3   | g_b11_0 p3   |
;      +-----------------------------------------------------------+
;      +-----------------------------------------------------------+
;MM5 = | g_b00_0 p1   |	g_b01_0 p1   | g_b10_0 p1   | g_b11_0 p1   |
;      +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;g_b00_0 = (random2(b00) & 511) - 256;
;g_b01_0 = (random2(b01) & 511) - 256;
;g_b10_0 = (random2(b10) & 511) - 256;
;g_b11_0 = (random2(b11) & 511) - 256;
;g_b00_1 = (random2(b00 + 1) & 511) - 256;
;g_b01_1 = (random2(b01 + 1) & 511) - 256;
;g_b10_1 = (random2(b10 + 1) & 511) - 256;
;g_b11_1 = (random2(b11 + 1) & 511) - 256;
PMULLW	    MM4, MM4  ;random1

PMULLW	    MM5, MM5  ;random1
MOVQ	      MM2, MM6

MOVQ	     MM3, MM6
PADDUSW   MM2, MM4

PMULLW	    MM2, MM2     ;random2
PADDUSW    MM3, MM5

MOVQ	    MM1, DWORD PTR mask_quad_256
PMULLW	   MM3, MM3     ;random2

MOVQ	    MM7, DWORD PTR mask_quad_511
PMULLW	   MM4, MM4    ;random2


PMULLW	    MM5, MM5   ;random2
PSRLW	      MM2, 2

PSRLW	    MM3, 2
PAND	    MM2, MM7

PSRLW	    MM4, 2
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -