📄 mmxtexture.asm

📁 国外游戏开发者杂志1997年第九期配套代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
12 下一页
;/**************************************************************************
;
;Mixed Rendering
;
; **************************************************************************/
;/***************************************************************
;*
;*       This program has been developed by Intel Corporation.  
;*		You have Intel's permission to incorporate this code 
;*       into your product, royalty free.  Intel has various 
;*	    intellectual property rights which it may assert under
;*       certain circumstances, such as if another manufacturer's
;*       processor mis-identifies itself as being "GenuineIntel"
;*		when the CPUID instruction is executed.
;*
;*       Intel specifically disclaims all warranties, express or
;*       implied, and all liability, including consequential and
;*		other indirect damages, for the use of this code, 
;*		including liability for infringement of any proprietary
;*		rights, and including the warranties of merchantability
;*		and fitness for a particular purpose.  Intel does not 
;*		assume any responsibility for any errors which may 
;*		appear in this code nor any responsibility to update it.
;*
;*  * Other brands and names are the property of their respective
;*    owners.
;*
;*  Copyright (c) 1995, Intel Corporation.  All rights reserved.
;***************************************************************/
TITLE marble and wood textures using MMX(tm) technology

;prevent listing of iammx.inc file
.nolist
INCLUDE iammx.inc
.list
.586 
.model FLAT

;***********************************************************************
;     Data Segment Declarations
;***********************************************************************
;.DATA
; Data segment holding the external lookup-table references, scratch
; variables and the 64-bit packed constants used by the MMX texture
; routines in this file.
DSEG SEGMENT PARA


; Lookup tables defined outside this file.  The procedures visible in
; this file index _sinTable, _sqrtTable and _woodTable with a scale of 2
; (16-bit entries); _turbulenceTbl is read and then overwritten in place.
extrn _sinTable : ptr sword
extrn _woodTable : ptr dword
extrn _sqrtTable : ptr dword
extrn _turbulenceTbl : ptr dword


; du and dv are 64-bit slots, each sized to hold a 32-bit step for two
; texels (32 bits per texel * two texels = 64 bits), so that two pixels
; can be handled at once with MMX technology.
; NOTE(review): du, dv and result are not referenced by the procedures
; visible in this part of the file - confirm usage in the remainder.
ALIGN 8
du		    QWORD ?
dv		   QWORD ?
result	dd 0


; Packed constants: unless noted, each QWORD repeats one 16-bit value in
; all four word lanes.
;   mask_quad_green            0800h per word; only referenced from
;                              commented-out bias code in the visible part
;   mask_quad_10               10   (multiplier in MMX_Marb and MMX_Wood)
;   mask_quad_15               15   (turbulence multiplier in MMX_Wood)
;   mask_quad_735              02dfh = 735  (the literal carries one extra
;                              leading zero; subtracted in MMX_Wood)
;   mask_quad_1500             05dch = 1500 (subtracted in MMX_Marb)
;   mask_FFFF_Minus_High       0e000h = 0ffffh - 1fffh; PADDUSW followed by
;                              PSUBUSW with it clamps each word to <= 1fffh
;   mask_FFFF_Minus_High_Wood  0e890h = 0ffffh - 176fh; the same add/sub
;                              pair clamps each word to <= 176fh (5999)
;   mask_all_1                 all 64 bits set
;   mask_clear_byte_1          only the lowest word (bits 0..15) set
;   mask_high_words            bits 0..15 of each dword set, i.e. it keeps
;                              the LOW word of each dword despite its name
;   mask_low_words             bits 16..31 of each dword set, i.e. it keeps
;                              the HIGH word of each dword despite its name
ALIGN 8
mask_quad_green	           QWORD 0800080008000800h
mask_quad_10                 QWORD 000a000a000a000ah
mask_quad_15                 QWORD 000f000f000f000fh
mask_quad_735               QWORD 002df02df02df02dfh
mask_quad_1500             QWORD 05dc05dc05dc05dch
mask_FFFF_Minus_High  QWORD 0e000e000e000e000h
mask_FFFF_Minus_High_Wood QWORD 0e890e890e890e890h
mask_all_1					      QWORD 0ffffffffffffffffh
mask_clear_byte_1           QWORD 0000000000000ffffh
mask_high_words             QWORD 00000ffff0000ffffh
mask_low_words     QWORD 0ffff0000ffff0000h

DSEG ENDS

;***********************************************************************
;     Constant Segment Declarations
;***********************************************************************
.const

;***********************************************************************
;     Code Segment Declarations
;***********************************************************************
.code



; MMX_Marb uses the contents of turbulence_buffer, which was filled
; beforehand by MMX_Octave with num_octaves of noise.
; Our marble approximation is marb(x) = sin(x + turb(x)); we use a pre-computed
; sine table to accelerate it and to be able to use MMX technology.
; In each iteration 4 pixels are calculated; 'num_pixels' is expected to be a multiple of 4.


;-----------------------------------------------------------------------
; MMX_Marb(u_init, du_init, num_pixels)            C convention, NEAR
;
; For every group of four 16-bit entries turb0..turb3 in _turbulenceTbl
; this computes
;       indx_i = ((u + i*du) >> 14) + 10*turb_i - 1500     (16-bit wrap)
; and overwrites the entry with the word _sinTable[indx_i], where indx_i
; is used as an unsigned 16-bit index.
;
; In:    u_init     - u of the first pixel (the original comments
;                     describe the >> 14 as a 10.22 -> 10.8 conversion)
;        du_init    - step of u between two neighbouring pixels
;        num_pixels - pixels in the scanline; only whole groups of four
;                     are processed, and a count below 4 does nothing
; Out:   _turbulenceTbl is rewritten in place; no value is returned
; Saves: EAX, ECX, EBX, EDI (USES list).  MM0-MM7 are clobbered and the
;        MMX state is cleared with EMMS before returning.
;
; Loop invariants:
;        MM0 = u1:u0   MM1 = u3:u2   MM2 = 4du:4du
;        MM7 = 0000:ffff:0000:ffff   EDI -> current group   ECX = groups left
;-----------------------------------------------------------------------
MMX_Marb PROC NEAR C USES eax ecx ebx edi ,
		u_init:DWORD, du_init:DWORD, num_pixels:DWORD

        MOV         ECX, num_pixels             ; number of pixels in the scanline
        SHR         ECX, 2                      ; ECX = number of 4-pixel groups
        JZ          marb_done                   ; fewer than 4 pixels: nothing to draw
                                                ; (DEC/JNZ below would otherwise wrap
                                                ;  and run ~2^32 iterations)

        LEA         EDI, _turbulenceTbl         ; already calculated turbulence
        MOVD        MM2, du_init                ; mm2 = 0:du
        MOVD        MM0, u_init                 ; mm0 = 0:u
        PSLLQ       MM2, 32                     ; mm2 = du:0
        PUNPCKLDQ   MM0, MM0                    ; mm0 = u:u
        PADDD       MM0, MM2                    ; mm0 = u+du : u      (pixels 1,0)
        PUNPCKHDQ   MM2, MM2                    ; mm2 = du:du
        MOVQ        MM1, MM0                    ; mm1 = u+du : u
        PSLLD       MM2, 1                      ; mm2 = 2du:2du
        PADDD       MM1, MM2                    ; mm1 = u+3du : u+2du (pixels 3,2)
        PSLLD       MM2, 1                      ; mm2 = 4du:4du, step per iteration
        MOVQ        MM7, DWORD PTR mask_high_words  ; mm7 = 0000:ffff:0000:ffff

marb_loop:
        MOVQ        MM5, [EDI]                  ; mm5 = turb3:turb2:turb1:turb0
        MOVQ        MM3, MM0                    ; mm3 = u1:u0

        ; There is no unsigned-saturating pack of dwords to words, so each
        ; u is shifted left by 2 and then arithmetically right by 16.  That
        ; leaves bits 14..29 of u as a sign-extended word, which PACKSSDW
        ; can pack without saturating (net effect: u >> 14, truncated).
        MOVQ        MM4, MM1                    ; mm4 = u3:u2
        PSLLD       MM3, 2                      ; left by 2 (= 16 - 14)
        PMULLW      MM5, DWORD PTR mask_quad_10 ; turb = 10 * turb
        PSLLD       MM4, 2                      ; left by 2 (= 16 - 14)
        PADDD       MM0, MM2                    ; advance u1:u0 for the next iteration
        PSRAD       MM3, 16                     ; sign-extend for PACKSSDW
        PSRAD       MM4, 16                     ; sign-extend for PACKSSDW
        ADD         EDI, 8                      ; advance to the next group
        PSUBW       MM5, DWORD PTR mask_quad_1500   ; turb = 10*turb - 1500
        PACKSSDW    MM3, MM4                    ; mm3 = (u3:u2:u1:u0) >> 14 as words
        PADDD       MM1, MM2                    ; advance u3:u2 for the next iteration
        PADDW       MM3, MM5                    ; indx = (u >> 14) + 10*turb - 1500

        ; Split the four word indices into EAX/EBX and gather from _sinTable.
        MOVQ        MM6, MM3                    ; mm6 = indx3:indx2:indx1:indx0
        PAND        MM3, MM7                    ; mm3 = 0:indx2:0:indx0
        MOVD        EAX, MM3                    ; eax = indx0
        PSRLD       MM6, 16                     ; mm6 = 0:indx3:0:indx1
        MOVD        EBX, MM6                    ; ebx = indx1
        PSRLQ       MM3, 32                     ; mm3 = 0:0:0:indx2
        MOVD        MM4, [_sinTable + eax*2]    ; low word = pixel0
        MOVD        EAX, MM3                    ; eax = indx2
        PSRLQ       MM6, 32                     ; mm6 = 0:0:0:indx3
        PUNPCKLWD   MM4, [_sinTable + ebx*2]    ; low dword = pixel1:pixel0
        MOVD        EBX, MM6                    ; ebx = indx3
        MOVD        MM5, [_sinTable + eax*2]    ; low word = pixel2
        PUNPCKLWD   MM5, [_sinTable + ebx*2]    ; low dword = pixel3:pixel2
        PUNPCKLDQ   MM4, MM5                    ; mm4 = p3:p2:p1:p0
        MOVQ        [EDI-8], MM4                ; store over the turbulence just read

        DEC         ECX
        JNZ         marb_loop

marb_done:
        EMMS                                    ; leave the FPU/MMX state clean for the caller

        RET                                     ; end of function
MMX_Marb ENDP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;MOVD      EAX,MM3   ; eax = indx1:indx0												; 1,2
;MOVQ      MM6,MM3  ; now we read the pixles' color from the sine table       
;AND         EAX, 0ffffh																				; 3
;PSRLD	  MM3, 16
;MOVD      MM4  , DWORD PTR [esi + eax*2] ;pixel0							   ; 4,5
;PSRLQ     MM6, 32
;MOVD      EBX,MM6  ; ebx = indx3:indx2												  ;  6
;PSRLD	  MM6, 16
;MOVD              EAX,MM3   ; eax = indx1												    ; 7
;AND                 EBX,0ffffh   ; ebx = indx2													;  8
;PUNPCKLWD    MM4 , DWORD PTR [esi + eax*2]   ;pixel1					;  9
;MOVD              MM5  , DWORD PTR [esi  + ebx*2]   ;pixel2				   ; 10
;MOVD              EBX,MM6									  ;ebx = indx3				   ; 11
;PUNPCKLWD   MM5 , DWORD PTR [esi][ebx*2]    ;pixel3					 ; 12

;MOVD				EAX,MM3   ; eax = indx1:indx0						;1,2
;PSRLQ				MM3,32     ; now we read the pixles' color from the sine table       
;MOV				  EDX , EAX														   ;3
;AND				   EAX , 0ffffh  
;MOVD				 EBX,mm3  ; ebx = indx3:indx2						 ;4
;MOVD				 MM4  , DWORD PTR [esi + eax*2] ;pixel0      ;5
;MOV				   EAX , EBX														;6
;AND                   EBX , 0ffffh ;ebx =indx2
;SHR                   EDX   , 16 ; edx = indx1                                   ; 7
;SHR                   EAX   , 16 ; eax = indx3									  ; 8
;MOVD                MM5  , DWORD PTR [esi  + ebx*2]   ;pixel2   ; 9
;PUNPCKLWD    MM4  , DWORD PTR [esi   + edx*2]   ;pixel1  ; 10 
;PUNPCKLWD    MM5  , DWORD PTR [esi   + eax*2]   ;pixel3  ; 11 

; PSLLD   MM3,16          ; no need done by : pslld mm3 , 2
; PSLLD   MM4,16          ; no need done by : pslld mm4 , 2
; The following adds a bias of 1 to the color so that real BLACK is
; never actually written to the sphere (thus it works nicely for a chroma BLT).
; paddusw			MM4, DWORD PTR mask_quad_green  
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;-----------------------------------------------------------------------
; MMX_Wood(u_init, v_init, du_init, dv_init, num_pixels)   C convention, NEAR
;
; For every pair of 16-bit entries turb0, turb1 in _turbulenceTbl:
;       r    = min( bits 10..25 of (sq(u >> 14) + sq(v >> 14)), 1fffh )
;       indx = min( ((10*_sqrtTable[r] + 15*turb) >> 2) - 735, 176fh )
;       out  = _woodTable[indx]
; where sq() is PMADDWD of a register with itself (per dword:
; low_word^2 + high_word^2), all word arithmetic wraps at 16 bits, and
; both min() clamps are unsigned (PADDUSW/PSUBUSW pair).
;
; In:    u_init, v_init   - u and v of the first pixel (the original
;                           comments describe >> 14 as 10.22 -> 10.8)
;        du_init, dv_init - steps of u and v between neighbouring pixels
;        num_pixels       - pixels in the scanline; only whole pairs are
;                           processed, and a count below 2 does nothing
; Out:   _turbulenceTbl is rewritten in place; no value is returned
; Saves: EDI, ECX, EAX, EDX (USES list).  MM0-MM7 are clobbered and the
;        MMX state is cleared with EMMS before returning.
;
; Loop invariants:
;        MM0 = u1:u0   MM1 = v1:v0   MM2 = 2du:2du   MM3 = 2dv:2dv
;        EDI -> current pair          ECX = pairs left
;-----------------------------------------------------------------------
MMX_Wood PROC NEAR C USES edi  ecx eax edx,
		u_init:DWORD, v_init:DWORD,
		du_init:DWORD ,dv_init:DWORD,
		num_pixels:DWORD

        MOV         ECX, num_pixels
        SHR         ECX, 1                      ; ECX = number of 2-pixel groups
        JZ          wood_done                   ; fewer than 2 pixels: nothing to draw
                                                ; (DEC/JNZ below would otherwise wrap
                                                ;  and run ~2^32 iterations)

        LEA         EDI, _turbulenceTbl
        MOVD        MM2, du_init                ; mm2 = 0:du
        MOVD        MM0, u_init                 ; mm0 = 0:u
        PSLLQ       MM2, 32                     ; mm2 = du:0
        PUNPCKLDQ   MM0, MM0                    ; mm0 = u:u
        MOVD        MM3, dv_init                ; mm3 = 0:dv
        PADDD       MM0, MM2                    ; mm0 = u+du : u
        MOVD        MM1, v_init                 ; mm1 = 0:v
        PUNPCKHDQ   MM2, MM2                    ; mm2 = du:du
        PUNPCKLDQ   MM1, MM1                    ; mm1 = v:v
        PSLLQ       MM3, 32                     ; mm3 = dv:0
        PADDD       MM1, MM3                    ; mm1 = v+dv : v
        PUNPCKHDQ   MM3, MM3                    ; mm3 = dv:dv
        PSLLD       MM2, 1                      ; mm2 = 2du:2du, step per iteration
        PSLLD       MM3, 1                      ; mm3 = 2dv:2dv, step per iteration

wood_loop:
        ; Only the low two words (this pair's turbulence) are consumed, so
        ; a 32-bit load is enough.  A 64-bit load here would read 4 bytes
        ; beyond the pair and be misaligned on every other iteration,
        ; because EDI advances by 4.
        MOVD        MM5, [EDI]                  ; mm5 = 0:0:turb1:turb0
        MOVQ        MM4, MM0                    ; mm4 = u1:u0
        MOVQ        MM6, MM1                    ; mm6 = v1:v0
        PSRLD       MM4, 14                     ; u: convert from 10.22 to 10.8

        PMADDWD     MM4, MM4                    ; u1*u1 : u0*u0
        PSRLD       MM6, 14                     ; v: convert from 10.22 to 10.8
        PMADDWD     MM6, MM6                    ; v1*v1 : v0*v0
        PADDD       MM0, MM2                    ; u1 + 2du : u0 + 2du
        PMULLW      MM5, DWORD PTR mask_quad_15 ; turb = 15 * turb
        PADDD       MM1, MM3                    ; v1 + 2dv : v0 + 2dv
        PADDD       MM4, MM6                    ; res1 = u1*u1 + v1*v1 : res0 = u0*u0 + v0*v0

        ; There is no unsigned-saturating pack of dwords to words, so the
        ; sums are shifted left by 6 and arithmetically right by 16: this
        ; keeps bits 10..25 as a sign-extended word that PACKSSDW packs
        ; without saturating (net effect: res >> 10, truncated).
        PSLLD       MM4, 6
        PSRAD       MM4, 16
        PACKSSDW    MM4, MM4                    ; res1:res0:res1:res0

        ; Clamp each word (unsigned) to 1fffh, the last index used for the
        ; sqrt table: saturating add of 0e000h, then saturating subtract.
        PADDUSW     MM4, dword ptr mask_FFFF_Minus_High
        PSUBUSW     MM4, dword ptr mask_FFFF_Minus_High

        MOVD        EAX, MM4
        MOV         EDX, EAX
        AND         EAX, 0ffffh                 ; eax = res0
        SHR         EDX, 16                     ; edx = res1

        MOVD        MM7, [_sqrtTable + eax*2]   ; low word = sqrt(res0)
        PUNPCKLWD   MM7, [_sqrtTable + edx*2]   ; low dword = sqrt(res1):sqrt(res0)

        PMULLW      MM7, DWORD PTR mask_quad_10 ; 10 * sqrt(res)
        ADD         EDI, 4                      ; advance to the next pair

        PADDW       MM7, MM5                    ; wood_indx   = 10 * sqrt(res) + 15 * turbulence
        PSRLW       MM7, 2                      ; wood_indx >>= 2
        PSUBW       MM7, DWORD PTR mask_quad_735    ; wood_indx  -= 735

        ; Clamp each word (unsigned) to 176fh (5999) with the same
        ; saturating add/subtract pair, using 0e890h.
        PADDUSW     MM7, dword ptr mask_FFFF_Minus_High_Wood
        PSUBUSW     MM7, dword ptr mask_FFFF_Minus_High_Wood

        MOVD        EAX, MM7                    ; eax = wood_indx1:wood_indx0
        MOV         EDX, EAX
        AND         EAX, 0ffffh                 ; eax = indx0
        SHR         EDX, 16                     ; edx = indx1
        MOVD        MM6, [_woodTable + eax*2]   ; low word = wood0
        PUNPCKLWD   MM6, [_woodTable + edx*2]   ; low dword = wood1:wood0

; need to be checked bias
;paddusw			MM6, DWORD PTR mask_quad_green

        MOVD        EAX, MM6
        MOV         [EDI-4], EAX                ; store the two colors over the turbulence pair

        DEC         ECX
        JNZ         wood_loop

wood_done:
        EMMS                                    ; leave the FPU/MMX state clean for the caller

        RET                                     ; end of function
MMX_Wood ENDP


MMX_Wood_1 PROC NEAR C USES edi  ecx eax edx,
		u_init:DWORD, v_init:DWORD,
		du_init:DWORD ,dv_init:DWORD,
		num_pixels:DWORD

MOV	              ECX ,   num_pixels 
LEA	                EDI  , _turbulenceTbl

MOVD             MM2 , du_init  ;  0:du 
SHR	               ECX , 1	          ; ECX= # of times to draw 2 pixels at once

MOVD             MM0, u_init    ;  0:u
PSLLQ            MM2, 32         ;  du:0

PUNPCKLDQ   MM0, MM0    ;  u:u

MOVD              MM3, dv_init  ;  0:v
PADDD	         MM0, MM2	  ; u + du:u

MOVD			  MM1, v_init    ;  0:v  
PUNPCKHDQ   MM2, MM2     ;  du:du

PUNPCKLDQ   MM1, MM1	;  v:v

PSLLQ            MM3, 32         ;  dv:0 

PADDD	         MM1 , MM3	  ;  v + dv:v 
PUNPCKHDQ   MM3, MM3	; dv:dv

PSLLD             MM2 , 1          ; 2du:2du       

PSLLD             MM3 , 1          ; 2dv:2dv       

wood_loop:

MOVQ       MM4, MM0       ;   u1 : u0
MOVQ       MM6, MM1       ;   v1 : v0

PSLLD	    MM6 , 2 	      ; Convert from 10.22 to 10.8 (2 bits left instead of 14 bits right !!!)
PADDD	   MM0, MM2     ;  u1 + 2du : u0 + 2du

PAND         MM6 , dword ptr mask_low_words     
PSRLD	    MM4 , 14 	    ; Convert from 10.22 to 10.8

POR           MM4 , MM6
PADDD	    MM1  , MM3      ; v1 + 2dv : v0 + 2dv
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -