📄 gt.asm

📁 TMS320bbs（源程序）的c67xfiles文件。用于在CCS2.0集成编译环境下实现TI的c67x系列DSP开发。是用DSP汇编语言
💻 ASM
📖 第 1 页 / 共 2 页
字号:
12 下一页
*===================================================================

*	TEXAS INSTRUMENTS, INC. 

*

*	3D GEOMETRY TRANSFORMATION

*				

*     	Revision Date: 11/5/97

*-------------------------------------------------------------------*     

*

*	USAGE	This routine is C callable and can be called as:

*

*		

*		void transf(float* tmt, float* inp, float* out, int siz)

*						      

*		tmt -	Transformation Matrix 		Xx Yx Zx Wx Sx Tx     

*			and Viewport Control    	Xy Yy Zy Wy Sy Ty     

*			(Scale and Translation) 	Xz Yz Zz Wz Sz Tz     

*			(# indicates not used)		Xw Yw Zw Ww  #  #     

*							     	   	      

*		inp - 	An array of polygon vertices	  

*			in object coordinates		    

*			(number of vertices = siz)	x  y  z  w	      

*			(each vertex has 4 values)

*								     	      

*		out - 	An array of polygon vertices 	          

*			and clipping parameters		w  w_ xh xc0 xc1 xv   

*			in screen coordinates	  	w  w_ yh yc0 yc1 yv   

*			(number of vertices = siz)    	w  w_ zh zc0 zc1 zv   

*			(each vertex has 18 values)

*			

*		siz -	number of vertices (input or output). The vertex before 

*			processing consists of 4 values and after processing it

*			consists of 18 values. Thus the total size of the input 

*			array is 4*siz words and the total size of the output 

*			array is 18*siz words.

*

*			If this routine is not used as a C callable function, then

*			you need to initialize the values for all of the parameters 

*			passed to the function in C. Those parameters are assumed to

*			be in the registers as defined by the calling convention of

*			the compiler (refer to the TMS320C6x Optimizing C Compiler 

*			User's Guide). In addition, if this routine is not used as a C

*			callable function, the code to push and pop A10, A11, A12, 

*			A13, A14, A15, B10, B11, B12, B13, B14 and B15 registers is

*			not needed and can be removed. This routine does not have a

*			return parameter - it only fills the "out" array.

*								       

*	PERFORMANCE:	approx 16 cycles per vertex				      

*				or					      

*			12.5 Million vertices per second (5ns cycles)	      

*									      

*			Included in the algorithm:	geometry transformation   

*						    	clipping preprocessing    

*						    	perspective projection    

*						    	viewport mapping  

*

*	C CODE:		This is the C equivalent of the assembly code. Note that the

*			assembly code is hand optimized and restrictions may apply. 

*

*			void transf(float *TMT, float *INP, float *OUT, int SIZ)

*			{

*			float	*inp;

*			float 	w, w_, h;

*			int	s, c,  r;

*

*			TMT += 18;				/*ptr to Xw Yw Zw Ww*/

*			for (s=0 ; s<SIZ ; s++)	

*				{				/* next vertex */

*				inp = INP;

*				w = 0.0;

*				for (r=0 ; r<4 ; r++)

*					{

*					w += *inp++ * *TMT++;

*					}

*				TMT -= 22;			/*ptr to Xx Yx Zx Wx*/

*				w_= 1.0/w;				

*				for (c=0 ; c<3 ; c++)

*					{

*					inp  = INP;

*					*OUT++ = w;		/* w ,  w ,  w   */

*					*OUT++ = w_;		/* w_,  w_,  w_  */

*					*OUT = 0.0;

*					for (r=0 ; r<4 ; r++)

*						{

*						*OUT += *inp++ * *TMT++; 

*						} 

*					h = *OUT++;		/* xh,  yh,  zh  */

*					*OUT++ = w - h;		/* xc0, yc0, zc0 */

*					*OUT++ = w + h;		/* xc1, yc1, zc1 */

*					h = h * w_;

*					h = h * *TMT++;

*					*OUT++ = h + *TMT++;	/* xv,  yv,  zv  */

*					}

*				INP = inp;

*				}

*			return;

*			}

*

*-----------------------This is the main function that was used to call transf

*

*			# define 	SIZ	3

*			extern void transf(float*, float*, float*, int);

*

*			float tmt[24] = {1, 0, 0, 0, 1, 2,

*					 0, 1, 0, 0, 3, 4,

*					 0, 0, 1, 0, 5, 6,

*					 0, 0, 0, 1, 0, 0 };

*

*			float inp[4*SIZ] = {7, 7, 7, 7,

*		   			    8, 8, 8, 8,

*		    			    9, 9, 9, 9};

*

*			float out[18*(SIZ+4)] = {0.0}; /*reserve 72 words for epilog*/

*			int siz = SIZ;

*			int ret;

*

*			long main (void)

*				{

*				transf (tmt, inp, out, siz); 

*				return(ret);

*				} 

*

*	DESCRIPTION:	This routine represents the "front end" of the 3D graphics 

*			transformation pipeline. Before the 3D geometry is displayed

*			on the screen, the vertex of each polygon has to be 

*			transformed to the screen coordinate system. This routine 

*			performs the geometry transformation, clipping

*			preprocessing, perspective projection and viewport mapping.

*			This routine does not perform clipping, however it provides

*			the transformed vertices in a convenient format for the 

*			clipping function which is the next step in the 3D pipeline.

*			This routine applies a 4x6 transformation matrix to

*			every vertex of the 4xSIZE input array. Each polygon vertex 

*			consists of x,y,z and w coordinates. The transformation 

*			matrix includes linear distance, direction cosines and scale.

*			The results are stored in the 18x(SIZE+4) output array that

*			includes for each vertex: w, 1/w, xyz in homogeneous

*			coordinates, xyz in viewport coordinates, and 6 clipping 

*			planes. The 4 "extra" vertices at the end of the output array

*			contain trash written at the end of each loop in absence of 

*			loop epilogues.

*			

*			

*	TECHNIQUES:	1. Load double word instruction is used to simultaneously

*			   load	two floating point values in a single clock cycle

*			2. Software pipelining is used to schedule instructions 

*			   so that multiple iterations of a loop execute in parallel

*			3. The www loop computes the w and 1/w for each vertex

*			4. The xxx loop computes the xh, xc0, xc1 and xv parameters

*			5. The yyy loop computes the yh, yc0, yc1 and yv parameters

*			6. The zzz loop computes the zh, zc0, zc1 and zv parameters

*			7. The division mantissa error is < 2^-16 resulting from one

*			   iteration of Newton-Raphson algorithm x[n+1]=x[n]*(2-v*x[n])

*			   with the seed x[0] computed for v by the RCPSP (reciprocal

*			   estimate) instruction

*

*	ASSUMPTIONS:	1. Little Endian is assumed for LDDW

*			2. No restrictions on number of vertices

*			3. Pad the output array with 72 extra words to catch the

*			   loop epilogue trash.

*

*	ARGUMENTS PASSED:	*tmt	->	A4

*				*inp	->	B4

*				*out	->	A6

*				siz	->	B6

*===================================================================*/

	.global	_transf

	; Register save area used by _transf: 17 word slots = 68 bytes
	; (B3, A10-A15, B10-B15, plus the TMT, INP, OUT and SIZ arguments).
	; It is a fixed .bss symbol rather than a run-time stack frame, so
	; each call to _transf writes over the values saved by the previous one.
	; The alignment operand (4) is given explicitly because every slot is
	; written with STW, which requires a word-aligned address.
	.bss	stack,68,4			; 68/4 = 17 word slots, word aligned

	.text



_transf

	MVK	.S1	stack,A0		; new stack pointer in A0

	MVKH	.S1	stack,A0		; new stack pointer in A0



	MVK	.S2	stack,B0		; new stack pointer in B0

	MVKH	.S2	stack,B0		; new stack pointer in B0



	STW	.D2	B3,  *B0		; push return addr on stack

	STW	.D1	A10,*+A0[1]		; push A10 on stack

||	STW	.D2	B10,*+B0[2]		; push B10 on stack

	STW	.D1	A11,*+A0[3]		; push A11 on stack

||	STW	.D2	B11,*+B0[4]		; push B11 on stack

	STW	.D1	A12,*+A0[5]		; push A12 on stack

||	STW	.D2	B12,*+B0[6]		; push B12 on stack

	STW	.D1	A13,*+A0[7]		; push A13 on stack

||	STW	.D2	B13,*+B0[8]		; push B13 on stack

	STW	.D1	A14,*+A0[9]		; push A14 on stack

||	STW	.D2	B14,*+B0[10]		; push B14 on stack

	STW	.D1	A15,*+A0[11]		; push A15 on stack

||	STW	.D2	B15,*+B0[12]		; push B15 on stack

	STW	.D1	A4, *+A0[13]		; TMT pntr (save for xyz loops)

||	STW	.D2	B4, *+B0[14]		; INP pntr (save for xyz loops)

	STW	.D1	A6, *+A0[15]		; OUT pntr (save for xyz loops)

||	STW	.D2	B6, *+B0[16]		; SIZ      (save for xyz loops)























































*---------www:	loop (4 cycles per vertex)------*

*

	MV	.S1		A4,A0		; load TMTw ptr (tmp)	****	

	MV	.S1X		B4,A14		; load INP ptr (x,y)	****

	MV	.S1		A6,A15		; load OUT ptr (w)	****

	ADD	.S2		10,B6,B2	; init branch cnt/cond	****

	LDDW	.D1	*+A0[9],A13:A12		; load TMTw (Y,X)

	LDDW	.D1	*+A0[10],B13:B12	; load TMTw (W,Z)

	MVK	.S2	2,B3			; load 2

	INTSP	.L2	B3,B3			; 2 -> 2.0

	MVK	.S1	1,A0			; load 1

	INTSP	.L1	A0,A0			; 1 -> 1.0
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -