* File: gt.asm
*===================================================================
* TEXAS INSTRUMENTS, INC.
*
* 3D GEOMETRY TRANSFORMATION
*
* Revision Date: 11/5/97
*-------------------------------------------------------------------*
*
* USAGE This routine is C callable and can be called as:
*
*
* void transf(float* tmt, float* inp, float* out, int siz)
*
* tmt - Transformation Matrix        Xx Yx Zx Wx Sx Tx
*       and Viewport Control         Xy Yy Zy Wy Sy Ty
*       (Scale and Translation)      Xz Yz Zz Wz Sz Tz
*       (# indicates not used)       Xw Yw Zw Ww #  #
*
* inp - An array of polygon vertices
*       in object coordinates
*       (number of vertices = siz)   x y z w
*       (each vertex has 4 values)
*
* out - An array of polygon vertices
*       and clipping parameters      w w_ xh xc0 xc1 xv
*       in screen coordinates        w w_ yh yc0 yc1 yv
*       (number of vertices = siz)   w w_ zh zc0 zc1 zv
*       (each vertex has 18 values)
*
* siz - number of vertices (input or output). The vertex before
* processing consists of 4 values and after processing it
* consists of 18 values. Thus the total size of the input
* array is 4*siz words and the total size of the output
* array is 18*siz words.
*
* If this routine is not used as a C callable function, then
* you need to initialize the values for all of the parameters
* passed to the function in C. Those parameters are assumed to
* be in the registers as defined by the calling convention of
* the compiler (refer to the TMS320C6x Optimizing C Compiler
* User's Guide). In addition, if this routine is not used as a C
* callable function, the code to push and pop A10, A11, A12,
* A13, A14, A15, B10, B11, B12, B13, B14 and B15 registers is
* not needed and can be removed. This routine does not have a
* return parameter - it only fills the "out" array.
*
* PERFORMANCE: approx 16 cycles per vertex
* or
* 12.5 Million vertices per second (5ns cycles)
*
* Included in the algorithm: geometry transformation
* clipping preprocessing
* perspective projection
* viewport mapping
*
* C CODE: This is the C equivalent of the assembly code. Note that the
* assembly code is hand optimized and restrictions may apply.
*
* void transf(float *TMT, float *INP, float *OUT, int SIZ)
* {
* float *inp;
* float w, w_, h;
* int s, c, r;
*
* TMT += 18; /*ptr to Xw Yw Zw Ww*/
* for (s=0 ; s<SIZ ; s++)
* { /* next vertex */
* inp = INP;
* w = 0.0;
* for (r=0 ; r<4 ; r++)
* {
* w += *inp++ * *TMT++;
* }
* TMT -= 22; /*ptr to Xx Yx Zx Wx*/
* w_= 1.0/w;
* for (c=0 ; c<3 ; c++)
* {
* inp = INP;
* *OUT++ = w; /* w , w , w */
* *OUT++ = w_; /* w_, w_, w_ */
* *OUT = 0.0;
* for (r=0 ; r<4 ; r++)
* {
* *OUT += *inp++ * *TMT++;
* }
* h = *OUT++; /* xh, yh, zh */
* *OUT++ = w - h; /* xc0, yc0, zc0 */
* *OUT++ = w + h; /* xc1, yc1, zc1 */
* h = h * w_;
* h = h * *TMT++;
* *OUT++ = h + *TMT++; /* xv, yv, zv */
* }
* INP = inp;
* }
* return;
* }
*
*-----------------------This is the main function that was used to call the transf
*
* # define SIZ 3
* extern void transf(float*, float*, float*, int);
*
* float tmt[24] = {1, 0, 0, 0, 1, 2,
* 0, 1, 0, 0, 3, 4,
* 0, 0, 1, 0, 5, 6,
* 0, 0, 0, 1, 0, 0 };
*
* float inp[4*SIZ] = {7, 7, 7, 7,
* 8, 8, 8, 8,
* 9, 9, 9, 9};
*
* float out[18*(SIZ+4)] = {0.0}; /*reserve 72 words for epilog*/
* int siz = SIZ;
* int ret;
*
* long main (void)
* {
* transf (tmt, inp, out, siz);
* return(ret);
* }
*
* DESCRIPTION: This routine represents the "front end" of the 3D graphics
* transformation pipeline. Before the 3D geometry is displayed
* on the screen, the vertex of each polygon has to be
* transformed to the screen coordinate system. This routine
* performs the geometry transformation, clipping
* preprocessing, perspective projection and viewport mapping.
* This routine does not perform clipping, however it provides
* the transformed vertices in a convenient format for the
* clipping function which is the next step in the 3D pipeline.
* This routine applies a 4x6 transformation matrix to
* every vertex of the 4xSIZE input array. Each polygon vertex
* consists of x,y,z and w coordinates. The transformation
* matrix includes linear distance, direction cosines and scale.
* The results are stored in the 18x(SIZE+4) output array that
* includes for each vertex: w, 1/w, xyz in homogeneous
* coordinates, xyz in viewport coordinates, and 6 clipping
* planes. The 4 "extra" vertices at the end of the output array
* contain trash written at the end of each loop in the absence of
* loop epilogues.
*
*
* TECHNIQUES: 1. Load double word instruction is used to simultaneously
* load two floating point values in a single clock cycle
* 2. Software pipelining is used to schedule instructions
* so that multiple iterations of a loop execute in parallel
* 3. The www loop computes the w and 1/w for each vertex
* 4. The xxx loop computes the xh, xc0, xc1 and xv parameters
* 5. The yyy loop computes the yh, yc0, yc1 and yv parameters
* 6. The zzz loop computes the zh, zc0, zc1 and zv parameters
* 7. The division mantissa error is < 2^-16, resulting from one
* iteration of the Newton-Raphson algorithm
* x[n+1] = x[n]*(2 - v*x[n]), where v is the divisor and the
* seed x[0] is computed by the RCPSP (reciprocal approximation)
* instruction
*
* ASSUMPTIONS: 1. Little Endian is assumed for LDDW
* 2. No restrictions on number of vertices
* 3. Pad the output array with 72 extra words to catch the
* loop epilogue trash.
* 4. Arrays read with LDDW (e.g. tmt) must be aligned on a
* double-word (8-byte) boundary.
*
* ARGUMENTS PASSED: *tmt -> A4
* *inp -> B4
* *out -> A6
* siz -> B6
*===================================================================*/
.global _transf
.bss stack,68 ; 68/4=17 regs (save C env
.text ; +TMT,INP,OPT,SIZ)
_transf
MVK .S1 stack,A0 ; new stack pointer in A0
MVKH .S1 stack,A0 ; new stack pointer in A0
MVK .S2 stack,B0 ; new stack pointer in B0
MVKH .S2 stack,B0 ; new stack pointer in B0
STW .D2 B3, *B0 ; push return addr on stack
STW .D1 A10,*+A0[1] ; push A10 on stack
|| STW .D2 B10,*+B0[2] ; push B10 on stack
STW .D1 A11,*+A0[3] ; push A11 on stack
|| STW .D2 B11,*+B0[4] ; push B11 on stack
STW .D1 A12,*+A0[5] ; push A12 on stack
|| STW .D2 B12,*+B0[6] ; push B12 on stack
STW .D1 A13,*+A0[7] ; push A13 on stack
|| STW .D2 B13,*+B0[8] ; push B13 on stack
STW .D1 A14,*+A0[9] ; push A14 on stack
|| STW .D2 B14,*+B0[10] ; push B14 on stack
STW .D1 A15,*+A0[11] ; push A15 on stack
|| STW .D2 B15,*+B0[12] ; push B15 on stack
STW .D1 A4, *+A0[13] ; TMT pntr (save for xyz loops)
|| STW .D2 B4, *+B0[14] ; INP pntr (save for xyz loops)
STW .D1 A6, *+A0[15] ; OUT pntr (save for xyz loops)
|| STW .D2 B6, *+B0[16] ; SIZ (save for xyz loops)
*---------www: loop (4 cycles per vertex)------*
*
MV .S1 A4,A0 ; load TMTw ptr (tmp) ****
MV .S1X B4,A14 ; load INP ptr (x,y) ****
MV .S1 A6,A15 ; load OUT ptr (w) ****
ADD .S2 10,B6,B2 ; init branch cnt/cond ****
LDDW .D1 *+A0[9],A13:A12 ; load TMTw (Y,X)
LDDW .D1 *+A0[10],B13:B12 ; load TMTw (W,Z)
MVK .S2 2,B3 ; load 2
INTSP .L2 B3,B3 ; 2 -> 2.0
MVK .S1 1,A0 ; load 1
INTSP .L1 A0,A0 ; 1 -> 1.0
* NOTE: this copy of the listing ends here. The www/xxx/yyy/zzz loop
* bodies and the register-restore/return sequence described in the
* header are not present in this copy.