* File: gouraud shading of a scanline of pixels.txt
*******************************************************************************
*
* TEXAS INSTRUMENTS, INC.
*
* GOURAUD SHADING
*
* Revision Date: 03/25/97
*
* USAGE This routine is C callable and can be called as
*
* void gouraud(unsigned int n, unsigned int rd, unsigned int r,
* unsigned int gd, unsigned int g, unsigned int bd,
* unsigned int b, int p[])
*
* n --- number of 32-bit words in p[], i.e. half of the pixels on a line (input)
* rd --- increment/decrement of the red color (input)
* r --- red color intensity (input)
* gd --- increment/decrement of the green color (input)
* g --- green color intensity (input)
* bd --- increment/decrement of the blue color (input)
* b --- blue color intensity (input)
* p[] --- array of pixel's color intensity (output)
*
* If the routine is not to be used as a C callable function,
* then all instructions relating to stack should be removed.
* Refer to comments of individual instructions. You will also
* need to initialize values for all the values passed as these
* are assumed to be in registers as defined by the calling
* convention of the compiler, (refer to the C compiler reference
* guide.)
*
* C CODE
* This is the C equivalent of the Assembly Code without the
* assumptions listed below. Note that the assembly code is hand
* optimized and assumptions apply.
*
* void gouraud(unsigned int n, unsigned int rd, unsigned int r,
* unsigned int gd, unsigned int g, unsigned int bd,
* unsigned int b, int p[])
* {
* unsigned int mask = 0xF800F800, i;
* for (i = 0; i < n; i++) {
* r += rd;
* g += gd;
* b += bd;
* p[i] = (r & mask) + ((g & mask) >> 5) + ((b & mask) >> 10);
* }
* }
*
* DESCRIPTION
* This routine is used to obtain the intermediate pixels'
* intensity given the intensity values of pixels at -2 and 2*n.
* The initial values of r, g, and b are the color intensities
* of the pixel at -2 for red, green, and blue, respectively.
* Let r', g', and b' be the color intensities of the pixel at 2n.
* Then the increment/decrement of the colors are
* rd' = (r'-r)/(2*n+2)
* gd' = (g'-g)/(2*n+2)
* bd' = (b'-b)/(2*n+2).
* *** HERE ALL THE VALUES ARE 16-BIT NUMBERS.
* The gouraud shading algorithm obtains the intermediate
* pixels' intensities with linear interpolation method.
* That is, at k, the pixel color intensities are:
* r" = r+(k+2)*rd'
* g" = g+(k+2)*gd'
* b" = b+(k+2)*bd'
* and the pixel's mixed color intensity is
* p[i]=(r"&0xF800)+((g"&0xF800)>>5)+((b"&0xF800)>>10).
*
* TECHNIQUES
* 1. All values are packed into 32-bit words (two 16-bit values
* per word) so that every iteration computes 2 pixels.
* To do so, we use the following parameters defined as:
* rd = (2*rd')<<16 + (2*rd')
* gd = (2*gd')<<16 + (2*gd')
* bd = (2*bd')<<16 + (2*bd')
* and p[i] actually represents two pixels, the upper half
* represent pixel p[2k] and the lower half represent pixel
* p[2k+1]. (The C code is shown in C CODE section above.)
*
* 2. The code is actually implemented as
*
* {
* unsigned int mask = 0xF800F800, i, j;
* for (i = 0; i < n; i+=2) {
* r += rd;
* g += gd;
* b += bd;
* p[i]=(r & mask) + ((g & mask) >> 5) + ((b & mask) >> 10);
* r += rd;
* g += gd;
* b += bd;
* p[i+1]=(r & mask) + ((g & mask) >> 5) + ((b & mask) >> 10);
* }
* }
*
* That is, FOUR pixels (two packed words) are computed per iteration.
*
* ASSUMPTIONS:
* n >= 2, even.
*
* MEMORY NOTE:
* No memory bank hit under any conditions.
*
* CYCLES 2*N + 7
*
* PERFORMANCE COMMENTS:
* Limited by 6 ALUs/cycle.
*******************************************************************************
.global _gouraud
.text
; -----------------------------------------------------------------------------
; _gouraud: writes packed pixel words to p[], two words (i and i+1) per pass
; of the four-packet LOOP kernel. The A-side registers work on word i, the
; B-side registers on word i+1.
;
; Inputs, as read by the code below:
;   A4 = n     B4 = rd    A6 = r     B6 = gd
;   A8 = g     B8 = bd    A10 = b    B10 = p
;   B3 = return address   B15 = stack pointer
;
; Working registers once the prolog has run:
;   A3/A4/A5 = r/g/b for word i,   stepped by A10/A11/A2 (2*rd / 2*gd / 2*bd)
;   B1/B4/B5 = r/g/b for word i+1, stepped by B0/B11/B2  (2*rd / 2*gd / 2*bd)
;   A9, B9   = mask 0xF800F800
;   A6/A7/A12/A8 and B6/B7/B8 = masked/shifted temporaries
;   A0, B10  = store pointers for word i / word i+1 (each advances 2 words)
;   A1       = loop counter (n - 2 on entry to the prolog branch, then -2
;              each time a branch is issued; 0 means "do not branch again")
;
; A10, B11, A12, B10 and A11 are pushed in the prolog and reloaded before the
; return.
;
; NOTE(review): the last push (A11) does not post-decrement B15, so B15 keeps
; addressing the saved-A11 word until the epilog. Confirm this is acceptable
; for the target's stack/interrupt conventions.
; NOTE(review): every mnemonic in this copy starts in column 1; the leading
; whitespace looks stripped. Confirm against the original source, since the
; TI assembler is understood to treat column-1 text as a label.
; -----------------------------------------------------------------------------
_gouraud:
**** begin benchmark timing ***
B_START:
; ---- Prolog: first values for words 0 and 1, doubled increments, saves ------
ADD .L1X B8,A10,A5 ; A5 = b + bd (b for word i)
|| MVK .S1 0F800h,A9 ; mask, low halfword
|| STW .D2 A10,*B15-- ; push A10 on stack (A10 is reused for 2*rd)
ADD .L1X B4,A6,A3 ; A3 = r + rd (r for word i)
|| ADD .L2X B8,A5,B5 ; B5 = b + 2*bd (b for word i+1)
|| ADD .S2 B8,B8,B2 ; B2 = 2*bd, per-iteration step
|| MVKLH .S1 0F800h,A9 ; mask = 0xF800F800 (high halfword)
|| STW .D2 B11,*B15-- ; push B11 on stack (B11 is reused for 2*gd)
ADD .L1X B6,A8,A4 ; A4 = g + gd (g for word i); overwrites n
|| ADD .L2X B4,A3,B1 ; B1 = r + 2*rd (r for word i+1)
|| ADD .S2 B4,B4,B0 ; B0 = 2*rd, per-iteration step
|| AND .S1 A5,A9,A12 ; bm = b & mask, i
|| SUB .D1 A4,2,A1 ; loop counter A1 = n - 2 (uses n from the same packet)
|| STW .D2 A12,*B15-- ; push A12 on stack (A12 holds bm from here on)
MV .L2X A9,B9 ; copy mask to the B side
|| MV .L1X B2,A2 ; A2 = 2*bd
|| AND .S1 A3,A9,A6 ; rm = r & mask, i
|| STW .D2 B10,*B15-- ; push B10 on stack (B10 becomes a store pointer)
MV .L1X B0,A10 ; A10 = 2*rd
|| ADD .L2X B6,A4,B4 ; B4 = g + 2*gd (g for word i+1)
|| ADD .D2 B6,B6,B11 ; B11 = 2*gd, per-iteration step
|| AND .S1 A4,A9,A7 ; gm = g & mask, i
|| AND .S2 B5,B9,B8 ; bm = b & mask, i+1
MV .L1X B11,A11 ; A11 = 2*gd
|| AND .L2 B1,B9,B6 ; rm = r & mask, i+1
||[A1] SUB .S1 A1,2,A1 ; decrement loop counter (only if another pass is needed)
||[A1] B .S2 LOOP ; branch for the second pass; skipped when n == 2
|| STW .D2 A11,*B15 ; save A11 at *B15 (no post-decrement)
; Last prolog packet; falls through into LOOP.
SHRU .S1 A12,10,A8 ; bs = bm >> 10, i
|| MV .L1X B10,A0 ; A0 = p (store pointer for word i)
|| ADD .L2 4,B10,B10 ; B10 = p + 4 bytes (store pointer for word i+1)
; ---- Kernel: 4 packets per pass, 2 words stored per pass ---------------------
; Comments starting with ';*' mark work done for the NEXT pair of words; the
; other instructions finish the pair started in the previous pass/prolog.
LOOP:
; packet 1 of 4
ADD .D1 A5,A2,A5 ;* b += 2*bd, i
|| ADD .L1 A6,A8,A8 ; p0 = rm + bs, i
|| ADD .D2 B5,B2,B5 ;* b += 2*bd, i+1
|| AND .L2 B4,B9,B7 ; gm = g & mask, i+1
|| SHRU .S2 B8,10,B8 ; bs = bm >> 10, i+1
|| SHRU .S1 A7,5,A7 ; gs = gm >> 5, i
; packet 2 of 4
ADD .D1 A3,A10,A3 ;* r += 2*rd, i
|| ADD .L1 A4,A11,A4 ;* g += 2*gd, i
|| ADD .D2 B1,B0,B1 ;* r += 2*rd, i+1
|| SHRU .S2 B7,5,B7 ; gs = gm >> 5, i+1
|| ADD .L2 B6,B8,B8 ; p0 = rm + bs, i+1
|| ADD .S1 A8,A7,A8 ; p[i] = p0 + gs, i
; packet 3 of 4
AND .S1 A5,A9,A12 ;* bm = b & mask, i
|| ADD .D2 B4,B11,B4 ;* g += 2*gd, i+1
|| STW .D1 A8,*A0++[2] ; store p[i], then advance A0 by two words
|| ADD .L2 B8,B7,B8 ; p[i+1] = p0 + gs, i+1
||[A1] B .S2 LOOP ; branch to LOOP while the counter is non-zero
|| AND .L1 A4,A9,A7 ;* gm = g & mask, i
; packet 4 of 4
AND .L1 A3,A9,A6 ;* rm = r & mask, i
|| SHRU .S1 A12,10,A8 ;* bs = bm >> 10, i
|| AND .L2 B5,B9,B8 ;* bm = b & mask, i+1 (B8 is also the store source in this packet)
|| AND .S2 B1,B9,B6 ;* rm = r & mask, i+1
|| STW .D2 B8,*B10++[2] ; store p[i+1], then advance B10 by two words
||[A1] SUB .D1 A1,2,A1 ; decrement loop count (stops at 0)
LOOP_END:
B_END:
*** end benchmarking timing ***
; ---- Epilog: restore saved registers and return -----------------------------
; Stack at this point: [B15] = A11, +4 = B10, +8 = A12, +12 = B11, +16 = A10.
LDW .D2 *B15++,A11 ; pop A11 off stack
|| ADD .L1X 8,B15,A0 ; A0 = B15 + 8, a second pop pointer at the saved-A12 word
LDW .D1 *A0++[2],A12 ; pop A12 off stack; A0 then addresses the saved-A10 word
|| LDW .D2 *B15++[2],B10 ; pop B10 off stack; B15 then addresses the saved-B11 word
|| B .S2 B3 ; return to caller
LDW .D1 *A0,A10 ; pop A10 off stack
|| LDW .D2 *B15++,B11 ; pop B11 off stack; B15 is back at its entry value
NOP 4 ; pad out the return branch's delay slots
* End of gouraud listing.