📄 mmxoctave.asm
字号:
PAND MM3, MM7
PSRLW MM5, 2
PAND MM4, MM7
PAND MM5, MM7
PSUBW MM2, MM1 ;MM2 = g_b##_1 for pixel #3
PSUBW MM3, MM1 ;MM3 = g_b##_1 for pixel #1
PSUBW MM4, MM1 ;MM4 = g_b##_0 for pixel #3
PSUBW MM5, MM1 ;MM5 = g_b##_0 for pixel #1
;Take above data for g_b00_0, g_b01_0, g_b10_0, g_b11_0 for pix 1 and 3
;and rearrange the packed values in the MMX registers.
;Output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM2 = | g_b00_0 p3 | g_b00_1 p3 | g_b01_0 p3 | g_b01_1 p3 |
; +-----------------------------------------------------------+
; +-----------------------------------------------------------+
;MM3 = | g_b00_0 p1 | g_b00_1 p1 | g_b01_0 p1 | g_b01_1 p1 |
; +-----------------------------------------------------------+
; +-----------------------------------------------------------+
;MM6 = | g_b10_0 p3 | g_b10_1 p3 | g_b11_0 p3 | g_b11_1 p3 |
; +-----------------------------------------------------------+
; +-----------------------------------------------------------+
;MM7 = | g_b10_0 p1 | g_b10_1 p1 | g_b11_0 p1 | g_b11_1 p1 |
; +-----------------------------------------------------------+
MOVQ MM6, MM2
MOVQ MM7, MM3
PUNPCKHWD MM2, MM4 ;MM2 = g_b00_# and g_b01_# for pix #3
PUNPCKLWD MM6, MM4 ;MM6 = g_b10_# and g_b11_# for pix #3
PUNPCKHWD MM3, MM5 ;MM3 = g_b00_# and g_b01_# for pix #1
MOVQ MM4, MM0 ;Preparing for rx1 and ry1 calculation
PUNPCKLWD MM7, MM5 ;MM7 = g_b10_# and g_b11_# for pix #1
;Calculation of the rx1, ry1 values for both pixels. Final output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM4 = | RX1 p1 | RX1 p3 | RY1 p1 | RY1 p3 |
; +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;rx1 = rx0 - 256;
;ry1 = ry0 - 256;
PSUBW MM4, MM1 ;MM4 = rx1 and ry1 parameters
;Setup for the calculation of u1 and u2 for pix #1. Final output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM1 = | RX0 p1 | RY0 p1 | RX0 p1 | RY1 p1 |
; +-----------------------------------------------------------+
MOVQ MM5, MM0
MOVQ MM1, MM4
PSRLD MM5, 16
PSRAD MM1, 16
PSLLQ MM1, 32
PUNPCKHDQ MM1, MM5
PACKSSDW MM1, MM1
PACKSSDW MM5, MM5
PUNPCKLDQ MM1, MM5
;Calculation for U1 and U2 for pixel #1 -> After multiplication... Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM3 = | U1 for pixel #1 | U2 for pixel #1 |
; +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;u1 = rx0 * g_b00_0 + ry0 * g_b00_1;
;u2 = rx0 * g_b01_0 + ry1 * g_b01_1;
PMADDWD MM3, MM1 ;43u, MM3 = u1 and u2 for pixel #1
;Setup for the calculation of v1 and v2 for pix #1. Final output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM5 = | RX1 p1 | RY0 p1 | RX1 p1 | RY1 p1 |
; +-----------------------------------------------------------+
MOVQ MM5, MM4
PSRAD MM5, 16
MOVQ MM1, MM0
PSRLD MM1, 16
PSLLQ MM1, 32
PUNPCKHDQ MM1, MM5
PACKSSDW MM1, MM1
PACKSSDW MM5, MM5
PUNPCKLDQ MM5, MM1
;Calculation for V1 and V2 for pixel #1 -> After multiplication... Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM7 = | V1 for pixel #1 | V2 for pixel #1 |
; +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;v1 = rx1 * g_b10_0 + ry0 * g_b10_1;
;v2 = rx1 * g_b11_0 + ry1 * g_b11_1;
PMADDWD MM7, MM5 ;MM7 = v1 and v2 for pixel #1
;Setup for the calculation of u1 and u2 for pix #3. Final output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM1 = | RX0 p3 | RY0 p3 | RX0 p3 | RY1 p3 |
; +-----------------------------------------------------------+
MOVQ MM5, MM0
PSLLD MM5, 16
PSRLD MM5, 16
MOVQ MM1, MM4
PSLLD MM1, 16
PSRAD MM1, 16
PUNPCKLDQ MM1, MM1
PUNPCKHDQ MM1, MM5
PACKSSDW MM1, MM1
PACKSSDW MM5, MM5
PUNPCKLDQ MM1, MM5
;Calculation for U1 and U2 for pixel #3 -> After multiplication... Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM2 = | U1 for pixel #3 | U2 for pixel #3 |
; +-----------------------------------------------------+
PMADDWD MM2, MM1 ;MM2 = u1 and u2 for pixel #3
;Setup for the calculation of v1 and v2 for pix #3. Final output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM4 = | RX1 p3 | RY0 p3 | RX1 p3 | RY1 p3 |
; +-----------------------------------------------------------+
PSLLD MM4, 16
PSRAD MM4, 16
MOVQ MM5, MM0
PSLLD MM5, 16
PSRAD MM5, 16
PUNPCKLDQ MM5, MM5
PUNPCKHDQ MM5, MM4
PACKSSDW MM5, MM5
PACKSSDW MM4, MM4
PUNPCKLDQ MM4, MM5
;Calculation for V1 and V2 for pixel #3 -> After multiplication... Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM6 = | V1 for pixel #3 | V2 for pixel #3 |
; +-----------------------------------------------------+
PMADDWD MM6, MM4 ;MM6 = v1 and v2 for pixel #3
;Calculation for SX and SY for pixels #1 and #3, Output:
; |--- 16 bit ---|
; +-----------------------------------------------------------+
;MM1 = | SX p1 | SX p3 | SY p1 | SY p3 |
; +-----------------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;sx = (((rx0 * rx0) >> 1) * ((1536 - (rx0 << 2)))) >> 16;
;sy = (((ry0 * ry0) >> 1) * ((1536 - (ry0 << 2)))) >> 16;
MOVQ MM5, MM0
PMULLW MM5, MM5
MOVQ MM4, MM0
MOVQ MM1, DWORD PTR mask_quad_1536
PSLLW MM4, 2
PSUBD MM6, MM2 ;V1 - U1 and V2 - U2 for P3
PSUBD MM7, MM3 ;V1 - U1 and V2 - U2 for P1
PSUBW MM1, MM4
PSRLW MM5, 1
PMULHW MM1, MM5 ;MM1 = sx and sy param for pix 1, 3
;Calculation of A and B for pixel #1 and #3. Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM7 = | A for pixel #1 | B for pixel #1 |
; +-----------------------------------------------------+
; +-----------------------------------------------------+
;MM6 = | A for pixel #3 | B for pixel #3 |
; +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;a = u1 + sx * ((v1 - u1) >> 8);
;b = u2 + sx * ((v2 - u2) >> 8);
PSRAD MM7, 8
PSRAD MM6, 8
MOVQ MM4, MM1
MOVQ MM5, MM1
PSRLQ MM4, 16
PUNPCKLWD MM1, MM1
PUNPCKHDQ MM4, MM4
PMADDWD MM7, MM4
PSLLD MM5, 16
MOVQ MM4, DWORD PTR v ;Used for incrementing v for next 4 pix
PSRLD MM5, 16
PUNPCKHDQ MM5, MM5
;PADDD MM4, MM0 ;Used for incrementing v for next 4 pix
PADDD MM4, DWORD PTR dv ;Used for incrementing v for next 4 pix
PADDD MM7, MM3 ;MM7 = a and b parameter for pix #1
PMADDWD MM6, MM5
MOVQ MM3, DWORD PTR mask_double_65536
PSRLD MM1, 16
MOVQ DWORD PTR v, MM4 ;Used for incrementing v for next 4 pix
;Calculation of color indexes for pixel #1 and #3. Output:
; |--------- 32 bit ---------|
; +-----------------------------------------------------+
;MM6 = | Color index for pixel #1 | Color index for pixel #3 |
; +-----------------------------------------------------+
;This code correlates to the following "C" code in the "C_Noise()" function.
;color = (a + 65536 + sy * ((b - a) >> 8)) >> 9;
PADDD MM6, MM2 ;MM6 = a and b parameter for pix #3
MOVQ MM4, DWORD PTR mask_quad_510
MOVQ MM2, MM6
PUNPCKLDQ MM6, MM7
MOVD MM0, ebx ;Move the last color written into MM0
PUNPCKHDQ MM2, MM7
PADDD MM3, MM2
PSUBD MM6, MM2
PSRAD MM6, 8
PMADDWD MM6, MM1
PADDD MM6, MM3
PSRLD MM6, 9 ;MM6 = color for pix #1 and #3
;Since the color values have been calculated for pixels 1 and 3,
;pixels 0 and 2 still need to be determined. Pixel 0 is calculated by
;(prev_pixel + pixel #1) / 2 and pixel 2 is calculated by (pixel #1 +
;pixel #3) / 2. Output:
; |--- 16 bit ----|
; +-----------------------------------------------------------------+
;MM3 = |Color p0 index | Color p1 index | Color p2 index | Color p3 index|
; +-----------------------------------------------------------------+
MOVD MM4, DWORD PTR mask_double_255
PACKSSDW MM6, MM6
MOVQ MM7, MM6
MOVQ MM3, MM6
PSRLD MM7, 16
PUNPCKLWD MM7, MM0
PADDW MM6, MM7
PSRLW MM6, 1
PUNPCKLWD MM3, MM6
ADD EDI, 8
;Now that MM3 contains the 4 memory indexes in packed format, we need
;to unpack them in order to get the precomputed color values from the 256
;element color array. Output:
; |--- 16 bit ---|
; +--------------------------------------------------------------+
;MM1 = | Color p3 | Color p2 | Color p1 | Color p0 |
; +--------------------------------------------------------------+
;Write the 4 pixel colors to the backbuffer.
;Decrease the counter and loop back to draw four more pixels if necessary.
;The looping construct may look strange but it is done to allow for the
;calculation of the pixel colors at the end of the scan line.
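;Here instead: right-shift the four packed values by [turbShift] (a divide
;by a power of two that grows with each octave) and add them to the four
;words already stored at [EDI].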
MOVD EBX ,MM3
PSRLW MM3,[turbShift]
PADDW MM3,[EDI]
MOVQ [EDI], MM3 ;Write out the 4 pix to video memory.
DEC ECX
JNZ start_scan_line
INC ESI
;MOV prev_color, EBX ;EBX is the color index of pixel #3. Store it.
INC [turbShift]
DEC [octShift]
CMP ESI, num_octaves
JNZ start_octave
MOV prev_color, EBX ;EBX is the color index of pixel #3. Store it.
;end_scan_line:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; here we rearrange the turb buffer
;; buffer[i] = p0:p1:p2:p3 --> buffer[i] = p3:p2:p1:p0
MOV EDI , turb_buffer
MOV ECX, num_pixels
flipLoop:
MOVQ MM5, [EDI]
MOVQ MM4, MM5
PUNPCKHDQ MM5,MM5 ; mm5 = p0:p1:p0:p1
MOVQ MM7,MM5 ; mm7 = p0:p1:p0:p1
PSRLD MM5,16
MOVQ MM6, MM4
PUNPCKLWD MM5,MM7 ; mm5 = *:*:p1:p0
PSRLQ MM6,16 ; mm6 = 0:p0:p1:p2
PUNPCKLWD MM6,MM4 ; mm6 = *:*:p3:p2
PUNPCKLDQ MM5, MM6 ; mm5 = p3:p2:p1:p0
MOVQ [EDI], MM5
ADD EDI, 8
DEC ECX
JNZ flipLoop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
EMMS ; Empty the MMX state (marks the x87 tag word empty) so later FPU code works.
RET ; end of function
MMX_Octave ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -