⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rgbconv.asm

📁 这是一个JPEG解码器,里面使用了MMX,SSE等汇编指令集
💻 ASM
📖 第 1 页 / 共 3 页
字号:
        pshufd      xmm6, xmm2, 01010101b       ;// CR1 CR1 CR1 CR1
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB1 -0.34414*CB1 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR1 1.402*CR1 0
        addps       xmm4, xmm5                  ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
        addps       xmm4, xmm6                  ;// B1 G1 R1 Y1
        cvtps2dq    xmm3, xmm3                  ;// convert floats to ints
        cvtps2dq    xmm4, xmm4                  ;// convert floats to ints
        packssdw    xmm3, xmm4                  ;// convert ints to words
        pshufd      xmm4, xmm0, 010101010b      ;// Y2 Y2 Y2 Y2
        pshufd      xmm5, xmm1, 010101010b      ;// CB2 CB2 CB2 CB2
        pshufd      xmm6, xmm2, 010101010b      ;// CR2 CR2 CR2 CR2
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB2 -0.34414*CB2 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR2 1.402*CR2 0
        addps       xmm4, xmm5                  ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
        addps       xmm4, xmm6                  ;// B2 G2 R2 Y2
        pshufd      xmm0, xmm0, 011111111b      ;// Y3 Y3 Y3 Y3
        pshufd      xmm1, xmm1, 011111111b      ;// CB3 CB3 CB3 CB3
        pshufd      xmm2, xmm2, 011111111b      ;// CR3 CR3 CR3 CR3
        mulps       xmm1, TBL_MultCB            ;// 1.772*CB3 -0.34414*CB3 0 0
        mulps       xmm2, xmm7                  ;// 0 -0.71414*CR3 1.402*CR3 0
        addps       xmm0, xmm1                  ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
        addps       xmm0, xmm2                  ;// B3 G3 R3 Y3
        cvtps2dq    xmm4, xmm4                  ;// convert floats to ints
        cvtps2dq    xmm0, xmm0                  ;// convert floats to ints
        packssdw    xmm4, xmm0                  ;// convert ints to words (Y3 R3 G3 B3 Y2 R2 G2 B2)
        packuswb    xmm3, xmm4                  ;// convert words to bytes (B3 G3 R3 Y3 ... B0 G0 R0 Y0)
        movdqa      [DST], xmm3                 ;// write 4 pixels

        movaps      xmm6, TBL_MultCB            ;// TBL_MultCB
        movaps      xmm0, [Y][4*4]              ;// Y3  Y2  Y1  Y0
        movaps      xmm1, [CB][4*4]             ;// CB3 CB2 CB1 CB0  
        movaps      xmm2, [CR][4*4]             ;// CR3 CR2 CR1 CR0
        addps       xmm0, [Ctx].FrameInfo.S     ;// Y+S
        pshufd      xmm3, xmm0, 0               ;// Y0 Y0 Y0 Y0
        pshufd      xmm4, xmm1, 0               ;// CB0 CB0 CB0 CB0
        pshufd      xmm5, xmm2, 0               ;// CR0 CR0 CR0 CR0
        mulps       xmm4, xmm6                  ;// 1.772*CB0 -0.34414*CB0 0 0
        mulps       xmm5, xmm7                  ;// 0 -0.71414*CR0 1.402*CR0 0
        addps       xmm3, xmm4                  ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
        addps       xmm3, xmm5                  ;// B0 G0 R0 Y0
        pshufd      xmm4, xmm0, 01010101b       ;// Y1 Y1 Y1 Y1
        pshufd      xmm5, xmm1, 01010101b       ;// CB1 CB1 CB1 CB1
        pshufd      xmm6, xmm2, 01010101b       ;// CR1 CR1 CR1 CR1
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB1 -0.34414*CB1 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR1 1.402*CR1 0
        addps       xmm4, xmm5                  ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
        addps       xmm4, xmm6                  ;// B1 G1 R1 Y1
        cvtps2dq    xmm3, xmm3                  ;// convert floats to ints
        cvtps2dq    xmm4, xmm4                  ;// convert floats to ints
        packssdw    xmm3, xmm4                  ;// convert ints to words
        pshufd      xmm4, xmm0, 010101010b      ;// Y2 Y2 Y2 Y2
        pshufd      xmm5, xmm1, 010101010b      ;// CB2 CB2 CB2 CB2
        pshufd      xmm6, xmm2, 010101010b      ;// CR2 CR2 CR2 CR2
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB2 -0.34414*CB2 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR2 1.402*CR2 0
        addps       xmm4, xmm5                  ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
        addps       xmm4, xmm6                  ;// B2 G2 R2 Y2
        shufps      xmm0, xmm0, 011111111b      ;// Y3 Y3 Y3 Y3
        shufps      xmm1, xmm1, 011111111b      ;// CB3 CB3 CB3 CB3
        shufps      xmm2, xmm2, 011111111b      ;// CR3 CR3 CR3 CR3
        mulps       xmm1, TBL_MultCB            ;// 1.772*CB3 -0.34414*CB3 0 0
        mulps       xmm2, xmm7                  ;// 0 -0.71414*CR3 1.402*CR3 0
        addps       xmm0, xmm1                  ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
        addps       xmm0, xmm2                  ;// B3 G3 R3 Y3
        cvtps2dq    xmm4, xmm4                  ;// convert floats to ints
        cvtps2dq    xmm0, xmm0                  ;// convert floats to ints
        packssdw    xmm4, xmm0                  ;// convert ints to words (Y3 R3 G3 B3 Y2 R2 G2 B2)
        packuswb    xmm3, xmm4                  ;// convert words to bytes (B3 G3 R3 Y3 ... B0 G0 R0 Y0)
        movdqa      [DST][4*4], xmm3            ;// write 4 pixels
          
        ;// next 8 columns
        add         DST, [PTBL][I+12]
        add         I, 16
        jnz         ILoop
          
        ;// HmaxCount--
        ;// if (HmaxCount == 0) {
        ;//   HmaxCount += NbHmaxInRow
        ;//   pRGB += DeltaRGB
        ;// }
        
        dec         [Ctx].FrameInfo.HmaxCount
        jz          EndOfRow
AEndOfRow:
        mov         [Ctx].FrameInfo.cRGB, DST

        pop         ebx
        pop         edi
        pop         esi
    
        PROFILE_OUT "RGB_YCbCrConv SSE2"

        ret

EndOfRow:
        add         DST, [Ctx].FrameInfo.DeltaRGB
        mov         eax, [Ctx].FrameInfo.NbHmaxRow
        mov         [Ctx].FrameInfo.HmaxCount, eax
        jmp         AEndOfRow

Y     TEXTEQU <>
CB    TEXTEQU <>
CR    TEXTEQU <>
DST   TEXTEQU <>
I     TEXTEQU <>
PTBL  TEXTEQU <>

RGB_YCbCrConv_SSE2 ENDP

;//=========================================================================
;// Convert YCbCr to RGB (SSE float math; MMX registers for pack and store)
;//=========================================================================

RGB_YCbCrConv_SSE PROC

Y     TEXTEQU <edi>
CB    TEXTEQU <ebx>
CR    TEXTEQU <ecx>
DST   TEXTEQU <edx>
I     TEXTEQU <eax>
PTBL  TEXTEQU <DPTR esi>

        PROFILE_IN

        push        esi
        push        edi
        push        ebx
                            
        ;// DST = pRGB;
        ;// Y = &SampleY0;
        ;// CB = &SampleCB;
        ;// CR = &SampleCR;
        ;// PTBL = &PointerTable[0]
        ;// for (i = 0; i < 8*Vmax*Hmax; i++) {
        ;//   ConvertRow(DST,Y,CB,CR)
        ;//   Y = PTBL[4*i]
        ;//   CB = PTBL[4*i+1]
        ;//   CR = PTBL[4*i+2]
        ;//   DST += PTBL[4*i+3]
        ;// }
          
        mov         DST, [Ctx].FrameInfo.cRGB
        mov         I, [Ctx].FrameInfo.HmaxVmax64
        mov         PTBL, [Ctx].FrameInfo.PointerTable
        shr         I, 1
        neg         I
        movaps      xmm7, TBL_MultCR
ILoop:          
        mov         Y, [PTBL][I]
        mov         CB, [PTBL][I+4]
        mov         CR, [PTBL][I+8]
        ;// convert a row
        ;// R = (Y+S)              + 1.402   Cr
        ;// G = (Y+S) - 0.34414 Cb - 0.71414 Cr
        ;// B = (Y+S) + 1.772   Cb
        ;// S = 128 for 8-bit precision, 2048 for 12-bit precision
        movaps      xmm6, TBL_MultCB            ;// TBL_MultCB
        movaps      xmm0, [Y]                   ;// Y3  Y2  Y1  Y0
        movaps      xmm1, [CB]                  ;// CB3 CB2 CB1 CB0  
        movaps      xmm2, [CR]                  ;// CR3 CR2 CR1 CR0
        addps       xmm0, [Ctx].FrameInfo.S     ;// Y+S
        movaps      xmm3, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm4, xmm1                  ;// CB3 CB2 CB1 CB0  
        movaps      xmm5, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm3, xmm3, 0               ;// Y0 Y0 Y0 Y0
        shufps      xmm4, xmm4, 0               ;// CB0 CB0 CB0 CB0
        shufps      xmm5, xmm5, 0               ;// CR0 CR0 CR0 CR0
        mulps       xmm4, xmm6                  ;// 1.772*CB0 -0.34414*CB0 0 0
        mulps       xmm5, xmm7                  ;// 0 -0.71414*CR0 1.402*CR0 0
        addps       xmm3, xmm4                  ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
        addps       xmm3, xmm5                  ;// B0 G0 R0 Y0
        movaps      xmm4, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm5, xmm1                  ;// CB3 CB2 CB1 CB0  
        movaps      xmm6, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm4, xmm4, 01010101b       ;// Y1 Y1 Y1 Y1
        shufps      xmm5, xmm5, 01010101b       ;// CB1 CB1 CB1 CB1
        shufps      xmm6, xmm6, 01010101b       ;// CR1 CR1 CR1 CR1
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB1 -0.34414*CB1 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR1 1.402*CR1 0
        addps       xmm4, xmm5                  ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
        addps       xmm4, xmm6                  ;// B1 G1 R1 Y1
        cvtps2pi    mm1, xmm3                   ;// R0 Y0
        cvtps2pi    mm2, xmm4                   ;// R1 Y1        
        movhlps     xmm3, xmm3                  ;// B0 G0 B0 G0
        movhlps     xmm4, xmm4                  ;// B1 G1 B1 G1
        cvtps2pi    mm3, xmm3                   ;// B0 G0
        cvtps2pi    mm4, xmm4                   ;// B1 G1
        packssdw    mm1, mm3                    ;// B0 G0 R0 Y0
        packssdw    mm2, mm4                    ;// B1 G1 R1 Y1
        packuswb    mm1, mm2                    ;// B1 G1 R1 Y1 B0 G0 R0 Y0
        movq        [DST][0], mm1               ;// write 2 pixels

        movaps      xmm4, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm5, xmm1                  ;// CB3 CB2 CB1 CB0
        movaps      xmm6, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm4, xmm4, 010101010b      ;// Y2 Y2 Y2 Y2
        shufps      xmm5, xmm5, 010101010b      ;// CB2 CB2 CB2 CB2
        shufps      xmm6, xmm6, 010101010b      ;// CR2 CR2 CR2 CR2
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB2 -0.34414*CB2 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR2 1.402*CR2 0
        addps       xmm4, xmm5                  ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
        addps       xmm4, xmm6                  ;// B2 G2 R2 Y2
        shufps      xmm0, xmm0, 011111111b      ;// Y3 Y3 Y3 Y3
        shufps      xmm1, xmm1, 011111111b      ;// CB3 CB3 CB3 CB3
        shufps      xmm2, xmm2, 011111111b      ;// CR3 CR3 CR3 CR3
        mulps       xmm1, TBL_MultCB            ;// 1.772*CB3 -0.34414*CB3 0 0
        mulps       xmm2, xmm7                  ;// 0 -0.71414*CR3 1.402*CR3 0
        addps       xmm0, xmm1                  ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
        addps       xmm0, xmm2                  ;// B3 G3 R3 Y3        
        cvtps2pi    mm1, xmm4                   ;// R2 Y2
        cvtps2pi    mm2, xmm0                   ;// R3 Y3        
        movhlps     xmm4, xmm4                  ;// B2 G2 B2 G2
        movhlps     xmm0, xmm0                  ;// B3 G3 B3 G3
        cvtps2pi    mm3, xmm4                   ;// B2 G2
        cvtps2pi    mm4, xmm0                   ;// B3 G3
        packssdw    mm1, mm3                    ;// B2 G2 R2 Y2
        packssdw    mm2, mm4                    ;// B3 G3 R3 Y3
        packuswb    mm1, mm2                    ;// B3 G3 R3 Y3 B2 G2 R2 Y2
        movq        [DST][8], mm1               ;// write 2 pixels

        movaps      xmm6, TBL_MultCB            ;// TBL_MultCB
        movaps      xmm0, [Y][4*4]              ;// Y3  Y2  Y1  Y0
        movaps      xmm1, [CB][4*4]             ;// CB3 CB2 CB1 CB0  
        movaps      xmm2, [CR][4*4]             ;// CR3 CR2 CR1 CR0
        addps       xmm0, [Ctx].FrameInfo.S     ;// Y+S
        movaps      xmm3, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm4, xmm1                  ;// CB3 CB2 CB1 CB0  
        movaps      xmm5, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm3, xmm3, 0               ;// Y0 Y0 Y0 Y0
        shufps      xmm4, xmm4, 0               ;// CB0 CB0 CB0 CB0
        shufps      xmm5, xmm5, 0               ;// CR0 CR0 CR0 CR0
        mulps       xmm4, xmm6                  ;// 1.772*CB0 -0.34414*CB0 0 0
        mulps       xmm5, xmm7                  ;// 0 -0.71414*CR0 1.402*CR0 0
        addps       xmm3, xmm4                  ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
        addps       xmm3, xmm5                  ;// B0 G0 R0 Y0
        movaps      xmm4, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm5, xmm1                  ;// CB3 CB2 CB1 CB0  
        movaps      xmm6, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm4, xmm4, 01010101b       ;// Y1 Y1 Y1 Y1
        shufps      xmm5, xmm5, 01010101b       ;// CB1 CB1 CB1 CB1
        shufps      xmm6, xmm6, 01010101b       ;// CR1 CR1 CR1 CR1
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB1 -0.34414*CB1 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR1 1.402*CR1 0
        addps       xmm4, xmm5                  ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
        addps       xmm4, xmm6                  ;// B1 G1 R1 Y1
        cvtps2pi    mm1, xmm3                   ;// R0 Y0
        cvtps2pi    mm2, xmm4                   ;// R1 Y1        
        movhlps     xmm3, xmm3                  ;// B0 G0 B0 G0
        movhlps     xmm4, xmm4                  ;// B1 G1 B1 G1
        cvtps2pi    mm3, xmm3                   ;// B0 G0
        cvtps2pi    mm4, xmm4                   ;// B1 G1
        packssdw    mm1, mm3                    ;// B0 G0 R0 Y0
        packssdw    mm2, mm4                    ;// B1 G1 R1 Y1
        packuswb    mm1, mm2                    ;// B1 G1 R1 Y1 B0 G0 R0 Y0
        movq        [DST][16], mm1               ;// write 2 pixels

        movaps      xmm4, xmm0                  ;// Y3  Y2  Y1  Y0
        movaps      xmm5, xmm1                  ;// CB3 CB2 CB1 CB0
        movaps      xmm6, xmm2                  ;// CR3 CR2 CR1 CR0
        shufps      xmm4, xmm4, 010101010b      ;// Y2 Y2 Y2 Y2

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -