⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rgbconv.asm

📁 这是一个JPEG解码器,里面使用了MMX,SSE等汇编指令集
💻 ASM
📖 第 1 页 / 共 3 页
字号:
        shufps      xmm5, xmm5, 010101010b      ;// CB2 CB2 CB2 CB2
        shufps      xmm6, xmm6, 010101010b      ;// CR2 CR2 CR2 CR2
        mulps       xmm5, TBL_MultCB            ;// 1.772*CB2 -0.34414*CB2 0 0
        mulps       xmm6, xmm7                  ;// 0 -0.71414*CR2 1.402*CR2 0
        addps       xmm4, xmm5                  ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
        addps       xmm4, xmm6                  ;// B2 G2 R2 Y2
        shufps      xmm0, xmm0, 011111111b      ;// Y3 Y3 Y3 Y3
        shufps      xmm1, xmm1, 011111111b      ;// CB3 CB3 CB3 CB3
        shufps      xmm2, xmm2, 011111111b      ;// CR3 CR3 CR3 CR3
        mulps       xmm1, TBL_MultCB            ;// 1.772*CB3 -0.34414*CB3 0 0
        mulps       xmm2, xmm7                  ;// 0 -0.71414*CR3 1.402*CR3 0
        addps       xmm0, xmm1                  ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
        addps       xmm0, xmm2                  ;// B3 G3 R3 Y3        
        cvtps2pi    mm1, xmm4                   ;// R2 Y2
        cvtps2pi    mm2, xmm0                   ;// R3 Y3        
        movhlps     xmm4, xmm4                  ;// B2 G2 B2 G2
        movhlps     xmm0, xmm0                  ;// B3 G3 B3 G3
        cvtps2pi    mm3, xmm4                   ;// B2 G2
        cvtps2pi    mm4, xmm0                   ;// B3 G3
        packssdw    mm1, mm3                    ;// B2 G2 R2 Y2
        packssdw    mm2, mm4                    ;// B3 G3 R3 Y3
        packuswb    mm1, mm2                    ;// B3 G3 R3 Y3 B2 G2 R2 Y2
        movq        [DST][24], mm1              ;// write 2 pixels
          
        ;// next 8 columns
        add         DST, [PTBL][I+12]
        add         I, 16
        jnz         ILoop
          
        ;// HmaxCount--
        ;// if (HmaxCount == 0) {
        ;//   HmaxCount += NbHmaxInRow
        ;//   pRGB += DeltaRGB
        ;// }
        
        dec         [Ctx].FrameInfo.HmaxCount
        jz          EndOfRow
AEndOfRow:
        mov         [Ctx].FrameInfo.cRGB, DST

        pop         ebx
        pop         edi
        pop         esi
    
        PROFILE_OUT "RGB_YCbCrConv SSE"

        ret

EndOfRow:
        add         DST, [Ctx].FrameInfo.DeltaRGB
        mov         eax, [Ctx].FrameInfo.NbHmaxRow
        mov         [Ctx].FrameInfo.HmaxCount, eax
        jmp         AEndOfRow

Y     TEXTEQU <>
CB    TEXTEQU <>
CR    TEXTEQU <>
DST   TEXTEQU <>
I     TEXTEQU <>
PTBL  TEXTEQU <>

RGB_YCbCrConv_SSE ENDP

;//=========================================================================
;// Convert Grayscale to RGB (SSE2)
;//=========================================================================

RGB_GrayConv_SSE2 PROC

;// Grayscale -> 4-bytes-per-pixel conversion, SSE2 integer path.
;//
;// Each loop pass reads 8 packed floats from the row addressed by the
;// current pointer-table entry, adds the 4-float vector FrameInfo.S,
;// converts to int32 (CVTPS2DQ, rounding per MXCSR), saturates down to
;// unsigned bytes and stores every sample four times in a row, i.e. 32
;// output bytes (8 pixels whose 4 bytes are all identical).
;//
;// Table layout as used here: 8 bytes per pass,
;//   dword [PTBL + I]     -> address of the 8 source floats
;//   dword [PTBL + I + 4] -> value added to DST after the pass
;// I runs from -(HmaxVmax64 >> 2) up to 0 in steps of 8, so PTBL is
;// indexed with negative offsets that count up towards PointerTable.
;//
;// MOVAPS/MOVDQA are used throughout: the source rows, DST and
;// FrameInfo.S must all be 16-byte aligned.
;//
;// Ctx, DPTR, PROFILE_IN and PROFILE_OUT are defined outside this procedure.
;// Registers written: eax, ecx, edx, xmm0-xmm4, xmm7 (esi is saved/restored).

Y     TEXTEQU <eax>
DST   TEXTEQU <ecx>
I     TEXTEQU <edx>
PTBL  TEXTEQU <DPTR esi>

        PROFILE_IN

        push        esi

        ;// --- loop setup -------------------------------------------------
        mov         PTBL, [Ctx].FrameInfo.PointerTable
        movaps      xmm7, [Ctx].FrameInfo.S     ;// bias added to every sample
        mov         I, [Ctx].FrameInfo.HmaxVmax64
        mov         DST, [Ctx].FrameInfo.cRGB
        shr         I, 2
        neg         I                           ;// negative offset, counts up to 0

ILoop:
        mov         Y, [PTBL][I]                ;// source row for this pass

        ;// --- load, bias and convert 8 samples to int32 -------------------
        movaps      xmm0, [Y]                   ;// Y3 Y2 Y1 Y0
        movaps      xmm1, [Y+16]                ;// Y7 Y6 Y5 Y4
        addps       xmm0, xmm7
        addps       xmm1, xmm7
        cvtps2dq    xmm0, xmm0
        cvtps2dq    xmm1, xmm1

        ;// --- pixels 0..3 -------------------------------------------------
        pshufd      xmm2, xmm0, 0FFh            ;// Y3 in all four dwords
        pshufd      xmm3, xmm0, 0AAh            ;// Y2 in all four dwords
        pshufd      xmm4, xmm0, 055h            ;// Y1 in all four dwords
        pshufd      xmm0, xmm0, 0               ;// Y0 in all four dwords
        packssdw    xmm3, xmm2                  ;// words: Y3 x4 | Y2 x4
        packssdw    xmm0, xmm4                  ;// words: Y1 x4 | Y0 x4
        packuswb    xmm0, xmm3                  ;// bytes: Y3 x4 Y2 x4 Y1 x4 Y0 x4
        movdqa      [DST], xmm0                 ;// store pixels 0..3

        ;// --- pixels 4..7 -------------------------------------------------
        pshufd      xmm2, xmm1, 0FFh            ;// Y7 in all four dwords
        pshufd      xmm3, xmm1, 0AAh            ;// Y6 in all four dwords
        pshufd      xmm4, xmm1, 055h            ;// Y5 in all four dwords
        pshufd      xmm1, xmm1, 0               ;// Y4 in all four dwords
        packssdw    xmm3, xmm2                  ;// words: Y7 x4 | Y6 x4
        packssdw    xmm1, xmm4                  ;// words: Y5 x4 | Y4 x4
        packuswb    xmm1, xmm3                  ;// bytes: Y7 x4 Y6 x4 Y5 x4 Y4 x4
        movdqa      [DST+16], xmm1              ;// store pixels 4..7

        ;// --- advance to the next table entry -----------------------------
        add         DST, [PTBL][I+4]
        add         I, 8
        jnz         ILoop

        ;// if (--HmaxCount == 0) {
        ;//   DST += DeltaRGB
        ;//   HmaxCount = NbHmaxRow
        ;// }
        dec         [Ctx].FrameInfo.HmaxCount
        jnz         AEndOfRow                   ;// still inside the current row
        add         DST, [Ctx].FrameInfo.DeltaRGB
        mov         eax, [Ctx].FrameInfo.NbHmaxRow
        mov         [Ctx].FrameInfo.HmaxCount, eax

AEndOfRow:
        mov         [Ctx].FrameInfo.cRGB, DST   ;// remember where the next call writes

        pop         esi

        PROFILE_OUT "RGB_GrayConv SSE2"

        ret

;// drop the register aliases so they do not leak past this procedure
Y     TEXTEQU <>
CB    TEXTEQU <>
CR    TEXTEQU <>
DST   TEXTEQU <>
I     TEXTEQU <>
PTBL  TEXTEQU <>

RGB_GrayConv_SSE2 ENDP

;//=========================================================================
;// Convert Grayscale to RGB (SSE)
;//=========================================================================

RGB_GrayConv_SSE PROC

;// Grayscale -> 4-bytes-per-pixel conversion, SSE + MMX path (no SSE2).
;//
;// Each loop pass reads 8 packed floats from the row addressed by the
;// current pointer-table entry, adds the 4-float vector FrameInfo.S,
;// converts pairs of floats to int32 in MMX registers (CVTPS2PI, rounding
;// per MXCSR), saturates to unsigned bytes and stores every sample four
;// times in a row: 32 output bytes, i.e. 8 pixels whose 4 bytes are equal.
;//
;// Table layout as used here: 8 bytes per pass,
;//   dword [PTBL + I]     -> address of the 8 source floats
;//   dword [PTBL + I + 4] -> value added to DST after the pass
;// I runs from -(HmaxVmax64 >> 2) up to 0 in steps of 8, so PTBL is
;// indexed with negative offsets that count up towards PointerTable.
;//
;// MOVAPS is used for the source rows and for FrameInfo.S, so both must be
;// 16-byte aligned. The MOVQ stores carry no alignment requirement.
;//
;// Ctx, DPTR, PROFILE_IN and PROFILE_OUT are defined outside this procedure.
;// Registers written: eax, ecx, edx, xmm0, xmm1, xmm7, mm1-mm7
;// (esi is saved/restored).
;// NOTE(review): MMX registers are used and no EMMS is issued in this
;// procedure - presumably the caller clears MMX state before any x87 code
;// runs; confirm.

Y     TEXTEQU <eax>
DST   TEXTEQU <ecx>
I     TEXTEQU <edx>
PTBL  TEXTEQU <DPTR esi>

        PROFILE_IN

        push        esi
                           
        ;// DST = cRGB
        ;// for (I = -(HmaxVmax64 >> 2); I != 0; I += 8) {
        ;//   Y = dword at PTBL + I
        ;//   ConvertRow(DST, Y)              // 8 samples -> 32 bytes
        ;//   DST += dword at PTBL + I + 4
        ;// }
                 
        mov         DST, [Ctx].FrameInfo.cRGB
        mov         I, [Ctx].FrameInfo.HmaxVmax64
        mov         PTBL, [Ctx].FrameInfo.PointerTable
        shr         I, 2
        movaps      xmm7, [Ctx].FrameInfo.S     ;// bias added to every sample
        neg         I                           ;// negative offset, counts up to 0
ILoop:          
        mov         Y, [PTBL][I]                ;// source row for this pass
        ;// load 8 samples and add the bias
        movaps      xmm0, [Y]                   ;// Y3 Y2 Y1 Y0
        movaps      xmm1, [Y][4*4]              ;// Y7 Y6 Y5 Y4
        addps       xmm0, xmm7                  ;// Y+S
        addps       xmm1, xmm7                  ;// Y+S        
        ;// CVTPS2PI converts only the two low floats, so the high pair is
        ;// moved down with MOVHLPS and converted separately
        cvtps2pi    mm1, xmm0                   ;// Y1 Y0
        cvtps2pi    mm2, xmm1                   ;// Y5 Y4
        movhlps     xmm0, xmm0                  ;// Y3 Y2 Y3 Y2
        movhlps     xmm1, xmm1                  ;// Y7 Y6 Y7 Y6
        cvtps2pi    mm3, xmm0                   ;// Y3 Y2
        cvtps2pi    mm4, xmm1                   ;// Y7 Y6
        packssdw    mm1, mm3                    ;// Y3 Y2 Y1 Y0 (signed words)
        packssdw    mm2, mm4                    ;// Y7 Y6 Y5 Y4 (signed words)
        ;// broadcast each word, then saturate to unsigned bytes
        pshufw      mm5, mm1, 011111111b        ;// Y3 Y3 Y3 Y3
        pshufw      mm6, mm1, 010101010b        ;// Y2 Y2 Y2 Y2
        pshufw      mm7, mm1, 001010101b        ;// Y1 Y1 Y1 Y1
        pshufw      mm1, mm1, 000000000b        ;// Y0 Y0 Y0 Y0
        packuswb    mm6, mm5                    ;// Y3 Y3 Y3 Y3 Y2 Y2 Y2 Y2
        packuswb    mm1, mm7                    ;// Y1 Y1 Y1 Y1 Y0 Y0 Y0 Y0
        pshufw      mm3, mm2, 011111111b        ;// Y7 Y7 Y7 Y7
        pshufw      mm4, mm2, 010101010b        ;// Y6 Y6 Y6 Y6
        pshufw      mm5, mm2, 001010101b        ;// Y5 Y5 Y5 Y5
        pshufw      mm2, mm2, 000000000b        ;// Y4 Y4 Y4 Y4
        packuswb    mm4, mm3                    ;// Y7 Y7 Y7 Y7 Y6 Y6 Y6 Y6
        packuswb    mm2, mm5                    ;// Y5 Y5 Y5 Y5 Y4 Y4 Y4 Y4
        movq        [DST], mm1                  ;// write pixels 0-1
        movq        [DST][8], mm6               ;// write pixels 2-3
        movq        [DST][16], mm2              ;// write pixels 4-5
        movq        [DST][24], mm4              ;// write pixels 6-7

        ;// advance to the next table entry
        add         DST, [PTBL][I+4]
        add         I, 8
        jnz         ILoop
          
        ;// if (--HmaxCount == 0) {
        ;//   DST += DeltaRGB
        ;//   HmaxCount = NbHmaxRow
        ;// }
        
        dec         [Ctx].FrameInfo.HmaxCount
        jz          EndOfRow
AEndOfRow:
        mov         [Ctx].FrameInfo.cRGB, DST   ;// remember where the next call writes

        pop         esi
    
        ;// label names this procedure (SSE), not its SSE2 sibling
        PROFILE_OUT "RGB_GrayConv SSE"

        ret

EndOfRow:
        ;// row complete: apply the row delta and reload the per-row counter
        add         DST, [Ctx].FrameInfo.DeltaRGB
        mov         eax, [Ctx].FrameInfo.NbHmaxRow
        mov         [Ctx].FrameInfo.HmaxCount, eax
        jmp         AEndOfRow

;// drop the register aliases so they do not leak past this procedure
Y     TEXTEQU <>
CB    TEXTEQU <>
CR    TEXTEQU <>
DST   TEXTEQU <>
I     TEXTEQU <>
PTBL  TEXTEQU <>

RGB_GrayConv_SSE ENDP



END

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -