⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 loopf_asm.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 2 页
字号:
        paddw       (mm5,mm7);

        paddw       (mm5,eax + LFABS_OFFSET             );//x += LoopFilterAdjustBeforeShift
        psubw       (mm0,mm1);

        paddw       (mm0,mm5);
        pxor        (mm6,mm6     );

        movd        (mm7,eax + TRANS_OFFSET + 8  );//xx xx xx xx 32 22 12 02
        psraw       (mm0,3                       );//values to be clipped

        movd        (mm3,eax + TRANS_OFFSET + 4  );//xx xx xx xx 31 21 11 01
        punpcklbw   (mm7,mm6);

        psubw       (mm7,mm2                     );//pms + y
        punpcklbw   (mm3,mm6);

        paddw       (mm3,mm2                     );//p0 - y
        packuswb    (mm7,mm7                     );//clamp pms + y

        packuswb    (mm3,mm3                     );//clamp p0 - y
        movq        (mm1,mm0);

        movq        (mm2,eax + LIMIT_OFFSET                 );//get the limit value
        psraw       (mm0,15);

//values to write out
        punpcklbw   (mm3,mm7                     );//32 31 22 21 12 11 02 01
        movq        (mm7,mm0                     );//save sign

        unsigned int ebp;
        movd        ((int&)ebp,mm3                     );//12 11 02 01
        pxor        (mm1,mm0);

//xor bp,bp

        *(uint16_t*)(ebx + 1)=ebp               ;//02 01
        psubsw      (mm1,mm0                     );//abs(i)

        ebp>>=16;//shr         ebp,16
        movq        (mm5,mm2);

        *(uint16_t*)(ebx + ecx + 1)=ebp;
        psrlq       (mm3,32                      );//xx xx xx xx 32 31 22 21

        por         (mm7,eax + FOURONES_OFFSET                );//now have -1 or 1
        psubw       (mm5,mm1                     );//limit - abs(i)

        movd        ((int&)ebp,mm3                     );//32 31 22 21
        movq        (mm4,mm5);

        *(uint16_t*)(ebx + ecx*2 + 1)=ebp;
        psraw       (mm5,15);

        ebp>>=16;// shr         ebp,16
        pxor        (mm4,mm5);

        *(uint16_t*)(ebx + edx + 1)=ebp;
        psubsw      (mm4,mm5                     );//abs(limit - abs(i))

        movd        (mm5,eax + TRANS_OFFSET + 24  );//xx xx xx xx 72 62 52 42
        psubusw     (mm2,mm4                     );//limit - abs(limit - abs(i))

        pmullw      (mm2,mm7                     );//new y
        pxor        (mm6,mm6);

        movd        (mm3,eax + TRANS_OFFSET + 20  );//xx xx xx xx 71 61 51 41
        punpcklbw   (mm5,mm6);

        ebx=ebx + ecx*4;
        punpcklbw   (mm3,mm6);

        paddw       (mm3,mm2                     );//pms + y
        psubw       (mm5,mm2                     );//p0 - y

        packuswb    (mm3,mm3                     );//clamp pms + y
        //pop         ebp
    //-

//
//NOTE: optimize the following crap somehow
//

        packuswb    (mm5,mm5                     );//clamp p0 - y
    //-
        punpcklbw   (mm3,mm5                     );//72 71 62 61 52 51 42 41
    //-
        unsigned int ax;
        movd        ((int&)ax,mm3                     );//52 51 42 41
        psrlq       (mm3,32                      );//xx xx xx xx 72 71 62 61

        *(uint16_t*)(ebx + 1)=ax;
    //-
        ax>>=16;//shr         eax,16
    //-

        *(uint16_t*)(ebx + ecx + 1)=ax;
    //-


        movd        ((int&)ax,mm3);
    //-

        *(uint16_t*)(ebx + ecx*2 + 1)=ax;
    //-

        ax>>=16;//shr         eax,16
    //-

        *(uint16_t*)(ebx + edx + 1)=ax;
    //-
}
/****************************************************************************
 *
 *  ROUTINE       :     FilterVert_MMX
 *
 *  INPUTS        :     None
 *
 *  OUTPUTS       :     None
 *
 *  RETURNS       :     None
 *
 *  FUNCTION      :     Applies a loop filter to a horizontal edge vertically
 *
 *  SPECIAL NOTES :
 *
 *
 *  ERRORS        :     None.
 *
 ****************************************************************************/
extern "C" void FilterVert_MMX(unsigned char * PixelPtr, ogg_int32_t LineLength, ogg_int32_t *BoundingValuePtr)
{
    ogg_int32_t ms = -LineLength;
    ogg_int32_t ms2 = ms + ms;

    /* A somewhat optimized MMX version of the top edge filter.*/
    /* Originally written for Tim's VP30 code. */
        unsigned char         *eax=(unsigned char*)BoundingValuePtr;
    //-

        unsigned char *ebx=(unsigned char*)PixelPtr;
        int         ecx=ms;                    //negative stride
        __m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
        movd        (mm1,ebx + 0               );//p0
        pxor        (mm4,mm4);

        movd        (mm0,ebx + ecx             );//get row above -- pms
        punpcklbw   (mm1,mm4                     );//convert to words

        int         edx=LineLength;
        punpcklbw   (mm0,mm4);

        movd        (mm6,ebx + ecx*2           );//pms2
        psubw       (mm1,mm0                     );//x = p0 - pms

        movq        (mm2,ebx + edx             );//pstride
        movq        (mm3,mm1);

        punpcklbw   (mm6,mm4);
        paddw       (mm3,mm1);

        punpcklbw   (mm2,mm4);
        paddw       (mm1,mm3);

        paddw       (mm1,eax + LFABS_OFFSET             );//x += LoopFilterAdjustBeforeShift
        psubw       (mm6,mm2);

        movq        (mm2,eax + LIMIT_OFFSET                 );//get the limit value
        paddw       (mm6,mm1);

        movd        (mm5,ebx + 4               );//p0
        psraw       (mm6,3                       );//values to be clipped

        movq        (mm1,mm6  );
        psraw       (mm6,15);

        movd        (mm7,ebx + ecx + 4         );//pms
        pxor        (mm1,mm6);

        psubsw      (mm1,mm6                     );//abs(i)
        pxor        (mm0,mm0);

        punpcklbw   (mm5,mm0);
        movq        (mm3,mm2);

        por         (mm6,eax + FOURONES_OFFSET                );//now have -1 or 1
        punpcklbw   (mm7,mm0);

        psubw       (mm3,mm1                     );//limit - abs(i)
        psubw       (mm5,mm7                     );//x = p0 - pms

        movq        (mm4,mm3);
        psraw       (mm3,15);

        movd        (mm0,ebx + ecx*2 + 4       );//pms2
        pxor        (mm4,mm3);

        movd        (mm1,ebx + edx +4          );//pstride
        psubsw      (mm4,mm3                     );//abs(limit - abs(i))

        pxor        (mm3,mm3);
        psubusw     (mm2,mm4                     );//limit - abs(limit - abs(i))

        punpcklbw   (mm0,mm3);
        movq        (mm7,mm5);

        paddw       (mm7,mm5);
        pmullw      (mm2,mm6                     );//new y -- wait 3 cycles

        punpcklbw   (mm1,mm3);
        paddw       (mm5,mm7);

        paddw       (mm5,eax + LFABS_OFFSET             );//x += LoopFilterAdjustBeforeShift
        psubw       (mm0,mm1);

        paddw       (mm0,mm5);
        pxor        (mm6,mm6     );

        movd        (mm7,ebx + 0               );//p0
        psraw       (mm0,3                       );//values to be clipped

        movd        (mm3,ebx + ecx             );//get row above -- pms
        punpcklbw   (mm7,mm6);

        psubw       (mm7,mm2                     );//pms + y
        punpcklbw   (mm3,mm6);

        paddw       (mm3,mm2                     );//p0 - y
        packuswb    (mm7,mm7                     );//clamp pms + y

        packuswb    (mm3,mm3                     );//clamp p0 - y
        movq        (mm1,mm0);

        movd        (ebx + 0,mm7               );//write p0
        psraw       (mm0,15);

        movq        (mm7,mm0                     );//save sign
        pxor        (mm1,mm0);

//
//
        movq        (mm2,eax + LIMIT_OFFSET                 );//get the limit value
//
//

        psubsw      (mm1,mm0                     );//abs(i)
        movq        (mm5,mm2);

        por         (mm7,eax + FOURONES_OFFSET                );//now have -1 or 1
        psubw       (mm5,mm1                     );//limit - abs(i)

        movq        (mm4,mm5);
        psraw       (mm5,15);

        movd        (ebx + ecx,mm3             );//write pms
        pxor        (mm4,mm5);

        psubsw      (mm4,mm5                     );//abs(limit - abs(i))
        pxor        (mm6,mm6);

        movd        (mm5,ebx + 4               );//p0
        psubusw     (mm2,mm4                     );//limit - abs(limit - abs(i))

        movd        (mm3,ebx + ecx + 4         );//pms
        pmullw      (mm2,mm7                     );//new y

        punpcklbw   (mm5,mm6);
    //-

        punpcklbw   (mm3,mm6);
    //-

        paddw       (mm3,mm2                     );//pms + y
        psubw       (mm5,mm2                     );//p0 - y

        packuswb    (mm3,mm3                     );//clamp pms + y
    //-

        packuswb    (mm5,mm5                     );//clamp p0 - y
    //-

        movd        (ebx + ecx + 4,mm3         );//write pms
//

        movd        (ebx + 4,mm5               );//write p0
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -