⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dct-a.asm

📁 从服务器上下的x264编码器C源码……希望对大家有帮助……这个是09年4月的
💻 ASM
📖 第 1 页 / 共 2 页
字号:
    ; NOTE(review): the instructions down to the first `ret` are the tail of a
    ; routine whose beginning lies outside this excerpt; they are reproduced
    ; unchanged and deliberately left undocumented.
    ;
    ; General notes for everything below:
    ;   * One statement per line has been restored. In the collapsed listing the
    ;     first ';' swallowed the rest of the physical line and several
    ;     instructions were split across lines, so the text could not assemble.
    ;   * cglobal, RET, INIT_MMX, INIT_XMM, SWAP, SBUTTERFLY, TRANSPOSE4x4W,
    ;     IDCT_DC_STORE, PALIGNR_MMX, PALIGNR_SSSE3, GLOBAL, FENC_STRIDE,
    ;     FDEC_STRIDE and the pb_* constants are defined outside this excerpt.
    ;   * In every routine the code stores through r0 and loads through r1
    ;     (and r2 where used). Presumably these are the C arguments in
    ;     prototype order -- confirm against the cglobal definition.
    ;   * The MMX routines contain no emms; presumably the caller is responsible
    ;     for clearing MMX state -- TODO confirm.
    packuswb  xmm1, xmm1
    movdqa    xmm2, xmm0
    movdqa    xmm3, xmm1
    pshufb    xmm0, xmm5
    pshufb    xmm2, xmm6
    pshufb    xmm1, xmm5
    pshufb    xmm3, xmm6
    IDCT_DC_STORE FDEC_STRIDE*-4, xmm0, xmm1
    IDCT_DC_STORE 0, xmm2, xmm3
    ret

;-----------------------------------------------------------------------------
; void x264_zigzag_scan_8x8_frame_ssse3( int16_t level[64], int16_t dct[8][8] )
;-----------------------------------------------------------------------------
; SCAN_8x8 <suffix>
;   Emits x264_zigzag_scan_8x8_frame_<suffix>. It is instantiated below for
;   sse2 and ssse3; the prototype above names only the ssse3 variant.
;   Reads 128 bytes at [r1] with aligned 16-byte loads (movdqa) and writes
;   64 words at [r0] using 4- and 8-byte stores.
;   Uses both the xmm0-7 and mm0-7 register files: each 16-byte row is rotated
;   with PALIGNR and its low half is copied to an MMX register (movdq2q),
;   while the high halves are interleaved in the XMM registers.
;   PALIGNR must be %define'd to a 4-operand macro before invoking SCAN_8x8.
%macro SCAN_8x8 1
cglobal x264_zigzag_scan_8x8_frame_%1, 2,2,8
    ; rows 0-3 of the input
    movdqa    xmm0, [r1]
    movdqa    xmm1, [r1+16]
    movdq2q    mm0, xmm0
    PALIGNR   xmm1, xmm1, 14, xmm2
    movdq2q    mm1, xmm1
    movdqa    xmm2, [r1+32]
    movdqa    xmm3, [r1+48]
    PALIGNR   xmm2, xmm2, 12, xmm4
    movdq2q    mm2, xmm2
    PALIGNR   xmm3, xmm3, 10, xmm4
    movdq2q    mm3, xmm3
    punpckhwd xmm0, xmm1
    punpckhwd xmm2, xmm3
    movq       mm4, mm1
    movq       mm5, mm1
    movq       mm6, mm2
    movq       mm7, mm3
    punpckhwd  mm1, mm0
    psllq      mm0, 16
    psrlq      mm3, 16
    punpckhdq  mm1, mm1
    punpckhdq  mm2, mm0
    punpcklwd  mm0, mm4
    punpckhwd  mm4, mm3
    punpcklwd  mm4, mm2
    punpckhdq  mm0, mm2
    punpcklwd  mm6, mm3
    punpcklwd  mm5, mm7
    punpcklwd  mm5, mm6
    ; rows 4-7 are loaded before the first results are stored
    movdqa    xmm4, [r1+64]
    movdqa    xmm5, [r1+80]
    movdqa    xmm6, [r1+96]
    movdqa    xmm7, [r1+112]
    movq [r0+2*00], mm0
    movq [r0+2*04], mm4
    movd [r0+2*08], mm1
    movq [r0+2*36], mm5
    movq [r0+2*46], mm6
    PALIGNR   xmm4, xmm4, 14, xmm3
    movdq2q    mm4, xmm4
    PALIGNR   xmm5, xmm5, 12, xmm3
    movdq2q    mm5, xmm5
    PALIGNR   xmm6, xmm6, 10, xmm3
    movdq2q    mm6, xmm6
%ifidn %1, ssse3
    PALIGNR   xmm7, xmm7, 8, xmm3
    movdq2q    mm7, xmm7
%else
    ; non-ssse3 build: move the high qword of xmm7 to mm7 via xmm3 and
    ; duplicate the low qword of xmm7 into both halves
    movhlps   xmm3, xmm7
    punpcklqdq xmm7, xmm7
    movdq2q    mm7, xmm3
%endif
    punpckhwd xmm4, xmm5
    punpckhwd xmm6, xmm7
    movq       mm0, mm4
    movq       mm1, mm5
    movq       mm3, mm7
    punpcklwd  mm7, mm6
    psrlq      mm6, 16
    punpcklwd  mm4, mm6
    punpcklwd  mm5, mm4
    punpckhdq  mm4, mm3
    punpcklwd  mm3, mm6
    punpckhwd  mm3, mm4
    punpckhwd  mm0, mm1
    punpckldq  mm4, mm0
    punpckhdq  mm0, mm6
    pshufw     mm4, mm4, 0x6c
    movq [r0+2*14], mm4
    movq [r0+2*25], mm0
    movd [r0+2*54], mm7
    movq [r0+2*56], mm5
    movq [r0+2*60], mm3
    ; remaining outputs come from the interleaved XMM halves;
    ; 0x1b reverses the order of the four words in the selected half
    movdqa    xmm3, xmm0
    movdqa    xmm7, xmm4
    punpckldq xmm0, xmm2
    punpckldq xmm4, xmm6
    punpckhdq xmm3, xmm2
    punpckhdq xmm7, xmm6
    pshufhw   xmm0, xmm0, 0x1b
    pshuflw   xmm4, xmm4, 0x1b
    pshufhw   xmm3, xmm3, 0x1b
    pshuflw   xmm7, xmm7, 0x1b
    movlps [r0+2*10], xmm0
    movhps [r0+2*17], xmm0
    movlps [r0+2*21], xmm3
    movlps [r0+2*28], xmm4
    movhps [r0+2*32], xmm3
    movhps [r0+2*39], xmm4
    movlps [r0+2*43], xmm7
    movhps [r0+2*50], xmm7
    RET
%endmacro

INIT_XMM
%define PALIGNR PALIGNR_MMX
SCAN_8x8 sse2
%define PALIGNR PALIGNR_SSSE3
SCAN_8x8 ssse3

;-----------------------------------------------------------------------------
; void x264_zigzag_scan_8x8_frame_mmxext( int16_t level[64], int16_t dct[8][8] )
;-----------------------------------------------------------------------------
; MMX-register-only variant. Loads are 8-byte movq from [r1+2*k]; several of
; them (k = 14, 21, 11, 25, 18, 39, 46, 35, 49, 42) are not multiples of four
; words, i.e. they are not 8-byte aligned relative to r1. Stores are 8-byte
; movq to [r0+2*k]. Uses mm0-mm7; pshufw with 0x1b reverses the four words of
; a register.
cglobal x264_zigzag_scan_8x8_frame_mmxext, 2,2
    movq       mm0, [r1]
    movq       mm1, [r1+2*8]
    movq       mm2, [r1+2*14]
    movq       mm3, [r1+2*21]
    movq       mm4, [r1+2*28]
    movq       mm5, mm0
    movq       mm6, mm1
    psrlq      mm0, 16
    punpckldq  mm1, mm1
    punpcklwd  mm5, mm6
    punpckhwd  mm1, mm3
    punpckhwd  mm6, mm0
    punpckldq  mm5, mm0
    movq       mm7, [r1+2*52]
    movq       mm0, [r1+2*60]
    punpckhwd  mm1, mm2
    punpcklwd  mm2, mm4
    punpckhwd  mm4, mm3
    punpckldq  mm3, mm3
    punpckhwd  mm3, mm2
    movq      [r0], mm5
    movq  [r0+2*4], mm1
    movq  [r0+2*8], mm6
    punpcklwd  mm6, mm0
    punpcklwd  mm6, mm7
    movq       mm1, [r1+2*32]
    movq       mm5, [r1+2*39]
    movq       mm2, [r1+2*46]
    movq [r0+2*35], mm3
    movq [r0+2*47], mm4
    punpckhwd  mm7, mm0
    psllq      mm0, 16
    movq       mm3, mm5
    punpcklwd  mm5, mm1
    punpckhwd  mm1, mm2
    punpckhdq  mm3, mm3
    movq [r0+2*52], mm6
    movq [r0+2*13], mm5
    movq       mm4, [r1+2*11]
    movq       mm6, [r1+2*25]
    punpcklwd  mm5, mm7
    punpcklwd  mm1, mm3
    punpckhdq  mm0, mm7
    movq       mm3, [r1+2*4]
    movq       mm7, [r1+2*18]
    punpcklwd  mm2, mm5
    movq [r0+2*25], mm1
    movq       mm1, mm4
    movq       mm5, mm6
    punpcklwd  mm4, mm3
    punpcklwd  mm6, mm7
    punpckhwd  mm1, mm3
    punpckhwd  mm5, mm7
    movq       mm3, mm6
    movq       mm7, mm5
    punpckldq  mm6, mm4
    punpckldq  mm5, mm1
    punpckhdq  mm3, mm4
    punpckhdq  mm7, mm1
    movq       mm4, [r1+2*35]
    movq       mm1, [r1+2*49]
    pshufw     mm6, mm6, 0x1b
    pshufw     mm5, mm5, 0x1b
    movq [r0+2*60], mm0
    movq [r0+2*56], mm2
    movq       mm0, [r1+2*42]
    movq       mm2, [r1+2*56]
    movq [r0+2*17], mm3
    movq [r0+2*32], mm7
    movq [r0+2*10], mm6
    movq [r0+2*21], mm5
    movq       mm3, mm0
    movq       mm7, mm2
    punpcklwd  mm0, mm4
    punpcklwd  mm2, mm1
    punpckhwd  mm3, mm4
    punpckhwd  mm7, mm1
    movq       mm4, mm2
    movq       mm1, mm7
    punpckhdq  mm2, mm0
    punpckhdq  mm7, mm3
    punpckldq  mm4, mm0
    punpckldq  mm1, mm3
    pshufw     mm2, mm2, 0x1b
    pshufw     mm7, mm7, 0x1b
    movq [r0+2*28], mm4
    movq [r0+2*43], mm1
    movq [r0+2*39], mm2
    movq [r0+2*50], mm7
    RET

;-----------------------------------------------------------------------------
; void x264_zigzag_scan_4x4_frame_mmx( int16_t level[16], int16_t dct[4][4] )
;-----------------------------------------------------------------------------
; Loads the four 8-byte rows at [r1], [r1+8], [r1+16], [r1+24], permutes their
; words with shifts and unpacks, and stores four 8-byte results at
; [r0] .. [r0+24]. Uses mm0-mm7.
cglobal x264_zigzag_scan_4x4_frame_mmx, 2,2
    movq       mm0, [r1]
    movq       mm1, [r1+8]
    movq       mm2, [r1+16]
    movq       mm3, [r1+24]
    movq       mm4, mm0
    movq       mm5, mm1
    movq       mm6, mm2
    movq       mm7, mm3
    psllq      mm3, 16
    psrlq      mm0, 16
    punpckldq  mm2, mm2
    punpckhdq  mm1, mm1
    punpcklwd  mm4, mm5
    punpcklwd  mm5, mm3
    punpckldq  mm4, mm0
    punpckhwd  mm5, mm2
    punpckhwd  mm0, mm6
    punpckhwd  mm6, mm7
    punpcklwd  mm1, mm0
    punpckhdq  mm3, mm6
    movq      [r0], mm4
    movq    [r0+8], mm5
    movq   [r0+16], mm1
    movq   [r0+24], mm3
    RET

;-----------------------------------------------------------------------------
; void x264_zigzag_scan_4x4_frame_ssse3( int16_t level[16], int16_t dct[4][4] )
;-----------------------------------------------------------------------------
; Loads two aligned 16-byte halves from [r1], shuffles each with a pshufb mask
; (pb_scan4framea / pb_scan4frameb, defined elsewhere), then uses byte shifts
; and palignr to move bytes between the two halves before storing 32 bytes to
; [r0] with aligned stores.
cglobal x264_zigzag_scan_4x4_frame_ssse3, 2,2
    movdqa    xmm1, [r1+16]
    movdqa    xmm0, [r1]
    pshufb    xmm1, [pb_scan4frameb GLOBAL]
    pshufb    xmm0, [pb_scan4framea GLOBAL]
    movdqa    xmm2, xmm1
    psrldq    xmm1, 6
    palignr   xmm2, xmm0, 6
    pslldq    xmm0, 10
    palignr   xmm1, xmm0, 10
    movdqa    [r0], xmm2
    movdqa [r0+16], xmm1
    RET

;-----------------------------------------------------------------------------
; void x264_zigzag_scan_4x4_field_mmxext( int16_t level[16], int16_t dct[4][4] )
;-----------------------------------------------------------------------------
; sse2 is only 1 cycle faster, and ssse3/pshufb is slower on core2
;
; Only input words 2..5 are permuted: pshufw 0xd2 selects source words
; (2,0,1,3), so out[2..5] = in[4], in[2], in[3], in[5].
; Words 0-1 and 6-7 are copied through r2d, and words 8-15 are copied
; unchanged with two movq.
cglobal x264_zigzag_scan_4x4_field_mmxext, 2,3
    pshufw     mm0, [r1+4], 0xd2
    movq       mm1, [r1+16]
    movq       mm2, [r1+24]
    movq    [r0+4], mm0
    movq   [r0+16], mm1
    movq   [r0+24], mm2
    mov        r2d, [r1]
    mov       [r0], r2d
    mov        r2d, [r1+12]
    mov    [r0+12], r2d
    RET

;-----------------------------------------------------------------------------
; void x264_zigzag_sub_4x4_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst )
;-----------------------------------------------------------------------------
; Steps, as visible in the code:
;   1. Load four 4-byte rows from r1 (row pitch FENC_STRIDE) and four from r2
;      (row pitch FDEC_STRIDE).
;   2. Overwrite the four r2 rows with the r1 rows. The r2 rows were already
;      loaded into xmm4-7, so the subtraction still uses the old r2 contents.
;   3. Pack each set of rows into one register and reorder the 16 bytes with
;      the pb_sub4frame mask (defined elsewhere).
;   4. Zero-extend bytes to words and subtract (r1 data minus r2 data).
;   5. Store the 16 word results at [r0] with aligned stores.
cglobal x264_zigzag_sub_4x4_frame_ssse3, 3,3,8
    movd      xmm0, [r1+0*FENC_STRIDE]
    movd      xmm1, [r1+1*FENC_STRIDE]
    movd      xmm2, [r1+2*FENC_STRIDE]
    movd      xmm3, [r1+3*FENC_STRIDE]
    movd      xmm4, [r2+0*FDEC_STRIDE]
    movd      xmm5, [r2+1*FDEC_STRIDE]
    movd      xmm6, [r2+2*FDEC_STRIDE]
    movd      xmm7, [r2+3*FDEC_STRIDE]
    movd      [r2+0*FDEC_STRIDE], xmm0
    movd      [r2+1*FDEC_STRIDE], xmm1
    movd      [r2+2*FDEC_STRIDE], xmm2
    movd      [r2+3*FDEC_STRIDE], xmm3
    punpckldq xmm0, xmm1
    punpckldq xmm2, xmm3
    punpckldq xmm4, xmm5
    punpckldq xmm6, xmm7
    punpcklqdq xmm0, xmm2
    punpcklqdq xmm4, xmm6
    movdqa    xmm7, [pb_sub4frame GLOBAL]
    pshufb    xmm0, xmm7
    pshufb    xmm4, xmm7
    pxor      xmm6, xmm6                ; zero register for byte->word unpack
    movdqa    xmm1, xmm0
    movdqa    xmm5, xmm4
    punpcklbw xmm0, xmm6
    punpckhbw xmm1, xmm6
    punpcklbw xmm4, xmm6
    punpckhbw xmm5, xmm6
    psubw     xmm0, xmm4
    psubw     xmm1, xmm5
    movdqa    [r0], xmm0
    movdqa [r0+16], xmm1
    RET

;-----------------------------------------------------------------------------
; void x264_zigzag_interleave_8x8_cavlc_mmx( int16_t *dst, int16_t *src, uint8_t *nnz )
;-----------------------------------------------------------------------------
; INTERLEAVE <offset>
;   Loads four 8-byte groups from [r1+offset*4 ...], runs TRANSPOSE4x4W on
;   them and stores the four results at [r0+offset + 0/32/64/96].
;   It also accumulates the data in m5/m6/m7. The first invocation
;   (offset 0) initialises the accumulators with SWAP; later invocations
;   OR into them. m5 holds the saturating byte pack of m0,m1; m6 and m7
;   hold m2 and m3.
%macro INTERLEAVE 1
    movq   m0, [r1+%1*4+ 0]
    movq   m1, [r1+%1*4+ 8]
    movq   m2, [r1+%1*4+16]
    movq   m3, [r1+%1*4+24]
    TRANSPOSE4x4W 0,1,2,3,4
    movq   [r0+%1+ 0], m0
    movq   [r0+%1+32], m1
    movq   [r0+%1+64], m2
    movq   [r0+%1+96], m3
%if %1
    packsswb m0, m1
    por    m6, m2
    por    m7, m3
    por    m5, m0
%else
    packsswb m0, m1
    SWAP   m5, m0
    SWAP   m6, m2
    SWAP   m7, m3
%endif
%endmacro

INIT_MMX
; After the four INTERLEAVE steps the accumulators are packed down to bytes,
; compared against zero (0xFF where the byte is zero) and pb_1 is added.
; Assuming pb_1 is a vector of byte 1s (defined elsewhere -- confirm), each
; byte becomes 0 for "all zero" and 1 otherwise. The low 32 bits are written
; as two 16-bit stores to [r2+0] and [r2+8]. r0 is reused as scratch once the
; output stores are done.
cglobal x264_zigzag_interleave_8x8_cavlc_mmx, 3,3
    INTERLEAVE  0
    INTERLEAVE  8
    INTERLEAVE 16
    INTERLEAVE 24
    packsswb m6, m7
    packsswb m5, m6
    packsswb m5, m5
    pxor     m0, m0
    pcmpeqb  m5, m0
    paddb    m5, [pb_1 GLOBAL]
    movd    r0d, m5
    mov  [r2+0], r0w
    shr     r0d, 16
    mov  [r2+8], r0w
    RET

; INTERLEAVE_XMM <offset>
;   16-byte-register counterpart of INTERLEAVE: loads 64 bytes from
;   [r1+offset*4 ...], applies two rounds of SBUTTERFLY wd to each register
;   pair, and stores the eight 8-byte halves at
;   [r0+offset + 0/32/64/96] and [r0+offset + 8/40/72/104].
;   Accumulates into m2/m3. The first invocation (offset 0) initialises
;   them with SWAP; later invocations OR into them.
%macro INTERLEAVE_XMM 1
    mova   m0, [r1+%1*4+ 0]
    mova   m1, [r1+%1*4+16]
    mova   m4, [r1+%1*4+32]
    mova   m5, [r1+%1*4+48]
    SBUTTERFLY wd, 0, 1, 6
    SBUTTERFLY wd, 4, 5, 7
    SBUTTERFLY wd, 0, 1, 6
    SBUTTERFLY wd, 4, 5, 7
    movq   [r0+%1+  0], m0
    movhps [r0+%1+ 32], m0
    movq   [r0+%1+ 64], m1
    movhps [r0+%1+ 96], m1
    movq   [r0+%1+  8], m4
    movhps [r0+%1+ 40], m4
    movq   [r0+%1+ 72], m5
    movhps [r0+%1+104], m5
%if %1
    por    m2, m0
    por    m3, m1
    por    m2, m4
    por    m3, m5
%else
    SWAP 0,2
    SWAP 3,1
    por    m2, m4
    por    m3, m5
%endif
%endmacro

INIT_XMM
; Same epilogue as the MMX version: pack the accumulators to bytes, compare
; with zero, add pb_1, then write the low 32 bits as two 16-bit stores to
; [r2+0] and [r2+8].
cglobal x264_zigzag_interleave_8x8_cavlc_sse2, 3,3,8
    INTERLEAVE_XMM  0
    INTERLEAVE_XMM 16
    packsswb m2, m3
    pxor     m5, m5
    packsswb m2, m2
    packsswb m2, m2
    pcmpeqb  m5, m2
    paddb    m5, [pb_1 GLOBAL]
    movd    r0d, m5
    mov  [r2+0], r0w
    shr     r0d, 16
    mov  [r2+8], r0w
    RET

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -