;; [extraction artifact] web page header, not part of the source:
;; "📄 predict-a.asm" / "字号:" ("font size:")
;*****************************************************************************
;* predict-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*          Holger Lubitz <holger@lubitz.org>
;*          Jason Garrett-Glaser <darkshikari@gmail.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
;*****************************************************************************

; NOTE(review): this chunk was recovered from a web page that had collapsed
; the whole file onto a few physical lines; only the original NASM line
; structure has been restored below — no instruction was changed.
; FDEC_STRIDE, cglobal, RET/REP_RET, GLOBAL and the PALIGNR_MMX/PALIGNR_SSSE3
; helpers are defined in the included headers, not visible in this chunk.

%include "x86inc.asm"
%include "x86util.asm"

; Store the 8-byte value %1 into the top 4 rows and %2 into the bottom 4
; rows of an 8x8 block at r0.  Leaves r0 advanced by 4 rows.
%macro STORE8x8 2
    add        r0, 4*FDEC_STRIDE
    movq       [r0 + -4*FDEC_STRIDE], %1
    movq       [r0 + -3*FDEC_STRIDE], %1
    movq       [r0 + -2*FDEC_STRIDE], %1
    movq       [r0 + -1*FDEC_STRIDE], %1
    movq       [r0 + 0*FDEC_STRIDE], %2
    movq       [r0 + 1*FDEC_STRIDE], %2
    movq       [r0 + 2*FDEC_STRIDE], %2
    movq       [r0 + 3*FDEC_STRIDE], %2
%endmacro

; Fill a 16x16 block at r0: %1 goes to the left 8 columns of every row,
; %2 to the right 8 columns.  4 rows per iteration, 4 iterations.
%macro STORE16x16 2
    mov        r1d, 4                 ; loop counter: 4 * 4 rows = 16 rows
.loop:
    movq       [r0 + 0*FDEC_STRIDE], %1
    movq       [r0 + 1*FDEC_STRIDE], %1
    movq       [r0 + 2*FDEC_STRIDE], %1
    movq       [r0 + 3*FDEC_STRIDE], %1
    movq       [r0 + 0*FDEC_STRIDE + 8], %2
    movq       [r0 + 1*FDEC_STRIDE + 8], %2
    movq       [r0 + 2*FDEC_STRIDE + 8], %2
    movq       [r0 + 3*FDEC_STRIDE + 8], %2
    add        r0, 4*FDEC_STRIDE
    dec        r1d
    jg         .loop
%endmacro

; Fill a 16x16 block at r0 with the 16-byte XMM value %1, fully unrolled.
; movdqa requires the destination rows to be 16-byte aligned.
%macro STORE16x16_SSE2 1
    add        r0, 4*FDEC_STRIDE
    movdqa     [r0 + -4*FDEC_STRIDE], %1
    movdqa     [r0 + -3*FDEC_STRIDE], %1
    movdqa     [r0 + -2*FDEC_STRIDE], %1
    movdqa     [r0 + -1*FDEC_STRIDE], %1
    movdqa     [r0 + 0*FDEC_STRIDE], %1
    movdqa     [r0 + 1*FDEC_STRIDE], %1
    movdqa     [r0 + 2*FDEC_STRIDE], %1
    movdqa     [r0 + 3*FDEC_STRIDE], %1
    add        r0, 8*FDEC_STRIDE
    movdqa     [r0 + -4*FDEC_STRIDE], %1
    movdqa     [r0 + -3*FDEC_STRIDE], %1
    movdqa     [r0 + -2*FDEC_STRIDE], %1
    movdqa     [r0 + -1*FDEC_STRIDE], %1
    movdqa     [r0 + 0*FDEC_STRIDE], %1
    movdqa     [r0 + 1*FDEC_STRIDE], %1
    movdqa     [r0 + 2*FDEC_STRIDE], %1
    movdqa     [r0 + 3*FDEC_STRIDE], %1
%endmacro

SECTION_RODATA

ALIGN 16
pb_1:       times 16 db 1
pb_3:       times 16 db 3
pw_2:       times 4 dw 2
pw_4:       times 4 dw 4
pw_8:       times 8 dw 8
pw_76543210:                          ; alias label: same data as pw_3210
pw_3210:    dw 0, 1, 2, 3, 4, 5, 6, 7
pb_00s_ff:  times 8 db 0
pb_0s_ff:   times 7 db 0
            db 0xff                   ; 7 zero bytes followed by one 0xff
pw_ff00:    times 8 dw 0xff00
pb_reverse: db 7, 6, 5, 4, 3, 2, 1, 0

SECTION .text

; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
; Branch-free lowpass via pavgb: avg(left,right) is rounded up, so the
; carry bit (left^right)&1 is subtracted before averaging with src again.
; %6 selects the mov width: 'q' for MMX regs, 'dqa' for XMM regs.
%macro PRED8x8_LOWPASS0 6
    mov%6      %5, %2
    pavgb      %2, %3                 ; %2 = avg(left,right), rounded up
    pxor       %3, %5                 ; %3 = left ^ right
    mov%6      %1, %4
    pand       %3, [pb_1 GLOBAL]      ; isolate the rounding carry bit
    psubusb    %2, %3                 ; -> floor of the first average
    pavgb      %1, %2                 ; %1 = avg(src, that) = lowpass result
%endmacro

; MMX-register instantiation of the lowpass filter.
%macro PRED8x8_LOWPASS 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, q
%endmacro

; XMM-register instantiation of the lowpass filter.
%macro PRED8x8_LOWPASS_XMM 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, dqa
%endmacro

; Broadcast scalar args 2..4 into all four words of mm0/mm2/mm4.
; On x86-32 the args are read from the stack (r1m..r3m) directly.
%macro LOAD_PLANE_ARGS 0
%ifdef ARCH_X86_64
    movd       mm0, r1d
    movd       mm2, r2d
    movd       mm4, r3d
    pshufw     mm0, mm0, 0
    pshufw     mm2, mm2, 0
    pshufw     mm4, mm4, 0
%else
    pshufw     mm0, r1m, 0
    pshufw     mm2, r2m, 0
    pshufw     mm4, r3m, 0
%endif
%endmacro

;-----------------------------------------------------------------------------
; void predict_4x4_ddl_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Diagonal-down-left 4x4 intra prediction from the 8 pixels above the block.
cglobal predict_4x4_ddl_mmxext, 1,1
    movq       mm1, [r0-FDEC_STRIDE]  ; 8 top neighbours
    movq       mm2, mm1
    movq       mm3, mm1
    movq       mm4, mm1
    psllq      mm1, 8
    pxor       mm2, mm1
    psrlq      mm2, 8
    pxor       mm3, mm2               ; xor/shift pair builds the shifted edge
                                      ; vector with the last pixel extended
    PRED8x8_LOWPASS mm0, mm1, mm3, mm4, mm5
%assign Y 0
%rep 4
    psrlq      mm0, 8                 ; each row is the filtered edge shifted
    movd       [r0+Y*FDEC_STRIDE], mm0 ; one pixel further along the diagonal
%assign Y (Y+1)
%endrep
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_ddr_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Emits predict_4x4_ddr, predict_4x4_vr and predict_4x4_hd for one PALIGNR
; flavour (%1 = mmxext or ssse3; PALIGNR is %define'd before instantiation).
%macro PREDICT_4x4 1
cglobal predict_4x4_ddr_%1, 1,1
    movq       mm1, [r0+1*FDEC_STRIDE-8]
    movq       mm2, [r0+0*FDEC_STRIDE-8]
    punpckhbw  mm2, [r0-1*FDEC_STRIDE-8]
    movd       mm3, [r0-1*FDEC_STRIDE]
    punpckhwd  mm1, mm2
    PALIGNR    mm3, mm1, 5, mm1       ; gather top + top-left + left pixels
    movq       mm1, mm3               ; into one contiguous edge register
    PALIGNR    mm3, [r0+2*FDEC_STRIDE-8], 7, mm4
    movq       mm2, mm3
    PALIGNR    mm3, [r0+3*FDEC_STRIDE-8], 7, mm4
    PRED8x8_LOWPASS mm0, mm3, mm1, mm2, mm4
%assign Y 3
    movd       [r0+Y*FDEC_STRIDE], mm0
%rep 3
%assign Y (Y-1)
    psrlq      mm0, 8                 ; successive rows shift along the diagonal
    movd       [r0+Y*FDEC_STRIDE], mm0
%endrep
    RET

; Vertical-right 4x4 prediction from top, top-left and left neighbours.
cglobal predict_4x4_vr_%1, 1,1
    movd       mm0, [r0-1*FDEC_STRIDE]              ; ........t3t2t1t0
    movq       mm7, mm0
    PALIGNR    mm0, [r0-1*FDEC_STRIDE-8], 7, mm1    ; ......t3t2t1t0lt
    pavgb      mm7, mm0
    PALIGNR    mm0, [r0+0*FDEC_STRIDE-8], 7, mm1    ; ....t3t2t1t0ltl0
    movq       mm1, mm0
    PALIGNR    mm0, [r0+1*FDEC_STRIDE-8], 7, mm2    ; ..t3t2t1t0ltl0l1
    movq       mm2, mm0
    PALIGNR    mm0, [r0+2*FDEC_STRIDE-8], 7, mm3    ; t3t2t1t0ltl0l1l2
    PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
    movq       mm1, mm3
    psrlq      mm3, 16
    psllq      mm1, 48
    movd       [r0+0*FDEC_STRIDE], mm7  ; even rows: 2-tap averages
    movd       [r0+1*FDEC_STRIDE], mm3  ; odd rows: 3-tap lowpass
    PALIGNR    mm7, mm1, 7, mm2
    psllq      mm1, 8
    movd       [r0+2*FDEC_STRIDE], mm7
    PALIGNR    mm3, mm1, 7, mm1
    movd       [r0+3*FDEC_STRIDE], mm3
    RET

; Horizontal-down 4x4 prediction from top, top-left and left neighbours.
cglobal predict_4x4_hd_%1, 1,1
    movd       mm0, [r0-1*FDEC_STRIDE-4] ; lt ..
    punpckldq  mm0, [r0-1*FDEC_STRIDE]   ; t3 t2 t1 t0 lt .. .. ..
    psllq      mm0, 8                    ; t2 t1 t0 lt .. .. .. ..
    movq       mm1, [r0+3*FDEC_STRIDE-8] ; l3
    punpckhbw  mm1, [r0+2*FDEC_STRIDE-8] ; l2 l3
    movq       mm2, [r0+1*FDEC_STRIDE-8] ; l1
    punpckhbw  mm2, [r0+0*FDEC_STRIDE-8] ; l0 l1
    punpckhwd  mm1, mm2                  ; l0 l1 l2 l3
    punpckhdq  mm1, mm0                  ; t2 t1 t0 lt l0 l1 l2 l3
    movq       mm0, mm1
    movq       mm2, mm1
    movq       mm7, mm1
    psrlq      mm0, 16                   ; .. .. t2 t1 t0 lt l0 l1
    psrlq      mm2, 8                    ; .. t2 t1 t0 lt l0 l1 l2
    pavgb      mm7, mm2                  ; 2-tap averages of adjacent pixels
    PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
    punpcklbw  mm7, mm3                  ; interleave 2-tap and 3-tap results
    psrlq      mm3, 32
    PALIGNR    mm3, mm7, 6, mm6
%assign Y 3
    movd       [r0+Y*FDEC_STRIDE], mm7   ; rows written bottom-up
%rep 2
%assign Y (Y-1)
    psrlq      mm7, 16
    movd       [r0+Y*FDEC_STRIDE], mm7
%endrep
    movd       [r0+0*FDEC_STRIDE], mm3
    RET
%endmacro

%define PALIGNR PALIGNR_MMX
PREDICT_4x4 mmxext
%define PALIGNR PALIGNR_SSSE3
PREDICT_4x4 ssse3

;-----------------------------------------------------------------------------
; void predict_4x4_hu_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Horizontal-up 4x4 prediction from the 4 left neighbours only.
cglobal predict_4x4_hu_mmxext, 1,1
    movq       mm0, [r0+0*FDEC_STRIDE-8]
    punpckhbw  mm0, [r0+1*FDEC_STRIDE-8]
    movq       mm1, [r0+2*FDEC_STRIDE-8]
    punpckhbw  mm1, [r0+3*FDEC_STRIDE-8]
    punpckhwd  mm0, mm1               ; pack the 4 left pixels together
    movq       mm1, mm0
    punpckhbw  mm1, mm1
    pshufw     mm1, mm1, 0xFF         ; broadcast the bottom-most left pixel
    punpckhdq  mm0, mm1               ; extend the edge with that pixel
    movq       mm2, mm0
    movq       mm3, mm0
    movq       mm7, mm0
    psrlq      mm2, 16
    psrlq      mm3, 8
    pavgb      mm7, mm3               ; 2-tap averages
    PRED8x8_LOWPASS mm4, mm0, mm2, mm3, mm5
    punpcklbw  mm7, mm4               ; interleave 2-tap and 3-tap results
%assign Y 0
    movd       [r0+Y*FDEC_STRIDE], mm7
%rep 2
%assign Y (Y+1)
    psrlq      mm7, 16
    movd       [r0+Y*FDEC_STRIDE], mm7
%endrep
    movd       [r0+3*FDEC_STRIDE], mm1 ; last row is the broadcast pixel
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_vl_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Vertical-left 4x4 prediction from the 8 pixels above the block.
cglobal predict_4x4_vl_mmxext, 1,1
    movq       mm1, [r0-FDEC_STRIDE]  ; 8 top neighbours
    movq       mm3, mm1
    movq       mm2, mm1
    psrlq      mm3, 8
    psrlq      mm2, 16
    movq       mm4, mm3
    pavgb      mm4, mm1               ; 2-tap averages -> even rows
    PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5 ; 3-tap lowpass -> odd rows
    movd       [r0+0*FDEC_STRIDE], mm4
    movd       [r0+1*FDEC_STRIDE], mm0
    psrlq      mm4, 8
    psrlq      mm0, 8
    movd       [r0+2*FDEC_STRIDE], mm4
    movd       [r0+3*FDEC_STRIDE], mm0
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_dc( uint8_t *src );
;-----------------------------------------------------------------------------
; DC prediction: every pixel = (sum(4 top) + sum(4 left) + 4) >> 3.
cglobal predict_4x4_dc_mmxext, 1,4
    pxor       mm7, mm7
    movd       mm0, [r0-FDEC_STRIDE]  ; movd zero-extends, so psadbw below
    psadbw     mm0, mm7               ; yields the sum of the 4 top pixels
    movd       r3d, mm0
    movzx      r1d, byte [r0-1]       ; first left pixel
%assign n 1
%rep 3
    movzx      r2d, byte [r0+FDEC_STRIDE*n-1]
    add        r1d, r2d               ; accumulate remaining left pixels
%assign n n+1
%endrep
    lea        r1d, [r1+r3+4]         ; top + left + rounding bias
    shr        r1d, 3
    imul       r1d, 0x01010101        ; splat the DC byte across a dword
    mov        [r0+FDEC_STRIDE*0], r1d
    mov        [r0+FDEC_STRIDE*1], r1d
    mov        [r0+FDEC_STRIDE*2], r1d
    mov        [r0+FDEC_STRIDE*3], r1d
    RET

%macro PREDICT_FILTER 1
;-----------------------------------------------------------------------------
;void predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters )
;-----------------------------------------------------------------------------
; Build the filtered edge[] buffer used by the 8x8 intra predictors.
; r2 = i_neighbor availability flags, r3 = i_filters request flags; the
; exact bit meanings are defined in the C headers — not visible here.
; The .fix_* paths patch unavailable neighbour pixels before filtering.
cglobal predict_8x8_filter_%1, 4,5
    add        r0, 0x58               ; bias r0 so all offsets fit in disp8
%define src r0-0x58
%ifndef ARCH_X86_64
    mov        r4, r1                 ; x86-32: keep edge ptr in a spare reg
%define t1 r4
%define t4 r1
%else
%define t1 r1
%define t4 r4
%endif
    test       r3b, 0x01              ; left-edge filter requested?
    je         .check_top
    movq       mm0, [src+0*FDEC_STRIDE-8]
    punpckhbw  mm0, [src-1*FDEC_STRIDE-8]
    movq       mm1, [src+2*FDEC_STRIDE-8]
    punpckhbw  mm1, [src+1*FDEC_STRIDE-8]
    punpckhwd  mm1, mm0
    movq       mm2, [src+4*FDEC_STRIDE-8]
    punpckhbw  mm2, [src+3*FDEC_STRIDE-8]
    movq       mm3, [src+6*FDEC_STRIDE-8]
    punpckhbw  mm3, [src+5*FDEC_STRIDE-8]
    punpckhwd  mm3, mm2
    punpckhdq  mm3, mm1               ; mm3 = 8 left-column pixels packed
    movq       mm0, [src+7*FDEC_STRIDE-8]
    movq       mm1, [src-1*FDEC_STRIDE]
    movq       mm4, mm3
    movq       mm2, mm3
    PALIGNR    mm4, mm0, 7, mm0
    PALIGNR    mm1, mm2, 1, mm2
    test       r2b, 0x08              ; top-left pixel available?
    je         .fix_lt_1
.do_left:
    movq       mm0, mm4
    PRED8x8_LOWPASS mm2, mm1, mm4, mm3, mm5
    movq       [t1+8], mm2            ; edge[8..15] = filtered left column
    movq       mm4, mm0
    PRED8x8_LOWPASS mm1, mm3, mm0, mm4, mm5
    movd       t4, mm1
    mov        [t1+7], t4b            ; edge[7] = filtered corner pixel
.check_top:
    test       r3b, 0x02              ; top-edge filter requested?
    je         .done
    movq       mm0, [src-1*FDEC_STRIDE-8]
    movq       mm3, [src-1*FDEC_STRIDE]
    movq       mm1, [src-1*FDEC_STRIDE+8]
    movq       mm2, mm3
    movq       mm4, mm3
    PALIGNR    mm2, mm0, 7, mm0
    PALIGNR    mm1, mm4, 1, mm4
    test       r2b, 0x08              ; top-left available?
    je         .fix_lt_2
    test       r2b, 0x04              ; top-right available?
    je         .fix_tr_1
.do_top:
    PRED8x8_LOWPASS mm4, mm2, mm1, mm3, mm5
    movq       [t1+16], mm4           ; edge[16..23] = filtered top row
    test       r3b, 0x04              ; top-right filter requested?
    je         .done
    test       r2b, 0x04
    je         .fix_tr_2
    movq       mm0, [src-1*FDEC_STRIDE+8]
    movq       mm5, mm0
    movq       mm2, mm0
    movq       mm4, mm0
    psrlq      mm5, 56
    PALIGNR    mm2, mm3, 7, mm3
    PALIGNR    mm5, mm4, 1, mm4
    PRED8x8_LOWPASS mm1, mm2, mm5, mm0, mm4
    jmp        .do_topright
.fix_tr_2:
    punpckhbw  mm3, mm3               ; top-right missing: replicate the
    pshufw     mm1, mm3, 0xFF         ; last available top pixel
.do_topright:
    movq       [t1+24], mm1           ; edge[24..31] = filtered top-right
    psrlq      mm1, 56
    movd       t4, mm1
    mov        [t1+32], t4b           ; edge[32] = last pixel, extended
.done:
    REP_RET
.fix_lt_1:
    movq       mm5, mm3               ; patch missing top-left into the
    pxor       mm5, mm4               ; left-column edge vector
    psrlq      mm5, 56
    psllq      mm5, 48
    pxor       mm1, mm5
    jmp        .do_left
.fix_lt_2:
    movq       mm5, mm3               ; patch missing top-left into the
    pxor       mm5, mm2               ; top-row edge vector
    psllq      mm5, 56
    psrlq      mm5, 56
    pxor       mm2, mm5
    test       r2b, 0x04
    jne        .do_top
.fix_tr_1:
    movq       mm5, mm3               ; patch missing top-right pixel
    pxor       mm5, mm1
    psrlq      mm5, 56
    psllq      mm5, 56
    pxor       mm1, mm5
    jmp        .do_top
; NOTE(review): SOURCE is truncated here, inside the PREDICT_FILTER macro —
; the closing %endmacro and everything after it (macro instantiations etc.)
; are missing from this chunk; restore the tail from the upstream file.
;; [extraction artifact] code-viewer UI footer (keyboard-shortcut help), not
;; part of the source: "快捷键说明" = shortcut help; copy code = Ctrl+C,
;; search code = Ctrl+F, fullscreen = F11, toggle theme = Ctrl+Shift+D,
;; show shortcuts = ?, increase font = Ctrl+=, decrease font = Ctrl+-