;; [extraction artifact] web page header, not part of the source:
;; "📄 predict-a.asm" / "字号:" ("font size:")
;*****************************************************************************
;* predict-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2005-2008 x264 project
;*
;* Authors: Loren Merritt <lorenm@u.washington.edu>
;*          Holger Lubitz <holger@lubitz.org>
;*          Jason Garrett-Glaser <darkshikari@gmail.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
;*****************************************************************************

; NOTE(review): this chunk was recovered from a web page that had collapsed
; the whole file onto a few physical lines; only the original NASM line
; structure has been restored below — no instruction was changed.
; FDEC_STRIDE, cglobal, RET/REP_RET, GLOBAL and the PALIGNR_MMX/PALIGNR_SSSE3
; helpers are defined in the included headers, not visible in this chunk.

%include "x86inc.asm"
%include "x86util.asm"

; Store the 8-byte value %1 into the top 4 rows and %2 into the bottom 4
; rows of an 8x8 block at r0.  Leaves r0 advanced by 4 rows.
%macro STORE8x8 2
    add        r0, 4*FDEC_STRIDE
    movq       [r0 + -4*FDEC_STRIDE], %1
    movq       [r0 + -3*FDEC_STRIDE], %1
    movq       [r0 + -2*FDEC_STRIDE], %1
    movq       [r0 + -1*FDEC_STRIDE], %1
    movq       [r0 + 0*FDEC_STRIDE], %2
    movq       [r0 + 1*FDEC_STRIDE], %2
    movq       [r0 + 2*FDEC_STRIDE], %2
    movq       [r0 + 3*FDEC_STRIDE], %2
%endmacro

; Fill a 16x16 block at r0: %1 goes to the left 8 columns of every row,
; %2 to the right 8 columns.  4 rows per iteration, 4 iterations.
%macro STORE16x16 2
    mov        r1d, 4                 ; loop counter: 4 * 4 rows = 16 rows
.loop:
    movq       [r0 + 0*FDEC_STRIDE], %1
    movq       [r0 + 1*FDEC_STRIDE], %1
    movq       [r0 + 2*FDEC_STRIDE], %1
    movq       [r0 + 3*FDEC_STRIDE], %1
    movq       [r0 + 0*FDEC_STRIDE + 8], %2
    movq       [r0 + 1*FDEC_STRIDE + 8], %2
    movq       [r0 + 2*FDEC_STRIDE + 8], %2
    movq       [r0 + 3*FDEC_STRIDE + 8], %2
    add        r0, 4*FDEC_STRIDE
    dec        r1d
    jg         .loop
%endmacro

; Fill a 16x16 block at r0 with the 16-byte XMM value %1, fully unrolled.
; movdqa requires the destination rows to be 16-byte aligned.
%macro STORE16x16_SSE2 1
    add        r0, 4*FDEC_STRIDE
    movdqa     [r0 + -4*FDEC_STRIDE], %1
    movdqa     [r0 + -3*FDEC_STRIDE], %1
    movdqa     [r0 + -2*FDEC_STRIDE], %1
    movdqa     [r0 + -1*FDEC_STRIDE], %1
    movdqa     [r0 + 0*FDEC_STRIDE], %1
    movdqa     [r0 + 1*FDEC_STRIDE], %1
    movdqa     [r0 + 2*FDEC_STRIDE], %1
    movdqa     [r0 + 3*FDEC_STRIDE], %1
    add        r0, 8*FDEC_STRIDE
    movdqa     [r0 + -4*FDEC_STRIDE], %1
    movdqa     [r0 + -3*FDEC_STRIDE], %1
    movdqa     [r0 + -2*FDEC_STRIDE], %1
    movdqa     [r0 + -1*FDEC_STRIDE], %1
    movdqa     [r0 + 0*FDEC_STRIDE], %1
    movdqa     [r0 + 1*FDEC_STRIDE], %1
    movdqa     [r0 + 2*FDEC_STRIDE], %1
    movdqa     [r0 + 3*FDEC_STRIDE], %1
%endmacro

SECTION_RODATA

ALIGN 16
pb_1:       times 16 db 1
pb_3:       times 16 db 3
pw_2:       times 4 dw 2
pw_4:       times 4 dw 4
pw_8:       times 8 dw 8
pw_76543210:                          ; alias label: same data as pw_3210
pw_3210:    dw 0, 1, 2, 3, 4, 5, 6, 7
pb_00s_ff:  times 8 db 0
pb_0s_ff:   times 7 db 0
            db 0xff                   ; 7 zero bytes followed by one 0xff
pw_ff00:    times 8 dw 0xff00
pb_reverse: db 7, 6, 5, 4, 3, 2, 1, 0

SECTION .text

; dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
; Branch-free lowpass via pavgb: avg(left,right) is rounded up, so the
; carry bit (left^right)&1 is subtracted before averaging with src again.
; %6 selects the mov width: 'q' for MMX regs, 'dqa' for XMM regs.
%macro PRED8x8_LOWPASS0 6
    mov%6      %5, %2
    pavgb      %2, %3                 ; %2 = avg(left,right), rounded up
    pxor       %3, %5                 ; %3 = left ^ right
    mov%6      %1, %4
    pand       %3, [pb_1 GLOBAL]      ; isolate the rounding carry bit
    psubusb    %2, %3                 ; -> floor of the first average
    pavgb      %1, %2                 ; %1 = avg(src, that) = lowpass result
%endmacro

; MMX-register instantiation of the lowpass filter.
%macro PRED8x8_LOWPASS 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, q
%endmacro

; XMM-register instantiation of the lowpass filter.
%macro PRED8x8_LOWPASS_XMM 5
    PRED8x8_LOWPASS0 %1, %2, %3, %4, %5, dqa
%endmacro

; Broadcast scalar args 2..4 into all four words of mm0/mm2/mm4.
; On x86-32 the args are read from the stack (r1m..r3m) directly.
%macro LOAD_PLANE_ARGS 0
%ifdef ARCH_X86_64
    movd       mm0, r1d
    movd       mm2, r2d
    movd       mm4, r3d
    pshufw     mm0, mm0, 0
    pshufw     mm2, mm2, 0
    pshufw     mm4, mm4, 0
%else
    pshufw     mm0, r1m, 0
    pshufw     mm2, r2m, 0
    pshufw     mm4, r3m, 0
%endif
%endmacro

;-----------------------------------------------------------------------------
; void predict_4x4_ddl_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Diagonal-down-left 4x4 intra prediction from the 8 pixels above the block.
cglobal predict_4x4_ddl_mmxext, 1,1
    movq       mm1, [r0-FDEC_STRIDE]  ; 8 top neighbours
    movq       mm2, mm1
    movq       mm3, mm1
    movq       mm4, mm1
    psllq      mm1, 8
    pxor       mm2, mm1
    psrlq      mm2, 8
    pxor       mm3, mm2               ; xor/shift pair builds the shifted edge
                                      ; vector with the last pixel extended
    PRED8x8_LOWPASS mm0, mm1, mm3, mm4, mm5
%assign Y 0
%rep 4
    psrlq      mm0, 8                 ; each row is the filtered edge shifted
    movd       [r0+Y*FDEC_STRIDE], mm0 ; one pixel further along the diagonal
%assign Y (Y+1)
%endrep
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_ddr_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Emits predict_4x4_ddr, predict_4x4_vr and predict_4x4_hd for one PALIGNR
; flavour (%1 = mmxext or ssse3; PALIGNR is %define'd before instantiation).
%macro PREDICT_4x4 1
cglobal predict_4x4_ddr_%1, 1,1
    movq       mm1, [r0+1*FDEC_STRIDE-8]
    movq       mm2, [r0+0*FDEC_STRIDE-8]
    punpckhbw  mm2, [r0-1*FDEC_STRIDE-8]
    movd       mm3, [r0-1*FDEC_STRIDE]
    punpckhwd  mm1, mm2
    PALIGNR    mm3, mm1, 5, mm1       ; gather top + top-left + left pixels
    movq       mm1, mm3               ; into one contiguous edge register
    PALIGNR    mm3, [r0+2*FDEC_STRIDE-8], 7, mm4
    movq       mm2, mm3
    PALIGNR    mm3, [r0+3*FDEC_STRIDE-8], 7, mm4
    PRED8x8_LOWPASS mm0, mm3, mm1, mm2, mm4
%assign Y 3
    movd       [r0+Y*FDEC_STRIDE], mm0
%rep 3
%assign Y (Y-1)
    psrlq      mm0, 8                 ; successive rows shift along the diagonal
    movd       [r0+Y*FDEC_STRIDE], mm0
%endrep
    RET

; Vertical-right 4x4 prediction from top, top-left and left neighbours.
cglobal predict_4x4_vr_%1, 1,1
    movd       mm0, [r0-1*FDEC_STRIDE]              ; ........t3t2t1t0
    movq       mm7, mm0
    PALIGNR    mm0, [r0-1*FDEC_STRIDE-8], 7, mm1    ; ......t3t2t1t0lt
    pavgb      mm7, mm0
    PALIGNR    mm0, [r0+0*FDEC_STRIDE-8], 7, mm1    ; ....t3t2t1t0ltl0
    movq       mm1, mm0
    PALIGNR    mm0, [r0+1*FDEC_STRIDE-8], 7, mm2    ; ..t3t2t1t0ltl0l1
    movq       mm2, mm0
    PALIGNR    mm0, [r0+2*FDEC_STRIDE-8], 7, mm3    ; t3t2t1t0ltl0l1l2
    PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
    movq       mm1, mm3
    psrlq      mm3, 16
    psllq      mm1, 48
    movd       [r0+0*FDEC_STRIDE], mm7  ; even rows: 2-tap averages
    movd       [r0+1*FDEC_STRIDE], mm3  ; odd rows: 3-tap lowpass
    PALIGNR    mm7, mm1, 7, mm2
    psllq      mm1, 8
    movd       [r0+2*FDEC_STRIDE], mm7
    PALIGNR    mm3, mm1, 7, mm1
    movd       [r0+3*FDEC_STRIDE], mm3
    RET

; Horizontal-down 4x4 prediction from top, top-left and left neighbours.
cglobal predict_4x4_hd_%1, 1,1
    movd       mm0, [r0-1*FDEC_STRIDE-4] ; lt ..
    punpckldq  mm0, [r0-1*FDEC_STRIDE]   ; t3 t2 t1 t0 lt .. .. ..
    psllq      mm0, 8                    ; t2 t1 t0 lt .. .. .. ..
    movq       mm1, [r0+3*FDEC_STRIDE-8] ; l3
    punpckhbw  mm1, [r0+2*FDEC_STRIDE-8] ; l2 l3
    movq       mm2, [r0+1*FDEC_STRIDE-8] ; l1
    punpckhbw  mm2, [r0+0*FDEC_STRIDE-8] ; l0 l1
    punpckhwd  mm1, mm2                  ; l0 l1 l2 l3
    punpckhdq  mm1, mm0                  ; t2 t1 t0 lt l0 l1 l2 l3
    movq       mm0, mm1
    movq       mm2, mm1
    movq       mm7, mm1
    psrlq      mm0, 16                   ; .. .. t2 t1 t0 lt l0 l1
    psrlq      mm2, 8                    ; .. t2 t1 t0 lt l0 l1 l2
    pavgb      mm7, mm2                  ; 2-tap averages of adjacent pixels
    PRED8x8_LOWPASS mm3, mm1, mm0, mm2, mm4
    punpcklbw  mm7, mm3                  ; interleave 2-tap and 3-tap results
    psrlq      mm3, 32
    PALIGNR    mm3, mm7, 6, mm6
%assign Y 3
    movd       [r0+Y*FDEC_STRIDE], mm7   ; rows written bottom-up
%rep 2
%assign Y (Y-1)
    psrlq      mm7, 16
    movd       [r0+Y*FDEC_STRIDE], mm7
%endrep
    movd       [r0+0*FDEC_STRIDE], mm3
    RET
%endmacro

%define PALIGNR PALIGNR_MMX
PREDICT_4x4 mmxext
%define PALIGNR PALIGNR_SSSE3
PREDICT_4x4 ssse3

;-----------------------------------------------------------------------------
; void predict_4x4_hu_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Horizontal-up 4x4 prediction from the 4 left neighbours only.
cglobal predict_4x4_hu_mmxext, 1,1
    movq       mm0, [r0+0*FDEC_STRIDE-8]
    punpckhbw  mm0, [r0+1*FDEC_STRIDE-8]
    movq       mm1, [r0+2*FDEC_STRIDE-8]
    punpckhbw  mm1, [r0+3*FDEC_STRIDE-8]
    punpckhwd  mm0, mm1               ; pack the 4 left pixels together
    movq       mm1, mm0
    punpckhbw  mm1, mm1
    pshufw     mm1, mm1, 0xFF         ; broadcast the bottom-most left pixel
    punpckhdq  mm0, mm1               ; extend the edge with that pixel
    movq       mm2, mm0
    movq       mm3, mm0
    movq       mm7, mm0
    psrlq      mm2, 16
    psrlq      mm3, 8
    pavgb      mm7, mm3               ; 2-tap averages
    PRED8x8_LOWPASS mm4, mm0, mm2, mm3, mm5
    punpcklbw  mm7, mm4               ; interleave 2-tap and 3-tap results
%assign Y 0
    movd       [r0+Y*FDEC_STRIDE], mm7
%rep 2
%assign Y (Y+1)
    psrlq      mm7, 16
    movd       [r0+Y*FDEC_STRIDE], mm7
%endrep
    movd       [r0+3*FDEC_STRIDE], mm1 ; last row is the broadcast pixel
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_vl_mmxext( uint8_t *src );
;-----------------------------------------------------------------------------
; Vertical-left 4x4 prediction from the 8 pixels above the block.
cglobal predict_4x4_vl_mmxext, 1,1
    movq       mm1, [r0-FDEC_STRIDE]  ; 8 top neighbours
    movq       mm3, mm1
    movq       mm2, mm1
    psrlq      mm3, 8
    psrlq      mm2, 16
    movq       mm4, mm3
    pavgb      mm4, mm1               ; 2-tap averages -> even rows
    PRED8x8_LOWPASS mm0, mm1, mm2, mm3, mm5 ; 3-tap lowpass -> odd rows
    movd       [r0+0*FDEC_STRIDE], mm4
    movd       [r0+1*FDEC_STRIDE], mm0
    psrlq      mm4, 8
    psrlq      mm0, 8
    movd       [r0+2*FDEC_STRIDE], mm4
    movd       [r0+3*FDEC_STRIDE], mm0
    RET

;-----------------------------------------------------------------------------
; void predict_4x4_dc( uint8_t *src );
;-----------------------------------------------------------------------------
; DC prediction: every pixel = (sum(4 top) + sum(4 left) + 4) >> 3.
cglobal predict_4x4_dc_mmxext, 1,4
    pxor       mm7, mm7
    movd       mm0, [r0-FDEC_STRIDE]  ; movd zero-extends, so psadbw below
    psadbw     mm0, mm7               ; yields the sum of the 4 top pixels
    movd       r3d, mm0
    movzx      r1d, byte [r0-1]       ; first left pixel
%assign n 1
%rep 3
    movzx      r2d, byte [r0+FDEC_STRIDE*n-1]
    add        r1d, r2d               ; accumulate remaining left pixels
%assign n n+1
%endrep
    lea        r1d, [r1+r3+4]         ; top + left + rounding bias
    shr        r1d, 3
    imul       r1d, 0x01010101        ; splat the DC byte across a dword
    mov        [r0+FDEC_STRIDE*0], r1d
    mov        [r0+FDEC_STRIDE*1], r1d
    mov        [r0+FDEC_STRIDE*2], r1d
    mov        [r0+FDEC_STRIDE*3], r1d
    RET

%macro PREDICT_FILTER 1
;-----------------------------------------------------------------------------
;void predict_8x8_filter( uint8_t *src, uint8_t edge[33], int i_neighbor, int i_filters )
;-----------------------------------------------------------------------------
; Build the filtered edge[] buffer used by the 8x8 intra predictors.
; r2 = i_neighbor availability flags, r3 = i_filters request flags; the
; exact bit meanings are defined in the C headers — not visible here.
; The .fix_* paths patch unavailable neighbour pixels before filtering.
cglobal predict_8x8_filter_%1, 4,5
    add        r0, 0x58               ; bias r0 so all offsets fit in disp8
%define src r0-0x58
%ifndef ARCH_X86_64
    mov        r4, r1                 ; x86-32: keep edge ptr in a spare reg
%define t1 r4
%define t4 r1
%else
%define t1 r1
%define t4 r4
%endif
    test       r3b, 0x01              ; left-edge filter requested?
    je         .check_top
    movq       mm0, [src+0*FDEC_STRIDE-8]
    punpckhbw  mm0, [src-1*FDEC_STRIDE-8]
    movq       mm1, [src+2*FDEC_STRIDE-8]
    punpckhbw  mm1, [src+1*FDEC_STRIDE-8]
    punpckhwd  mm1, mm0
    movq       mm2, [src+4*FDEC_STRIDE-8]
    punpckhbw  mm2, [src+3*FDEC_STRIDE-8]
    movq       mm3, [src+6*FDEC_STRIDE-8]
    punpckhbw  mm3, [src+5*FDEC_STRIDE-8]
    punpckhwd  mm3, mm2
    punpckhdq  mm3, mm1               ; mm3 = 8 left-column pixels packed
    movq       mm0, [src+7*FDEC_STRIDE-8]
    movq       mm1, [src-1*FDEC_STRIDE]
    movq       mm4, mm3
    movq       mm2, mm3
    PALIGNR    mm4, mm0, 7, mm0
    PALIGNR    mm1, mm2, 1, mm2
    test       r2b, 0x08              ; top-left pixel available?
    je         .fix_lt_1
.do_left:
    movq       mm0, mm4
    PRED8x8_LOWPASS mm2, mm1, mm4, mm3, mm5
    movq       [t1+8], mm2            ; edge[8..15] = filtered left column
    movq       mm4, mm0
    PRED8x8_LOWPASS mm1, mm3, mm0, mm4, mm5
    movd       t4, mm1
    mov        [t1+7], t4b            ; edge[7] = filtered corner pixel
.check_top:
    test       r3b, 0x02              ; top-edge filter requested?
    je         .done
    movq       mm0, [src-1*FDEC_STRIDE-8]
    movq       mm3, [src-1*FDEC_STRIDE]
    movq       mm1, [src-1*FDEC_STRIDE+8]
    movq       mm2, mm3
    movq       mm4, mm3
    PALIGNR    mm2, mm0, 7, mm0
    PALIGNR    mm1, mm4, 1, mm4
    test       r2b, 0x08              ; top-left available?
    je         .fix_lt_2
    test       r2b, 0x04              ; top-right available?
    je         .fix_tr_1
.do_top:
    PRED8x8_LOWPASS mm4, mm2, mm1, mm3, mm5
    movq       [t1+16], mm4           ; edge[16..23] = filtered top row
    test       r3b, 0x04              ; top-right filter requested?
    je         .done
    test       r2b, 0x04
    je         .fix_tr_2
    movq       mm0, [src-1*FDEC_STRIDE+8]
    movq       mm5, mm0
    movq       mm2, mm0
    movq       mm4, mm0
    psrlq      mm5, 56
    PALIGNR    mm2, mm3, 7, mm3
    PALIGNR    mm5, mm4, 1, mm4
    PRED8x8_LOWPASS mm1, mm2, mm5, mm0, mm4
    jmp        .do_topright
.fix_tr_2:
    punpckhbw  mm3, mm3               ; top-right missing: replicate the
    pshufw     mm1, mm3, 0xFF         ; last available top pixel
.do_topright:
    movq       [t1+24], mm1           ; edge[24..31] = filtered top-right
    psrlq      mm1, 56
    movd       t4, mm1
    mov        [t1+32], t4b           ; edge[32] = last pixel, extended
.done:
    REP_RET
.fix_lt_1:
    movq       mm5, mm3               ; patch missing top-left into the
    pxor       mm5, mm4               ; left-column edge vector
    psrlq      mm5, 56
    psllq      mm5, 48
    pxor       mm1, mm5
    jmp        .do_left
.fix_lt_2:
    movq       mm5, mm3               ; patch missing top-left into the
    pxor       mm5, mm2               ; top-row edge vector
    psllq      mm5, 56
    psrlq      mm5, 56
    pxor       mm2, mm5
    test       r2b, 0x04
    jne        .do_top
.fix_tr_1:
    movq       mm5, mm3               ; patch missing top-right pixel
    pxor       mm5, mm1
    psrlq      mm5, 56
    psllq      mm5, 56
    pxor       mm1, mm5
    jmp        .do_top
; NOTE(review): SOURCE is truncated here, inside the PREDICT_FILTER macro —
; the closing %endmacro and everything after it (macro instantiations etc.)
; are missing from this chunk; restore the tail from the upstream file.
;; [extraction artifact] code-viewer UI footer (keyboard-shortcut help), not
;; part of the source: "快捷键说明" = shortcut help; copy code = Ctrl+C,
;; search code = Ctrl+F, fullscreen = F11, toggle theme = Ctrl+Shift+D,
;; show shortcuts = ?, increase font = Ctrl+=, decrease font = Ctrl+-