📄 mpegvideo_mmx_template.c
字号:
/*
 * Reviewer documentation for the text below.
 *
 * NOTE(review): this text has lost its original line breaks. Preprocessor
 * directives are fused together, "//" comments now run to the end of each
 * physical line, and the function body is cut off inside its second
 * "asm volatile" statement. The original line structure has to be restored
 * before this can be compiled. The code text itself is left untouched here.
 *
 * Register-width selection
 *   HAVE_SSE2 defined : MMREG_WIDTH "16", MM "%%xmm", MOVQ "movdqa"
 *   otherwise         : MMREG_WIDTH "8",  MM "%%mm",  MOVQ "movq"
 *   All eight helper macros are #undef'd first, so the text can be
 *   re-processed with different HAVE_xxx / RENAME settings.
 *
 * Helper macros (operands are asm register strings, AT&T order: src, dst)
 *   SPREADW(a)        Replicates the low 16-bit word of a into every word
 *                     of a (pshuflw+punpcklwd, pshufw, or punpcklwd twice).
 *   PMAXW(a,b)        Per-word maximum left in b. Uses pmaxsw where
 *                     available; the plain-MMX fallback is psubusw followed
 *                     by paddw (unsigned-saturating subtract, then add back).
 *   PMAX(a,b)         Horizontal reduction: folds the words of a together
 *                     with PMAXW, using b as scratch, so the low word of a
 *                     ends up holding the maximum.
 *   SAVE_SIGN(a,b)    Leaves the absolute value of each word in b and keeps
 *                     in a what is needed to undo it. With HAVE_SSSE3 that
 *                     is a copy of the original b (movdqa + pabsw);
 *                     otherwise a is a mask from "pcmpgtw b, a" with a
 *                     zeroed, i.e. all-ones where the word is negative.
 *   RESTORE_SIGN(a,b) Re-applies the saved sign to b (psignw, or pxor+psubw
 *                     with the mask).
 *   NOTE(review): the HAVE_SSSE3 SAVE_SIGN hard-codes "movdqa" instead of
 *   MOVQ, so presumably it is only instantiated with xmm registers - confirm.
 *
 * RENAME(dct_quantize)(s, block, n, qscale, overflow) - visible part only
 *   1. Runs RENAMEl(ff_fdct) on block. If s->dct_error_sum is set, it also
 *      calls s->denoise_dct(s, block).
 *   2. Intra blocks (s->mb_intra):
 *        - q is s->y_dc_scale when n < 4, else s->c_dc_scale.
 *        - When s->h263_aic is not set, level is the high half (edx) of
 *          ((block[0]>>2) + q) * ff_inverse[q<<1], taken from a one-
 *          instruction "mul". Otherwise level = (block[0] + 4) >> 3.
 *        - block[0] is then cleared and last_non_zero_p1 starts at 1.
 *        - qmat/bias point at s->q_intra_matrix16[qscale][0] and [1].
 *      Non-intra blocks: last_non_zero_p1 starts at 0 and qmat/bias come
 *      from s->q_inter_matrix16[qscale].
 *   3. If out_format is FMT_H263 or FMT_H261 and s->mpeg_quant == 0, the
 *      first asm loop runs. It loads one register's worth of qmat and of
 *      bias once, before the loop, and reuses them on every iteration.
 *      REG_a counts from -128 up to 0 in steps of MMREG_WIDTH bytes, used
 *      as an offset from block+64, temp_block+64 and inv_zigzag_direct16+64.
 *      (This assumes DCTELEM is 16 bits wide, so -128 bytes addresses
 *      element 0 - TODO confirm.) Each iteration:
 *        - takes the absolute value of the coefficients and applies bias
 *          with psubusw and qmat with pmulhw;
 *        - ORs the unsigned result into register 4;
 *        - restores the sign and stores the result to temp_block;
 *        - overwrites the source words in block with zero;
 *        - keeps, via PMAXW into register 3, the inv_zigzag_direct16
 *          entries whose quantized output is non-zero.
 *      After the loop, PMAX reduces register 3 and its low byte becomes the
 *      new last_non_zero_p1.
 *   4. The else branch starts a second asm statement with the same
 *      prologue. It is cut off, and everything after it (including the
 *      function's return value and any use of overflow, level or
 *      temp_block) is outside this view.
 */
/* * MPEG video MMX templates * * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at> * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */#undef MMREG_WIDTH#undef MM#undef MOVQ#undef SPREADW#undef PMAXW#undef PMAX#undef SAVE_SIGN#undef RESTORE_SIGN#if defined(HAVE_SSE2)#define MMREG_WIDTH "16"#define MM "%%xmm"#define MOVQ "movdqa"#define SPREADW(a) \ "pshuflw $0, "a", "a" \n\t"\ "punpcklwd "a", "a" \n\t"#define PMAXW(a,b) "pmaxsw "a", "b" \n\t"#define PMAX(a,b) \ "movhlps "a", "b" \n\t"\ PMAXW(b, a)\ "pshuflw $0x0E, "a", "b" \n\t"\ PMAXW(b, a)\ "pshuflw $0x01, "a", "b" \n\t"\ PMAXW(b, a)#else#define MMREG_WIDTH "8"#define MM "%%mm"#define MOVQ "movq"#if defined(HAVE_MMX2)#define SPREADW(a) "pshufw $0, "a", "a" \n\t"#define PMAXW(a,b) "pmaxsw "a", "b" \n\t"#define PMAX(a,b) \ "pshufw $0x0E, "a", "b" \n\t"\ PMAXW(b, a)\ "pshufw $0x01, "a", "b" \n\t"\ PMAXW(b, a)#else#define SPREADW(a) \ "punpcklwd "a", "a" \n\t"\ "punpcklwd "a", "a" \n\t"#define PMAXW(a,b) \ "psubusw "a", "b" \n\t"\ "paddw "a", "b" \n\t"#define PMAX(a,b) \ "movq "a", "b" \n\t"\ "psrlq $32, "a" \n\t"\ PMAXW(b, a)\ "movq "a", "b" \n\t"\ "psrlq $16, "a" \n\t"\ PMAXW(b, a)#endif#endif#ifdef HAVE_SSSE3#define SAVE_SIGN(a,b) \ "movdqa "b", "a" \n\t"\ "pabsw "b", "b" \n\t"#define RESTORE_SIGN(a,b) \ "psignw "a", "b" 
\n\t"#else#define SAVE_SIGN(a,b) \ "pxor "a", "a" \n\t"\ "pcmpgtw "b", "a" \n\t" /* block[i] <= 0 ? 0xFF : 0x00 */\ "pxor "a", "b" \n\t"\ "psubw "a", "b" \n\t" /* ABS(block[i]) */#define RESTORE_SIGN(a,b) \ "pxor "a", "b" \n\t"\ "psubw "a", "b" \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])#endifstatic int RENAME(dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow){ long last_non_zero_p1; int level=0, q; //=0 is cuz gcc says uninitialized ... const uint16_t *qmat, *bias; DECLARE_ALIGNED_16(int16_t, temp_block[64]); assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly? //s->fdct (block); RENAMEl(ff_fdct) (block); //cannot be anything else ... if(s->dct_error_sum) s->denoise_dct(s, block); if (s->mb_intra) { int dummy; if (n < 4) q = s->y_dc_scale; else q = s->c_dc_scale; /* note: block[0] is assumed to be positive */ if (!s->h263_aic) {#if 1 asm volatile ( "mul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1]) );#else asm volatile ( "xorl %%edx, %%edx \n\t" "divw %%cx \n\t" "movzwl %%ax, %%eax \n\t" : "=a" (level) : "a" ((block[0]>>2) + q), "c" (q<<1) : "%edx" );#endif } else /* For AIC we skip quant/dequant of INTRADC */ level = (block[0] + 4)>>3; block[0]=0; //avoid fake overflow// temp_block[0] = (block[0] + (q >> 1)) / q; last_non_zero_p1 = 1; bias = s->q_intra_matrix16[qscale][1]; qmat = s->q_intra_matrix16[qscale][0]; } else { last_non_zero_p1 = 0; bias = s->q_inter_matrix16[qscale][1]; qmat = s->q_inter_matrix16[qscale][0]; } if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ asm volatile( "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0 MOVQ" (%2), "MM"5 \n\t" // qmat[0] "pxor "MM"6, "MM"6 \n\t" "psubw (%3), "MM"6 \n\t" // -bias[0] "mov $-128, %%"REG_a" \n\t" ASMALIGN(4) "1: \n\t" MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] 
SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 "por "MM"0, "MM"4 \n\t" RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) MOVQ" "MM"0, (%5, %%"REG_a") \n\t" "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 MOVQ" (%4, %%"REG_a"), "MM"1 \n\t" MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0 "pandn "MM"1, "MM"0 \n\t" PMAXW(MM"0", MM"3") "add $"MMREG_WIDTH", %%"REG_a" \n\t" " js 1b \n\t" PMAX(MM"3", MM"0") "movd "MM"3, %%"REG_a" \n\t" "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat), "r" (bias), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); }else{ // FMT_H263 asm volatile( "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -