📄 mpegvideo_mmx_template.c

📁 linux下的MPEG1
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * MPEG video MMX templates * * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at> * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */#undef SPREADW#undef PMAXW#undef PMAX#ifdef HAVE_MMX2#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"#define PMAXW(a,b) "pmaxsw " #a ", " #b "     \n\t"#define PMAX(a,b) \            "pshufw $0x0E," #a ", " #b "        \n\t"\            PMAXW(b, a)\            "pshufw $0x01," #a ", " #b "        \n\t"\            PMAXW(b, a)#else#define SPREADW(a) \        "punpcklwd " #a ", " #a " \n\t"\        "punpcklwd " #a ", " #a " \n\t"#define PMAXW(a,b) \        "psubusw " #a ", " #b " \n\t"\        "paddw " #a ", " #b "   \n\t"#define PMAX(a,b)  \            "movq " #a ", " #b "                \n\t"\            "psrlq $32, " #a "                  \n\t"\            PMAXW(b, a)\            "movq " #a ", " #b "                \n\t"\            "psrlq $16, " #a "                  \n\t"\            PMAXW(b, a)#endifstatic int RENAME(dct_quantize)(MpegEncContext *s,                            DCTELEM *block, int n,                            int qscale, int *overflow){    long last_non_zero_p1;    int level=0, q; //=0 is cuz gcc says uninitalized ...    const uint16_t *qmat, *bias;    DECLARE_ALIGNED_8(int16_t, temp_block[64]);    assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?    //s->fdct (block);    RENAMEl(ff_fdct) (block); //cant be anything else ...    if(s->dct_error_sum)        s->denoise_dct(s, block);    if (s->mb_intra) {        int dummy;        if (n < 4)            q = s->y_dc_scale;        else            q = s->c_dc_scale;        /* note: block[0] is assumed to be positive */        if (!s->h263_aic) {#if 1        asm volatile (                "mul %%ecx                \n\t"                : "=d" (level), "=a"(dummy)                : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1])        );#else        asm volatile (                "xorl %%edx, %%edx        \n\t"                "divw %%cx                \n\t"                "movzwl %%ax, %%eax       \n\t"                : "=a" (level)                : "a" ((block[0]>>2) + q), "c" (q<<1)                : "%edx"        );#endif        } else            /* For AIC we skip quant/dequant of INTRADC */            level = (block[0] + 4)>>3;        block[0]=0; //avoid fake overflow//        temp_block[0] = (block[0] + (q >> 1)) / q;        last_non_zero_p1 = 1;        bias = s->q_intra_matrix16[qscale][1];        qmat = s->q_intra_matrix16[qscale][0];    } else {        last_non_zero_p1 = 0;        bias = s->q_inter_matrix16[qscale][1];        qmat = s->q_inter_matrix16[qscale][0];    }    if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){        asm volatile(            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1            SPREADW(%%mm3)            "pxor %%mm7, %%mm7                  \n\t" // 0            "pxor %%mm4, %%mm4                  \n\t" // 0            "movq (%2), %%mm5                   \n\t" // qmat[0]            "pxor %%mm6, %%mm6                  \n\t"            "psubw (%3), %%mm6                  \n\t" // -bias[0]            "mov $-128, %%"REG_a"               \n\t"            ASMALIGN(4)            "1:                                 \n\t"            "pxor %%mm1, %%mm1                  \n\t" // 0            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00            "pxor %%mm1, %%mm0                  \n\t"            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])            "psubusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[0]            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16            "por %%mm0, %%mm4                   \n\t"            "pxor %%mm1, %%mm0                  \n\t"            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])            "movq %%mm0, (%5, %%"REG_a")        \n\t"            "pcmpeqw %%mm7, %%mm0               \n\t" // out==0 ? 0xFF : 0x00            "movq (%4, %%"REG_a"), %%mm1        \n\t"            "movq %%mm7, (%1, %%"REG_a")        \n\t" // 0            "pandn %%mm1, %%mm0                 \n\t"            PMAXW(%%mm0, %%mm3)            "add $8, %%"REG_a"                  \n\t"            " js 1b                             \n\t"            PMAX(%%mm3, %%mm0)            "movd %%mm3, %%"REG_a"              \n\t"            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1            : "+a" (last_non_zero_p1)            : "r" (block+64), "r" (qmat), "r" (bias),              "r" (inv_zigzag_direct16+64), "r" (temp_block+64)        );        // note the asm is split cuz gcc doesnt like that many operands ...        asm volatile(            "movd %1, %%mm1                     \n\t" // max_qcoeff            SPREADW(%%mm1)            "psubusw %%mm1, %%mm4               \n\t"            "packuswb %%mm4, %%mm4              \n\t"            "movd %%mm4, %0                     \n\t" // *overflow        : "=g" (*overflow)        : "g" (s->max_qcoeff)        );    }else{ // FMT_H263        asm volatile(            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1            SPREADW(%%mm3)            "pxor %%mm7, %%mm7                  \n\t" // 0            "pxor %%mm4, %%mm4                  \n\t" // 0            "mov $-128, %%"REG_a"               \n\t"            ASMALIGN(4)            "1:                                 \n\t"            "pxor %%mm1, %%mm1                  \n\t" // 0            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] <= 0 ? 0xFF : 0x00            "pxor %%mm1, %%mm0                  \n\t"            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])            "movq (%3, %%"REG_a"), %%mm6        \n\t" // bias[0]            "paddusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[0]            "movq (%2, %%"REG_a"), %%mm5        \n\t" // qmat[i]            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16            "por %%mm0, %%mm4                   \n\t"            "pxor %%mm1, %%mm0                  \n\t"            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])            "movq %%mm0, (%5, %%"REG_a")        \n\t"
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -