⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quant.c

📁 从服务器上下载的 x264编码器最新的源码
💻 C
字号:
/*****************************************************************************
* quant.c: h264 encoder
*****************************************************************************
* Copyright (C) 2007 Guillaume Poirier <gpoirier@mplayerhq.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
*****************************************************************************/

#include "common/common.h"
#include "ppccommon.h"
#include "quant.h"

/*
 * NOTE(review): every routine below accesses memory through vec_ld/vec_st,
 * which operate on whole 16-byte vectors.  Presumably callers pass 16-byte
 * aligned dct/mf/bias buffers -- confirm against the call sites.
 */

// quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
//
// Quantizes the two 8-coefficient vectors found at byte offsets idx0 and idx1
// from the start of dct, using per-coefficient multipliers (mf) and rounding
// offsets (bias) loaded from the same byte offsets.  For every coefficient:
//   1. remember the sign (msk = coef < 0) and take the absolute value,
//   2. add the bias with unsigned saturation (vec_adds),
//   3. multiply by mf as 16x16->32 bit (even and odd lanes separately),
//   4. shift the 32-bit products right by i_qbitsv,
//   5. re-interleave even/odd lanes and pack back to 16 bits with saturation,
//   6. restore the sign: (x ^ msk) + (msk & 1) negates where msk is all-ones,
//   7. store the result and OR it into the running non-zero accumulator nz.
// The statement order interleaves the A and B halves on purpose; keep it.
// Expects the caller to have declared every temporary used here.
#define QUANT_16_U( idx0, idx1 )                                             \
do {                                                                         \
    temp1v = vec_ld((idx0), *dct);                                           \
    temp2v = vec_ld((idx1), *dct);                                           \
    mfvA = vec_ld((idx0), mf);                                               \
    mfvB = vec_ld((idx1), mf);                                               \
    biasvA = vec_ld((idx0), bias);                                           \
    biasvB = vec_ld((idx1), bias);                                           \
    mskA = vec_cmplt(temp1v, zero_s16v);                                     \
    mskB = vec_cmplt(temp2v, zero_s16v);                                     \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);         \
    coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);         \
    coefvA = vec_adds(coefvA, biasvA);                                       \
    coefvB = vec_adds(coefvB, biasvB);                                       \
    multEvenvA = vec_mule(coefvA, mfvA);                                     \
    multOddvA = vec_mulo(coefvA, mfvA);                                      \
    multEvenvB = vec_mule(coefvB, mfvB);                                     \
    multOddvB = vec_mulo(coefvB, mfvB);                                      \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                               \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                                 \
    multEvenvB = vec_sr(multEvenvB, i_qbitsv);                               \
    multOddvB = vec_sr(multOddvB, i_qbitsv);                                 \
    temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA),        \
                                   vec_mergel(multEvenvA, multOddvA));       \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB),        \
                                   vec_mergel(multEvenvB, multOddvB));       \
    temp1v = vec_xor(temp1v, mskA);                                          \
    temp2v = vec_xor(temp2v, mskB);                                          \
    temp1v = vec_adds(temp1v, vec_and(mskA, one));                           \
    vec_st(temp1v, (idx0), (int16_t*)dct);                                   \
    temp2v = vec_adds(temp2v, vec_and(mskB, one));                           \
    nz = vec_or(nz, vec_or(temp1v, temp2v));                                 \
    vec_st(temp2v, (idx1), (int16_t*)dct);                                   \
} while( 0 )

/**
 * Quantize a 4x4 block of coefficients in place.
 *
 * @param dct   coefficients, overwritten with the quantized values
 * @param mf    per-coefficient multipliers
 * @param bias  per-coefficient rounding offsets added before the multiply
 * @return non-zero if at least one quantized coefficient is non-zero
 */
int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
{
    LOAD_ZERO;
    vector bool short mskA;
    vec_u32_t i_qbitsv;
    vec_u16_t coefvA;
    vec_u32_t multEvenvA, multOddvA;
    vec_u16_t mfvA;
    vec_u16_t biasvA;
    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vector bool short mskB;
    vec_u16_t coefvB;
    vec_u32_t multEvenvB, multOddvB;
    vec_u16_t mfvB;
    vec_u16_t biasvB;

    vec_s16_t temp1v, temp2v;

    // Broadcast the right-shift amount (16) to every 32-bit lane.
    vec_u32_u qbits_u;
    qbits_u.s[0]=16;
    i_qbitsv = vec_splat(qbits_u.v, 0);

    // Two vectors of eight int16 cover the whole 4x4 block.
    QUANT_16_U( 0, 16 );
    return vec_any_ne(nz, zero_s16v);
}

// DC quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
//
// Same scheme as QUANT_16_U, except that one multiplier vector (mfv) and one
// bias vector (biasv) are shared by all coefficients, and the bias and sign
// fix-up additions use the non-saturating vec_add.
#define QUANT_16_U_DC( idx0, idx1 )                                          \
do {                                                                         \
    temp1v = vec_ld((idx0), *dct);                                           \
    temp2v = vec_ld((idx1), *dct);                                           \
    mskA = vec_cmplt(temp1v, zero_s16v);                                     \
    mskB = vec_cmplt(temp2v, zero_s16v);                                     \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);         \
    coefvB = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp2v), temp2v);         \
    coefvA = vec_add(coefvA, biasv);                                         \
    coefvB = vec_add(coefvB, biasv);                                         \
    multEvenvA = vec_mule(coefvA, mfv);                                      \
    multOddvA = vec_mulo(coefvA, mfv);                                       \
    multEvenvB = vec_mule(coefvB, mfv);                                      \
    multOddvB = vec_mulo(coefvB, mfv);                                       \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                               \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                                 \
    multEvenvB = vec_sr(multEvenvB, i_qbitsv);                               \
    multOddvB = vec_sr(multOddvB, i_qbitsv);                                 \
    temp1v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA),        \
                                   vec_mergel(multEvenvA, multOddvA));       \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvB, multOddvB),        \
                                   vec_mergel(multEvenvB, multOddvB));       \
    temp1v = vec_xor(temp1v, mskA);                                          \
    temp2v = vec_xor(temp2v, mskB);                                          \
    temp1v = vec_add(temp1v, vec_and(mskA, one));                            \
    vec_st(temp1v, (idx0), (int16_t*)dct);                                   \
    temp2v = vec_add(temp2v, vec_and(mskB, one));                            \
    nz = vec_or(nz, vec_or(temp1v, temp2v));                                 \
    vec_st(temp2v, (idx1), (int16_t*)dct);                                   \
} while( 0 )

/**
 * Quantize a 4x4 block in place with one multiplier and one bias shared by
 * all sixteen coefficients.
 *
 * @param dct   coefficients, overwritten with the quantized values
 * @param mf    multiplier; stored into a 16-bit lane before being broadcast
 * @param bias  rounding offset; stored into a 16-bit lane before being broadcast
 * @return non-zero if at least one quantized coefficient is non-zero
 */
int x264_quant_4x4_dc_altivec( int16_t dct[4][4], int mf, int bias )
{
    LOAD_ZERO;
    vector bool short mskA;
    vec_u32_t i_qbitsv;
    vec_u16_t coefvA;
    vec_u32_t multEvenvA, multOddvA;
    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vector bool short mskB;
    vec_u16_t coefvB;
    vec_u32_t multEvenvB, multOddvB;

    vec_s16_t temp1v, temp2v;

    vec_u16_t mfv;
    vec_u16_t biasv;

    // Broadcast the scalar multiplier to all eight 16-bit lanes.
    vec_u16_u mf_u;
    mf_u.s[0]=mf;
    mfv = vec_splat( mf_u.v, 0 );

    // Broadcast the right-shift amount (16) to every 32-bit lane.
    vec_u32_u qbits_u;
    qbits_u.s[0]=16;
    i_qbitsv = vec_splat(qbits_u.v, 0);

    // Broadcast the scalar bias to all eight 16-bit lanes.
    vec_u16_u bias_u;
    bias_u.s[0]=bias;
    biasv = vec_splat(bias_u.v, 0);

    QUANT_16_U_DC( 0, 16 );
    return vec_any_ne(nz, zero_s16v);
}

// DC quant of a whole 2x2 block
//
// A full 8-lane vector is loaded and stored, but only the first four lanes
// (selected by sel) receive quantized values; vec_sel writes the other four
// lanes back exactly as they were loaded.
#define QUANT_4_U_DC( idx0 )                                                 \
do {                                                                         \
    const vec_u16_t sel = (vec_u16_t) CV(-1,-1,-1,-1,0,0,0,0);               \
    temp1v = vec_ld((idx0), *dct);                                           \
    mskA = vec_cmplt(temp1v, zero_s16v);                                     \
    coefvA = (vec_u16_t)vec_max(vec_sub(zero_s16v, temp1v), temp1v);         \
    coefvA = vec_add(coefvA, biasv);                                         \
    multEvenvA = vec_mule(coefvA, mfv);                                      \
    multOddvA = vec_mulo(coefvA, mfv);                                       \
    multEvenvA = vec_sr(multEvenvA, i_qbitsv);                               \
    multOddvA = vec_sr(multOddvA, i_qbitsv);                                 \
    temp2v = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA),        \
                                   vec_mergel(multEvenvA, multOddvA));       \
    temp2v = vec_xor(temp2v, mskA);                                          \
    temp2v = vec_add(temp2v, vec_and(mskA, one));                            \
    temp1v = vec_sel(temp1v, temp2v, sel);                                   \
    nz = vec_or(nz, temp1v);                                                 \
    vec_st(temp1v, (idx0), (int16_t*)dct);                                   \
} while( 0 )

/**
 * Quantize a 2x2 block in place with one shared multiplier and bias.
 *
 * NOTE(review): the vector load/store spans eight int16 lanes while dct holds
 * only four; the extra lanes are rewritten with their loaded values.
 * Presumably the memory following dct is always valid to touch -- confirm.
 *
 * @param dct   coefficients, overwritten with the quantized values
 * @param mf    multiplier; stored into a 16-bit lane before being broadcast
 * @param bias  rounding offset; stored into a 16-bit lane before being broadcast
 * @return non-zero if at least one of the four quantized coefficients is non-zero
 */
int x264_quant_2x2_dc_altivec( int16_t dct[2][2], int mf, int bias )
{
    LOAD_ZERO;
    vector bool short mskA;
    vec_u32_t i_qbitsv;
    vec_u16_t coefvA;
    vec_u32_t multEvenvA, multOddvA;
    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vec_s16_t temp1v, temp2v;

    vec_u16_t mfv;
    vec_u16_t biasv;

    // Broadcast the scalar multiplier to all eight 16-bit lanes.
    vec_u16_u mf_u;
    mf_u.s[0]=mf;
    mfv = vec_splat( mf_u.v, 0 );

    // Broadcast the right-shift amount (16) to every 32-bit lane.
    vec_u32_u qbits_u;
    qbits_u.s[0]=16;
    i_qbitsv = vec_splat(qbits_u.v, 0);

    // Broadcast the scalar bias to all eight 16-bit lanes.
    vec_u16_u bias_u;
    bias_u.s[0]=bias;
    biasv = vec_splat(bias_u.v, 0);

    // Only the first four lanes belong to the block, so only they may
    // contribute to the return value.
    static const vec_s16_t mask2 = CV(-1, -1, -1, -1,  0, 0, 0, 0);
    QUANT_4_U_DC(0);
    return vec_any_ne(vec_and(nz, mask2), zero_s16v);
}

/**
 * Quantize an 8x8 block of coefficients in place.
 *
 * @param dct   coefficients, overwritten with the quantized values
 * @param mf    per-coefficient multipliers
 * @param bias  per-coefficient rounding offsets added before the multiply
 * @return non-zero if at least one quantized coefficient is non-zero
 */
int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64] )
{
    LOAD_ZERO;
    vector bool short mskA;
    vec_u32_t i_qbitsv;
    vec_u16_t coefvA;
    vec_u32_t multEvenvA, multOddvA;
    vec_u16_t mfvA;
    vec_u16_t biasvA;
    vec_s16_t one = vec_splat_s16(1);
    vec_s16_t nz = zero_s16v;

    vector bool short mskB;
    vec_u16_t coefvB;
    vec_u32_t multEvenvB, multOddvB;
    vec_u16_t mfvB;
    vec_u16_t biasvB;

    vec_s16_t temp1v, temp2v;

    // Broadcast the right-shift amount (16) to every 32-bit lane.
    vec_u32_u qbits_u;
    qbits_u.s[0]=16;
    i_qbitsv = vec_splat(qbits_u.v, 0);

    // Each iteration quantizes two 16-byte vectors (16 coefficients);
    // four iterations cover all 64.
    int i;
    for ( i=0; i<4; i++ ) {
      QUANT_16_U( i*2*16, i*2*16+16 );
    }
    return vec_any_ne(nz, zero_s16v);
}

/*
 * Dequantize one 8-coefficient vector starting at dct[y] when the shift is
 * non-negative: dct = (dct * mf) << i_qbits.
 * The eight 32-bit multipliers are packed down to 16 bits with saturation
 * before the multiply, and the 32-bit products are packed back to 16 bits
 * with saturation before the shift.
 * Uses y, i_mf, dequant_mf, i_qbitsv and the temporaries of the caller.
 */
#define DEQUANT_SHL()                                                \
do {                                                                 \
    dctv = vec_ld(0, dct[y]);                                        \
    mf1v = vec_ld(0, dequant_mf[i_mf][y]);                           \
    mf2v = vec_ld(16, dequant_mf[i_mf][y]);                          \
    mfv  = vec_packs(mf1v, mf2v);                                    \
                                                                     \
    multEvenvA = vec_mule(dctv, mfv);                                \
    multOddvA = vec_mulo(dctv, mfv);                                 \
    dctv = (vec_s16_t) vec_packs(vec_mergeh(multEvenvA, multOddvA),  \
                                 vec_mergel(multEvenvA, multOddvA)); \
    dctv = vec_sl(dctv, i_qbitsv);                                   \
    vec_st(dctv, 0, dct[y]);                                         \
} while( 0 )

/*
 * Dequantize one 8-coefficient vector starting at dct[y] when the shift is
 * negative: dct = (dct * mf + f) >> -i_qbits, with an arithmetic shift.
 * Each coefficient is duplicated into two adjacent 16-bit lanes so that it
 * lines up with both 16-bit halves of its 32-bit multiplier; the even-lane
 * product is shifted left by 16 and added to the odd-lane product, i.e. the
 * even half of each mf word is treated as its high half.
 * Uses y, i_mf, dequant_mf, fv, sixteenv, i_qbitsv and the caller's temporaries.
 */
#define DEQUANT_SHR()                                          \
do {                                                           \
    dctv = vec_ld(0, dct[y]);                                  \
    dct1v = vec_mergeh(dctv, dctv);                            \
    dct2v = vec_mergel(dctv, dctv);                            \
    mf1v = vec_ld(0, dequant_mf[i_mf][y]);                     \
    mf2v = vec_ld(16, dequant_mf[i_mf][y]);                    \
                                                               \
    multEvenvA = vec_mule(dct1v, (vec_s16_t)mf1v);             \
    multOddvA = vec_mulo(dct1v, (vec_s16_t)mf1v);              \
    temp1v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \
    temp1v = vec_add(temp1v, fv);                              \
    temp1v = vec_sra(temp1v, i_qbitsv);                        \
                                                               \
    multEvenvA = vec_mule(dct2v, (vec_s16_t)mf2v);             \
    multOddvA = vec_mulo(dct2v, (vec_s16_t)mf2v);              \
    temp2v = vec_add(vec_sl(multEvenvA, sixteenv), multOddvA); \
    temp2v = vec_add(temp2v, fv);                              \
    temp2v = vec_sra(temp2v, i_qbitsv);                        \
                                                               \
    dctv = (vec_s16_t)vec_packs(temp1v, temp2v);               \
    vec_st(dctv, 0, dct[y]);                                   \
} while( 0 )

/**
 * Dequantize a 4x4 block of coefficients in place.
 *
 * @param dct         coefficients, overwritten with the dequantized values
 * @param dequant_mf  multiplier tables; table i_qp%6 is used
 * @param i_qp        quantizer; i_qp/6 - 4 is the shift applied after the
 *                    multiply (left shift when >= 0, rounded right shift
 *                    otherwise)
 */
void x264_dequant_4x4_altivec( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 4;
    int y;

    vec_s16_t dctv;
    vec_s16_t dct1v, dct2v;
    vec_s32_t mf1v, mf2v;
    vec_s16_t mfv;
    vec_s32_t multEvenvA, multOddvA;
    vec_s32_t temp1v, temp2v;

    if( i_qbits >= 0 )
    {
        // Broadcast the left-shift amount to every 16-bit lane.
        vec_u16_t i_qbitsv;
        vec_u16_u qbits_u;
        qbits_u.s[0]=i_qbits;
        i_qbitsv = vec_splat(qbits_u.v, 0);

        // One vector holds two rows of four int16, hence the step of 2.
        for( y = 0; y < 4; y+=2 )
            DEQUANT_SHL();
    }
    else
    {
        // Rounding term: half of the divisor implied by the right shift.
        const int f = 1 << (-i_qbits-1);

        vec_s32_t fv;
        vec_u32_u f_u;
        f_u.s[0]=f;
        fv = (vec_s32_t)vec_splat(f_u.v, 0);

        // Broadcast the right-shift amount to every 32-bit lane.
        vec_u32_t i_qbitsv;
        vec_u32_u qbits_u;
        qbits_u.s[0]=-i_qbits;
        i_qbitsv = vec_splat(qbits_u.v, 0);

        // Constant 16 used to move the even-lane product into the high half.
        vec_u32_t sixteenv;
        vec_u32_u sixteen_u;
        sixteen_u.s[0]=16;
        sixteenv = vec_splat(sixteen_u.v, 0);

        // One vector holds two rows of four int16, hence the step of 2.
        for( y = 0; y < 4; y+=2 )
            DEQUANT_SHR();
    }
}

/**
 * Dequantize an 8x8 block of coefficients in place.
 *
 * @param dct         coefficients, overwritten with the dequantized values
 * @param dequant_mf  multiplier tables; table i_qp%6 is used
 * @param i_qp        quantizer; i_qp/6 - 6 is the shift applied after the
 *                    multiply (left shift when >= 0, rounded right shift
 *                    otherwise)
 */
void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i_qp )
{
    const int i_mf = i_qp%6;
    const int i_qbits = i_qp/6 - 6;
    int y;

    vec_s16_t dctv;
    vec_s16_t dct1v, dct2v;
    vec_s32_t mf1v, mf2v;
    vec_s16_t mfv;
    vec_s32_t multEvenvA, multOddvA;
    vec_s32_t temp1v, temp2v;

    if( i_qbits >= 0 )
    {
        // Broadcast the left-shift amount to every 16-bit lane.
        vec_u16_t i_qbitsv;
        vec_u16_u qbits_u;
        qbits_u.s[0]=i_qbits;
        i_qbitsv = vec_splat(qbits_u.v, 0);

        // One vector holds exactly one row of eight int16.
        for( y = 0; y < 8; y++ )
            DEQUANT_SHL();
    }
    else
    {
        // Rounding term: half of the divisor implied by the right shift.
        const int f = 1 << (-i_qbits-1);

        vec_s32_t fv;
        vec_u32_u f_u;
        f_u.s[0]=f;
        fv = (vec_s32_t)vec_splat(f_u.v, 0);

        // Broadcast the right-shift amount to every 32-bit lane.
        vec_u32_t i_qbitsv;
        vec_u32_u qbits_u;
        qbits_u.s[0]=-i_qbits;
        i_qbitsv = vec_splat(qbits_u.v, 0);

        // Constant 16 used to move the even-lane product into the high half.
        vec_u32_t sixteenv;
        vec_u32_u sixteen_u;
        sixteen_u.s[0]=16;
        sixteenv = vec_splat(sixteen_u.v, 0);

        // One vector holds exactly one row of eight int16.
        for( y = 0; y < 8; y++ )
            DEQUANT_SHR();
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -