📄 quant_non_intra.c
/* quant_non_intra.c, this file is part of the
 * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder
 * Copyright (C) 2002  James Klicman <james@klicman.org>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "altivec_quantize.h"

#if defined(ALTIVEC_VERIFY) && ALTIVEC_TEST_FUNCTION(quant_non_intra)
#include <stdlib.h>
#include <string.h>
#endif

#include "vectorize.h"
#include <math.h>
#include "../mjpeg_logging.h"

#include "../../mpeg2enc/syntaxconsts.h"
#include "../../mpeg2enc/quantize_precomp.h"

/* #define AMBER_ENABLE */
#include "amber.h"

#ifdef HAVE_ALTIVEC_H
/* include last to ensure AltiVec type semantics, especially for bool. */
#include <altivec.h>
#endif

/*
 * The original C version would start-over from the beginning each time
 * clipping occurred (until saturated) which resulted in the possibility of
 * most dst[] values being re-calculated many times. This version, if clipping
 * is detected, restarts calculating from the current block. Once it's finished
 * it will re-calculate blocks that need it starting with block 0.
 */

#define QUANT_NON_INTRA_PDECL \
    struct QuantizerWorkSpace *wsp, \
    int16_t *src, int16_t *dst, \
    int q_scale_type, int dctsatlim, int *nonsat_mquant \

#define QUANT_NON_INTRA_ARGS \
    wsp, src, dst, q_scale_type, dctsatlim, nonsat_mquant

#define QUANT_NON_INTRA_PFMT \
    "wsp=0x%X, src=0x%X, dst=0x%X, q_scale_type=%d, dctsatlim=%d, " \
    "nonsat_mquant=0x%X"

int quant_non_intra_altivec(QUANT_NON_INTRA_PDECL)
{
    int mquant = *nonsat_mquant;
    int i, j, N, nzblockbits, last_block, recalc_blocks;
    vector unsigned short *pqm;
    vector unsigned short *inter_q_mat = wsp->inter_q_mat;
    signed short *ps, *pd;
    vector unsigned short zero, four;
    vector float one;
    vector unsigned short qmA, qmB;          /* quant matrix */
    vector signed short srcA, srcB;          /* source */
    vector signed short dstA, dstB;          /* destination */
    vector float sA0, sA1, sB0, sB1;         /* dividend */
    vector float dA0, dA1, dB0, dB1;         /* divisor */
    vector float reA0, reA1, reB0, reB1;     /* reciprocal */
    vector float qtA0, qtA1, qtB0, qtB1;     /* quotient */
    vector float rmA0, rmA1, rmB0, rmB1;     /* remainder */
    vector bool short selA, selB;            /* bool selector */
    vector bool short nz;                    /* non-zero */
    vector unsigned short max;               /* max value */
    vector unsigned short t1, t2, t3, t4;

    /* vuv & vu are used to share values between vector and scalar code.
     * vu lives on the stack and vuv is a vector register. Using vuv
     * instead of vu.v allows control over when read/writes to vu are done.
     */
    vector unsigned short vuv;
    union {
        /* do not use v, load vu into vuv for vector access. */
        vector unsigned short v;
        struct {
            unsigned short mquant;
            unsigned short clipvalue;
            unsigned int nz;
        } s;
    } vu;

#ifdef ALTIVEC_VERIFY /* {{{ */
    if (NOT_VECTOR_ALIGNED(wsp->inter_q_mat))
        mjpeg_error_exit1("quant_non_intra: wsp->inter_q_mat %% 16 != 0, (%d)",
            wsp->inter_q_mat);

    if (NOT_VECTOR_ALIGNED(src))
        mjpeg_error_exit1("quant_non_intra: src %% 16 != 0, (%d)", src);

    if (NOT_VECTOR_ALIGNED(dst))
        mjpeg_error_exit1("quant_non_intra: dst %% 16 != 0, (%d)", dst);
#endif /* }}} */

#define QUANT_NON_INTRA_AB /* {{{ */ \
    qmA = vec_ld(0, pqm); \
    pqm++; \
    qmB = vec_ld(0, pqm); \
    pqm++; \
    srcA = vec_ld(0, ps); \
    ps += 8; \
    srcB = vec_ld(0, ps); \
    ps += 8; \
    \
    /* calculate divisor */ \
    vu16(dA0) = vec_mergeh(zero, qmA); \
    vu16(dA1) = vec_mergel(zero, qmA); \
    vu16(dB0) = vec_mergeh(zero, qmB); \
    vu16(dB1) = vec_mergel(zero, qmB); \
    vuv = vec_ld(0, (unsigned short*)&vu); \
    vuv = vec_splat(vuv, 0); /* splat mquant */ \
    vu32(dA0) = vec_mulo(vu16(dA0), vuv); \
    vu32(dA1) = vec_mulo(vu16(dA1), vuv); \
    vu32(dB0) = vec_mulo(vu16(dB0), vuv); \
    vu32(dB1) = vec_mulo(vu16(dB1), vuv); \
    dA0 = vec_ctf(vu32(dA0), 0); \
    dA1 = vec_ctf(vu32(dA1), 0); \
    dB0 = vec_ctf(vu32(dB0), 0); \
    dB1 = vec_ctf(vu32(dB1), 0); \
    reA0 = vec_re(dA0); \
    reA1 = vec_re(dA1); \
    reB0 = vec_re(dB0); \
    reB1 = vec_re(dB1); \
    \
    /* refinement #1 */ \
    vfp(t1) = vec_nmsub(reA0, vfp(dA0), vfp(one)); \
    vfp(t2) = vec_nmsub(reA1, vfp(dA1), vfp(one)); \
    vfp(t3) = vec_nmsub(reB0, vfp(dB0), vfp(one)); \
    vfp(t4) = vec_nmsub(reB1, vfp(dB1), vfp(one)); \
    reA0 = vec_madd(reA0, vfp(t1), reA0); \
    reA1 = vec_madd(reA1, vfp(t2), reA1); \
    reB0 = vec_madd(reB0, vfp(t3), reB0); \
    reB1 = vec_madd(reB1, vfp(t4), reB1); \
    \
    /* refinement #2 */ \
    vfp(t1) = vec_nmsub(reA0, vfp(dA0), vfp(one)); \
    vfp(t2) = vec_nmsub(reA1, vfp(dA1), vfp(one)); \
    vfp(t3) = vec_nmsub(reB0, vfp(dB0), vfp(one)); \
    vfp(t4) = vec_nmsub(reB1, vfp(dB1), vfp(one)); \
    reA0 = vec_madd(reA0, vfp(t1), reA0); \
    reA1 = vec_madd(reA1, vfp(t2), reA1); \
    reB0 = vec_madd(reB0, vfp(t3), reB0); \
    reB1 = vec_madd(reB1, vfp(t4), reB1); \
    \
    /* (sA0,sB0) = abs(ps[n],ps[n+1]) << 4 {{{ */ \
    vs16(t1) = vec_subs(vs16(zero), srcA); \
    vs16(t2) = vec_subs(vs16(zero), srcB); \
    vs16(t3) = vec_max(srcA, vs16(t1)); \
    vs16(t4) = vec_max(srcB, vs16(t2)); \
    four = vec_splat_u16(4); \
    vu16(t1) = vec_sl(vu16(t3), four); \
    vu16(t2) = vec_sl(vu16(t4), four); \
    /* }}} */ \
    \
    vu16(sA0) = vec_mergeh(zero, vu16(t1)); \
    vu16(sA1) = vec_mergel(zero, vu16(t1)); \
    vu16(sB0) = vec_mergeh(zero, vu16(t2)); \
    vu16(sB1) = vec_mergel(zero, vu16(t2)); \
    vfp(sA0) = vec_ctf(vu32(sA0), 0); \
    vfp(sA1) = vec_ctf(vu32(sA1), 0); \
    vfp(sB0) = vec_ctf(vu32(sB0), 0); \
    vfp(sB1) = vec_ctf(vu32(sB1), 0); \
    \
    /* calculate quotient */ \
    vfp(qtA0) = vec_madd(vfp(sA0), reA0, vfp(zero)); \
    vfp(qtA1) = vec_madd(vfp(sA1), reA1, vfp(zero)); \
    vfp(qtB0) = vec_madd(vfp(sB0), reB0, vfp(zero)); \
    vfp(qtB1) = vec_madd(vfp(sB1), reB1, vfp(zero)); \
    \
    /* calculate remainder */ \
    vfp(rmA0) = vec_nmsub(vfp(dA0), vfp(qtA0), vfp(sA0)); \
    vfp(rmA1) = vec_nmsub(vfp(dA1), vfp(qtA1), vfp(sA1)); \
    vfp(rmB0) = vec_nmsub(vfp(dB0), vfp(qtB0), vfp(sB0)); \
    vfp(rmB1) = vec_nmsub(vfp(dB1), vfp(qtB1), vfp(sB1)); \
    \
    /* round quotient with remainder */ \
    vfp(qtA0) = vec_madd(vfp(rmA0), reA0, vfp(qtA0)); \
    vfp(qtA1) = vec_madd(vfp(rmA1), reA1, vfp(qtA1)); \
    vfp(qtB0) = vec_madd(vfp(rmB0), reB0, vfp(qtB0)); \
    vfp(qtB1) = vec_madd(vfp(rmB1), reB1, vfp(qtB1)); \
    \
    /* convert to integer */ \
    vu32(qtA0) = vec_ctu(vfp(qtA0), 0); \
    vu32(qtA1) = vec_ctu(vfp(qtA1), 0); \
    vu32(qtB0) = vec_ctu(vfp(qtB0), 0); \
    vu32(qtB1) = vec_ctu(vfp(qtB1), 0); \
    \
    vu16(dstA) = vec_pack(vu32(qtA0), vu32(qtA1)); \
    vu16(dstB) = vec_pack(vu32(qtB0), vu32(qtB1)); \
    \
    /* test for non-zero values */ \
    selA = vec_cmpgt(vu16(dstA), zero); \
    selB = vec_cmpgt(vu16(dstB), zero); \
    nz = vec_or(nz, selA); \
    nz = vec_or(nz, selB); \
    /* }}} */

#define SIGN_AND_STORE /* {{{ */ \
    /* sign dst blocks */ \
    selA = vec_cmpgt(vs16(zero), srcA); \
    selB = vec_cmpgt(vs16(zero), srcB); \
    vs16(t1) = vec_subs(vs16(zero), dstA); \
    vs16(t2) = vec_subs(vs16(zero), dstB); \
    dstA = vec_sel(dstA, vs16(t1), selA); \
    dstB = vec_sel(dstB, vs16(t2), selB); \
    \
    /* store dst blocks */ \
    vec_st(dstA, 0, pd); \
    pd += 8; \
    vec_st(dstB, 0, pd); \
    pd += 8; \
    /* }}} */

#define UPDATE_NZBLOCKBITS /* {{{ */ \
    /* quasi-count the non-zero values and store to vu.s.nz */ \
    vs32(nz) = vec_sums(vs32(nz), vs32(zero)); \
    vu32(nz) = vec_splat(vu32(nz), 3); \
    vuv = vec_ld(0, (unsigned short*)&vu); \
    /* vuv = ( vuv(mquant, clipvalue), nz, (), () ) */ \
    vu32(vuv) = vec_mergeh(vu32(vuv), vu32(nz)); \
    vec_st(vuv, 0, (unsigned short*)&vu); /* store for scalar access */ \
    nzblockbits |= ((!!vu.s.nz) << i); /* set non-zero block bit */ \
    /* }}} */

    AMBER_START;
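/*
 * Illustrative scalar sketch (hypothetical, not part of the original file):
 * roughly the per-coefficient work that one pass of QUANT_NON_INTRA_AB and
 * SIGN_AND_STORE vectorizes, assuming the divisor is inter_q[i] * mquant and
 * the dividend is |src[i]| << 4.  The AltiVec code replaces the division with
 * vec_re() plus two Newton-Raphson refinements (re' = re + re * (1 - d * re))
 * and a remainder correction before truncating with vec_ctu(); plain integer
 * division stands in for that here.  Saturation against dctsatlim and the
 * per-block restart behaviour described in the header comment belong to the
 * (elided) outer loop and are not shown.
 */
static int quant_non_intra_block_sketch(const int16_t *src, int16_t *dst,
                                        const unsigned short *inter_q,
                                        int mquant)
{
    int i, nonzero = 0;

    for (i = 0; i < 64; i++) {
        int a = (src[i] < 0) ? -src[i] : src[i];   /* |src[i]| */
        int d = inter_q[i] * mquant;               /* divisor, as built by vec_mulo */
        int q = (a << 4) / d;                      /* truncating quotient */

        dst[i] = (int16_t)((src[i] < 0) ? -q : q); /* restore the sign */
        nonzero |= q;                              /* any non-zero level marks the block */
    }
    return nonzero != 0;                           /* non-zero block flag */
}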