⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 iquant_intra.c

📁 Motion JPEG编解码器源代码
💻 C
字号:
/* iquant_intra.c, this file is part of the * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder * Copyright (C) 2002  James Klicman <james@klicman.org> * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "altivec_quantize.h"#if defined(ALTIVEC_VERIFY) && ALTIVEC_TEST_FUNCTION(iquant_intra_m1)#  include <stdlib.h>#  include <string.h>#endif#include "vectorize.h"#include "../mjpeg_logging.h"#include "../../mpeg2enc/quantize_precomp.h"/* #define AMBER_ENABLE */#include "amber.h"#ifdef HAVE_ALTIVEC_H/* include last to ensure AltiVec type semantics, especially for bool. */#include <altivec.h>#endif#define IQUANT_INTRA_PDECL struct QuantizerWorkSpace *wsp, \    int16_t *src, int16_t *dst, int dc_prec, int mquant#define IQUANT_INTRA_ARGS wsp, src, dst, dc_prec, mquant#define IQUANT_INTRA_PFMT "wsp=0x%X, src=0x%X, dst=0x%X, dc_prec=%d, mquant=%d"void iquant_intra_m1_altivec(IQUANT_INTRA_PDECL){    int i;    vector signed short vsrc;    uint16_t *qmat;    vector unsigned short vqmat;    vector unsigned short vmquant;    vector bool short eqzero, ltzero;    vector signed short val, t0;    vector signed short zero, one;    vector unsigned int four;    vector signed short min, max;    int offset, offset2;    int16_t dst0;    union {	vector unsigned short vu16;	unsigned short mquant;	vector signed int vs32;	struct {	    signed int pad[3];	    signed int sum;	} s;    } vu;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(wsp->intra_q_mat))	mjpeg_error_exit1("iquant_intra_m1: wsp->intra_q_mat %% 16 != 0, (%d)",	    wsp->intra_q_mat);    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("iquant_intra_m1: src %% 16 != 0, (%d)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("iquant_intra_m1: dst %% 16 != 0, (%d)", dst);    for (i = 0; i < 64; i++)	if (src[i] < -256 || src[i] > 255)	    mjpeg_error_exit1("iquant_intra_m2: -256 > src[%i] > 255, (%d)",		i, src[i]);#endif /* }}} */    AMBER_START;    dst0 = src[0] << (3 - dc_prec);    qmat = (uint16_t*)wsp->intra_q_mat;#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(64/8,1,0);    vec_dst(src, dsc.control, 0);    vec_dst(qmat, dsc.control, 1);#endif    /* vmquant = (vector unsigned short)(mquant); */    vu.mquant = (unsigned short)mquant;    vmquant = vec_splat(vu.vu16, 0);    zero = vec_splat_s16(0);    one = vec_splat_s16(1);    four = vec_splat_u32(4);    /* max = (2047); min = (-2048); {{{ */    vu8(max) = vec_splat_u8(0x7);    t0 = vec_splat_s16(-1); /* 0xffff */    vu8(max) = vec_mergeh(vu8(max), vu8(t0)); /* 0x07ff == 2047 */    min = vec_sub(t0, max);    /* }}} */    offset = 0;#if 1    vsrc = vec_ld(offset, (signed short*)src);    vqmat = vec_ld(offset, (unsigned short*)qmat);    i = (64/8) - 1;    do {	/* intra_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	/* save sign */	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = (src * quant) >> 4 */	vs32(t0) = vec_mule(val, vs16(vqmat));	vs32(val) = vec_mulo(val, vs16(vqmat));	vs32(t0) = vec_sra(vs32(t0), four);	vs16(t0) = vec_pack(vs32(t0), vs32(t0));	vs32(val) = vec_sra(vs32(val), four);	vs16(val) = vec_pack(vs32(val), vs32(val));	val = vec_mergeh(vs16(t0), vs16(val));	offset2 = offset;	offset += 8*sizeof(int16_t);	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* val = val - 1&~(val|val==0) */	t0 = vec_or(val, eqzero);	t0 = vec_andc(one, t0);	val = vec_sub(val, t0);	/* restore sign */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	vec_st(val, offset2, dst);    } while (--i);    /* intra_q[i] * mquant */    vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));    /* save sign */    ltzero = vec_cmplt(vsrc, zero);    eqzero = vec_cmpeq(vsrc, zero);    /* val = abs(src) */    t0 = vec_sub(zero, vsrc);    val = vec_max(t0, vsrc);    /* val = (src * quant) >> 4 */    vs32(t0) = vec_mule(val, vs16(vqmat));    vs32(val) = vec_mulo(val, vs16(vqmat));    vs32(t0) = vec_sra(vs32(t0), four);    vs16(t0) = vec_pack(vs32(t0), vs32(t0));    vs32(val) = vec_sra(vs32(val), four);    vs16(val) = vec_pack(vs32(val), vs32(val));    val = vec_mergeh(vs16(t0), vs16(val));    /* val = val - 1&~(val|val==0) */    t0 = vec_or(val, eqzero);    t0 = vec_andc(one, t0);    val = vec_sub(val, t0);    /* restore sign */    t0 = vec_sub(zero, val);    val = vec_sel(val, t0, ltzero);    /* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */    val = vec_min(val, max);    val = vec_max(val, min);    vec_st(val, offset, dst);#else    /* {{{ */    i = (64/8);    do {	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* intra_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	/* save sign */	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = (src * quant) >> 4 */	vs32(t0) = vec_mule(val, vs16(vqmat));	vs32(val) = vec_mulo(val, vs16(vqmat));	vs32(t0) = vec_sra(vs32(t0), four);	vs16(t0) = vec_pack(vs32(t0), vs32(t0));	vs32(val) = vec_sra(vs32(val), four);	vs16(val) = vec_pack(vs32(val), vs32(val));	val = vec_mergeh(vs16(t0), vs16(val));	/* val = val - 1&~(val|val==0) */	t0 = vec_or(val, eqzero);	t0 = vec_andc(one, t0);	val = vec_sub(val, t0);	/* restore sign */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	vec_st(val, offset, dst);	offset += 8*sizeof(int16_t);    } while (--i);    /* }}} */#endif    dst[0] = dst0;    AMBER_STOP;}void iquant_intra_m2_altivec(IQUANT_INTRA_PDECL){    int i;    vector signed short vsrc;    uint16_t *qmat;    vector unsigned short vqmat;    vector unsigned short vmquant;    vector bool short ltzero;    vector signed short val, t0;    vector signed short zero;    vector unsigned int four;    vector signed short min, max;    vector signed int vsum;    int sum;    int offset, offset2;    int16_t dst0;    union {	vector unsigned short vu16;	unsigned short mquant;	vector signed int vs32;	struct {	    signed int pad[3];	    signed int sum;	} s;    } vu;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(wsp->intra_q_mat))	mjpeg_error_exit1("iquant_intra_m2: wsp->intra_q_mat %% 16 != 0, (%d)",	    wsp->intra_q_mat);    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("iquant_intra_m2: src %% 16 != 0, (%d)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("iquant_intra_m2: dst %% 16 != 0, (%d)", dst);    for (i = 0; i < 64; i++)	if (src[i] < -256 || src[i] > 255)	    mjpeg_error_exit1("iquant_intra_m2: -256 > src[%i] > 255, (%d)",		i, src[i]);#endif /* }}} */    AMBER_START;    dst0 = src[0] << (3 - dc_prec);    qmat = (uint16_t*)wsp->intra_q_mat;#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(64/8,1,0);    vec_dst(src, dsc.control, 0);    vec_dst(qmat, dsc.control, 1);#endif    /* vmquant = (vector unsigned short)(mquant); */    vu.mquant = (unsigned short)mquant;    vmquant = vec_splat(vu.vu16, 0);    vsum = vec_splat_s32(0);    zero = vec_splat_s16(0);    four = vec_splat_u32(4);    /* max = (2047); min = (-2048); {{{ */    vu8(max) = vec_splat_u8(0x7);    t0 = vec_splat_s16(-1); /* 0xffff */    vu8(max) = vec_mergeh(vu8(max), vu8(t0)); /* 0x07ff == 2047 */    min = vec_sub(t0, max);    /* }}} */    offset = 0;#if 1    vsrc = vec_ld(offset, (signed short*)src);    vqmat = vec_ld(offset, (unsigned short*)qmat);    i = (64/8) - 1;    do {	/* intra_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	/* save sign */	ltzero = vec_cmplt(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = (src * quant) >> 4 */	vs32(t0) = vec_mule(val, vs16(vqmat));	vs32(val) = vec_mulo(val, vs16(vqmat));	vs32(t0) = vec_sra(vs32(t0), four);	vs16(t0) = vec_pack(vs32(t0), vs32(t0));	vs32(val) = vec_sra(vs32(val), four);	vs16(val) = vec_pack(vs32(val), vs32(val));	val = vec_mergeh(vs16(t0), vs16(val));	offset2 = offset;	offset += 8*sizeof(int16_t);	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* restore sign */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	vsum = vec_sum4s(val, vsum);	vec_st(val, offset2, dst);    } while (--i);    /* intra_q[i] * mquant */    vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));    /* save sign */    ltzero = vec_cmplt(vsrc, zero);    /* val = abs(src) */    t0 = vec_sub(zero, vsrc);    val = vec_max(t0, vsrc);    /* val = (src * quant) >> 4 */    vs32(t0) = vec_mule(val, vs16(vqmat));    vs32(val) = vec_mulo(val, vs16(vqmat));    vs32(t0) = vec_sra(vs32(t0), four);    vs16(t0) = vec_pack(vs32(t0), vs32(t0));    vs32(val) = vec_sra(vs32(val), four);    vs16(val) = vec_pack(vs32(val), vs32(val));    val = vec_mergeh(vs16(t0), vs16(val));    /* restore sign */    t0 = vec_sub(zero, val);    val = vec_sel(val, t0, ltzero);    /* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */    val = vec_min(val, max);    val = vec_max(val, min);    vsum = vec_sum4s(val, vsum);    vec_st(val, offset, dst);#else    /* {{{ */    i = (64/8);    do {	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* intra_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	/* save sign */	ltzero = vec_cmplt(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = (src * quant) >> 4 */	vs32(t0) = vec_mule(val, vs16(vqmat));	vs32(val) = vec_mulo(val, vs16(vqmat));	vs32(t0) = vec_sra(vs32(t0), four);	vs16(t0) = vec_pack(vs32(t0), vs32(t0));	vs32(val) = vec_sra(vs32(val), four);	vs16(val) = vec_pack(vs32(val), vs32(val));	val = vec_mergeh(vs16(t0), vs16(val));	/* restore sign */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	vsum = vec_sum4s(val, vsum);	vec_st(val, offset, dst);	offset += 8*sizeof(int16_t);    } while (--i);    /* }}} */#endif    vu.vs32 = vec_sums(vsum, vs32(zero));    sum = vu.s.sum;    sum -= dst[0];    sum += dst0;    dst[0] = dst0;    /* mismatch control */#if 1    dst[63] ^= !(sum & 1);#else    if ((sum & 1) == 0)	dst[63] ^= 1;#endif    AMBER_STOP;}/* iquant_intra_altivec_verify {{{ */#if ALTIVEC_TEST_FUNCTION(iquant_intra_m1) || \    ALTIVEC_TEST_FUNCTION(iquant_intra_m2)static void iquant_intra_altivec_verify(IQUANT_INTRA_PDECL,    void (*test)(IQUANT_INTRA_PDECL),    void (*verify)(IQUANT_INTRA_PDECL)){    int i;    unsigned long checksum1, checksum2;    int16_t srccpy[64], dstcpy[64];    uint16_t *qmat;    qmat = (uint16_t*) wsp->intra_q_mat;    /* in case src == dst */    memcpy(srccpy, src, 64*sizeof(int16_t));    (*test)(IQUANT_INTRA_ARGS);    for (checksum1 = i = 0; i < 64; i++)	checksum1 += dst[i];    memcpy(dstcpy, dst, 64*sizeof(int16_t));    memcpy(src, srccpy, 64*sizeof(int16_t));    (*verify)(IQUANT_INTRA_ARGS);    for (checksum2 = i = 0; i < 64; i++)	checksum2 += dst[i];    if (checksum1 != checksum2) {	mjpeg_debug("iquant_intra(" IQUANT_INTRA_PFMT ")",	    IQUANT_INTRA_ARGS);	mjpeg_debug("iquant_intra: checksums differ %d != %d",		    checksum1, checksum2);    }    for (i = 0; i < 64; i++) {	if (dstcpy[i] != dst[i]) {	    mjpeg_debug("iquant_intra: src[%d]=%d, qmat=%d, "			"dst[%d]=%d != %d", i, srccpy[i], qmat[i]*mquant,			i, dstcpy[i], dst[i]);	}    }}#endif /* }}} */#if ALTIVEC_TEST_FUNCTION(iquant_intra_m1) /* {{{ */#  ifdef ALTIVEC_VERIFYvoid iquant_intra_m1_altivec_verify(IQUANT_INTRA_PDECL){    iquant_intra_altivec_verify(IQUANT_INTRA_ARGS,	iquant_intra_m1_altivec, ALTIVEC_TEST_WITH(iquant_intra_m1));}#  elseALTIVEC_TEST(iquant_intra_m1, void, (IQUANT_INTRA_PDECL),    IQUANT_INTRA_PFMT, IQUANT_INTRA_ARGS);#  endif#endif /* }}} */#if ALTIVEC_TEST_FUNCTION(iquant_intra_m2) /* {{{ */#  ifdef ALTIVEC_VERIFYvoid iquant_intra_m2_altivec_verify(IQUANT_INTRA_PDECL){    iquant_intra_altivec_verify(IQUANT_INTRA_ARGS,	iquant_intra_m2_altivec, ALTIVEC_TEST_WITH(iquant_intra_m2));}#  elseALTIVEC_TEST(iquant_intra_m2, void, (IQUANT_INTRA_PDECL),    IQUANT_INTRA_PFMT, IQUANT_INTRA_ARGS);#  endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -