⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 iquant_non_intra.c

📁 Motion JPEG编解码器源代码
💻 C
字号:
/* iquant_non_intra_m1.c, this file is part of the * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder * Copyright (C) 2002  James Klicman <james@klicman.org> * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "altivec_quantize.h"#if defined(ALTIVEC_VERIFY) && ALTIVEC_TEST_FUNCTION(iquant_non_intra_m1)#  include <stdlib.h>#endif#include "vectorize.h"#include "../mjpeg_logging.h"#include "../../mpeg2enc/quantize_precomp.h"/* #define AMBER_ENABLE */#include "amber.h"#ifdef HAVE_ALTIVEC_H/* include last to ensure AltiVec type semantics, especially for bool. */#include <altivec.h>#endif#define IQUANT_NON_INTRA_PDECL \    struct QuantizerWorkSpace *wsp, int16_t *src, int16_t *dst, int mquant#define IQUANT_NON_INTRA_ARGS wsp, src, dst, mquant#define IQUANT_NON_INTRA_PFMT "wsp=0x%X, src=0x%X, dst=0x%X, mquant=%d"void iquant_non_intra_m1_altivec(IQUANT_NON_INTRA_PDECL){    int i;    vector signed short vsrc;    uint16_t *qmat;    vector unsigned short vqmat;    vector unsigned short vmquant;    vector signed short val, t0;    vector bool short ltzero, eqzero;    vector signed short zero, one;    vector unsigned int five;    vector signed short min, max;    union {	vector unsigned short v;	unsigned short mquant[8];    } vu;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(wsp->inter_q_mat))	mjpeg_error_exit1("iquant_non_intra_m1: wsp->inter_q_mat %% 16 != 0, "	    "(%d)", wsp->inter_q_mat);    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("iquant_non_intra_m1: src %% 16 != 0, (%d)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("iquant_non_intra_m1: dst %% 16 != 0, (%d)", dst);    for (i = 0; i < 64; i++)	if (src[i] < -256 || src[i] > 255)	    mjpeg_error_exit1("iquant_non_intra_m1: -256 > src[%i] > 255, (%d)",		i, src[i]);#endif /* }}} */    AMBER_START;    qmat = (uint16_t*)wsp->inter_q_mat;#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(64/8,1,0);    vec_dst(src, dsc.control, 0);    vec_dst(qmat, dsc.control, 1);#endif    vu.mquant[0] = (unsigned short)mquant;    vmquant = vec_splat(vu.v, 0);    vsrc = vec_ld(0, (signed short*)src);    vqmat = vec_ld(0, qmat);    zero = vec_splat_s16(0);    one = vec_splat_s16(1);    five = vec_splat_u32(5);    /* max = (2047); min = (-2048); {{{ */    vu8(max) = vec_splat_u8(0x7);    t0 = vec_splat_s16(-1); /* 0xffff */    vu8(max) = vec_mergeh(vu8(max), vu8(t0)); /* 0x07ff == 2047 */    min = vec_sub(t0, max);    /* }}} */    i = (64/8) - 1;    do {	/* inter_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = val + val + 1 */	val = vec_add(val, val);	val = vec_add(val, one);	/* val = (val * quant) >> 5 */	vu32(t0) = vec_mule(vu16(val), vqmat);	vu32(t0) = vec_sra(vu32(t0), five);	vu16(t0) = vec_pack(vu32(t0), vu32(t0));	vu32(val) = vec_mulo(vu16(val), vqmat);	vu32(val) = vec_sra(vu32(val), five);	vu16(val) = vec_pack(vu32(val), vu32(val));	vu16(val) = vec_mergeh(vu16(t0), vu16(val));	src += 8;	vsrc = vec_ld(0, (signed short*)src);	qmat += 8;	vqmat = vec_ld(0, (unsigned short*)qmat);	/* val = val - 1&~(val|val==0) */	t0 = vec_or(val, eqzero);	t0 = vec_andc(one, t0);	val = vec_sub(val, t0);	/* val = samesign(src, val) */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	/* if (src[i] == 0) dst[i] = 0 */	val = vec_andc(val, eqzero);	vec_st(val, 0, dst);	dst += 8;    } while (--i);    /* inter_q[i] * mquant */    vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));    ltzero = vec_cmplt(vsrc, zero);    eqzero = vec_cmpeq(vsrc, zero);    /* val = abs(src) */    t0 = vec_sub(zero, vsrc);    val = vec_max(t0, vsrc);    /* val = val + val + 1 */    val = vec_add(val, val);    val = vec_add(val, one);    /* val = (val * quant) >> 5 */    vu32(t0) = vec_mule(vu16(val), vqmat);    vu32(t0) = vec_sra(vu32(t0), five);    vu16(t0) = vec_pack(vu32(t0), vu32(t0));    vu32(val) = vec_mulo(vu16(val), vqmat);    vu32(val) = vec_sra(vu32(val), five);    vu16(val) = vec_pack(vu32(val), vu32(val));    vu16(val) = vec_mergeh(vu16(t0), vu16(val));    /* val = val - 1&~(val|val==0) */    t0 = vec_or(val, eqzero);    t0 = vec_andc(one, t0);    val = vec_sub(val, t0);    /* val = samesign(src, val) */    t0 = vec_sub(zero, val);    val = vec_sel(val, t0, ltzero);    /* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */    val = vec_min(val, max);    val = vec_max(val, min);    /* if (src[i] == 0) dst[i] = 0 */    val = vec_andc(val, eqzero);    vec_st(val, 0, dst);    AMBER_STOP;}void iquant_non_intra_m2_altivec(IQUANT_NON_INTRA_PDECL){    int i;    vector signed short vsrc;    uint16_t *qmat;    vector unsigned short vqmat;    vector unsigned short vmquant;    vector signed short val, t0;    vector bool short ltzero, eqzero;    vector signed short zero, one;    vector unsigned int five;    vector signed short min, max;    vector signed int sum;    int offset, offset2;    union {	vector unsigned short vu16;	unsigned short mquant;	vector signed int vs32;	struct {	    signed int pad[3];	    signed int sum;	} s;    } vu;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(wsp->inter_q_mat))	mjpeg_error_exit1("iquant_non_intra_m2: wsp->inter_q_mat %% 16 != 0, "	    "(%d)", wsp->inter_q_mat);    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("iquant_non_intra_m2: src %% 16 != 0, (%d)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("iquant_non_intra_m2: dst %% 16 != 0, (%d)", dst);    for (i = 0; i < 64; i++)	if (src[i] < -256 || src[i] > 255)	    mjpeg_error_exit1("iquant_non_intra_m2: -256 > src[%i] > 255, (%d)",		i, src[i]);#endif /* }}} */    AMBER_START;    qmat = (uint16_t*)wsp->inter_q_mat;#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(64/8,1,0);    vec_dst(src, dsc.control, 0);    vec_dst(qmat, dsc.control, 1);#endif    /* vmquant = (vector unsigned short)(mquant); */    vu.mquant = (unsigned short)mquant;    vmquant = vec_splat(vu.vu16, 0);    sum = vec_splat_s32(0);    zero = vec_splat_s16(0);    one = vec_splat_s16(1);    five = vec_splat_u32(5);    /* max = (2047); min = (-2048); {{{ */    vu8(max) = vec_splat_u8(0x7);    t0 = vec_splat_s16(-1); /* 0xffff */    vu8(max) = vec_mergeh(vu8(max), vu8(t0)); /* 0x07ff == 2047 */    min = vec_sub(t0, max);    /* }}} */    offset = 0;#if 1    vsrc = vec_ld(offset, (signed short*)src);    vqmat = vec_ld(offset, (unsigned short*)qmat);    i = (64/8) - 1;    do {	/* inter_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = val + val + 1 */	val = vec_add(val, val);	val = vec_add(val, one);	/* val = (val * quant) >> 5 */	vu32(t0) = vec_mule(vu16(val), vqmat);	vu32(t0) = vec_sra(vu32(t0), five);	vu16(t0) = vec_pack(vu32(t0), vu32(t0));	vu32(val) = vec_mulo(vu16(val), vqmat);	vu32(val) = vec_sra(vu32(val), five);	vu16(val) = vec_pack(vu32(val), vu32(val));	vu16(val) = vec_mergeh(vu16(t0), vu16(val));	offset2 = offset;	offset += 8*sizeof(int16_t);	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* val = samesign(src, val) */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	/* if (src[i] == 0) dst[i] = 0 */	val = vec_andc(val, eqzero);	sum = vec_sum4s(val, sum);	vec_st(val, offset2, dst);    } while (--i);    /* inter_q[i] * mquant */    vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));    ltzero = vec_cmplt(vsrc, zero);    eqzero = vec_cmpeq(vsrc, zero);    /* val = abs(src) */    t0 = vec_sub(zero, vsrc);    val = vec_max(t0, vsrc);    /* val = val + val + 1 */    val = vec_add(val, val);    val = vec_add(val, one);    /* val = (val * quant) >> 5 */    vu32(t0) = vec_mule(vu16(val), vqmat);    vu32(t0) = vec_sra(vu32(t0), five);    vu16(t0) = vec_pack(vu32(t0), vu32(t0));    vu32(val) = vec_mulo(vu16(val), vqmat);    vu32(val) = vec_sra(vu32(val), five);    vu16(val) = vec_pack(vu32(val), vu32(val));    vu16(val) = vec_mergeh(vu16(t0), vu16(val));    /* val = samesign(src, val) */    t0 = vec_sub(zero, val);    val = vec_sel(val, t0, ltzero);    /* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */    val = vec_min(val, max);    val = vec_max(val, min);    /* if (src[i] == 0) dst[i] = 0 */    val = vec_andc(val, eqzero);    sum = vec_sum4s(val, sum);    vec_st(val, offset, dst);#else    /* {{{ */    i = (64/8);    do {	vsrc = vec_ld(offset, (signed short*)src);	vqmat = vec_ld(offset, (unsigned short*)qmat);	/* inter_q[i] * mquant */	vu16(vqmat) = vec_mulo(vu8(vqmat), vu8(vmquant));	ltzero = vec_cmplt(vsrc, zero);	eqzero = vec_cmpeq(vsrc, zero);	/* val = abs(src) */	t0 = vec_sub(zero, vsrc);	val = vec_max(t0, vsrc);	/* val = val + val + 1 */	val = vec_add(val, val);	val = vec_add(val, one);	/* val = (val * quant) >> 5 */	vu32(t0) = vec_mule(vu16(val), vqmat);	vu32(t0) = vec_sra(vu32(t0), five);	vu16(t0) = vec_pack(vu32(t0), vu32(t0));	vu32(val) = vec_mulo(vu16(val), vqmat);	vu32(val) = vec_sra(vu32(val), five);	vu16(val) = vec_pack(vu32(val), vu32(val));	vu16(val) = vec_mergeh(vu16(t0), vu16(val));	/* val = samesign(src, val) */	t0 = vec_sub(zero, val);	val = vec_sel(val, t0, ltzero);	/* val = (val > 2047) ? ((val < -2048) ? -2048 : val); */	val = vec_min(val, max);	val = vec_max(val, min);	/* if (src[i] == 0) dst[i] = 0 */	val = vec_andc(val, eqzero);	sum = vec_sum4s(val, sum);	vec_st(val, offset, dst);	offset += 8*sizeof(int16_t);    } while (--i);    /* }}} */#endif    /* mismatch control */    vu.vs32 = vec_sums(sum, vs32(zero));#if 1    dst[63] ^= !(vu.s.sum & 1);#else    if ((vu.s.sum & 1) == 0)	dst[63] ^= 1;#endif    AMBER_STOP;}/* iquant_non_intra_altivec_verify {{{ */#if ALTIVEC_TEST_FUNCTION(iquant_non_intra_m1) || \    ALTIVEC_TEST_FUNCTION(iquant_non_intra_m2)static void iquant_non_intra_altivec_verify(IQUANT_NON_INTRA_PDECL,    void (*test)(IQUANT_NON_INTRA_PDECL),    void (*verify)(IQUANT_NON_INTRA_PDECL)){    int i;    unsigned long checksum1, checksum2;    int16_t srccpy[64], dstcpy[64];    uint16_t *qmat;    qmat = (uint16_t*)wsp->inter_q_mat;    /* in case src == dst */    memcpy(srccpy, src, 64*sizeof(int16_t));    (*test)(IQUANT_NON_INTRA_ARGS);    for (checksum1 = i = 0; i < 64; i++)	checksum1 += dst[i];    memcpy(dstcpy, dst, 64*sizeof(int16_t));    memcpy(src, srccpy, 64*sizeof(int16_t));    (*verify)(IQUANT_NON_INTRA_ARGS);    for (checksum2 = i = 0; i < 64; i++)	checksum2 += dst[i];    if (checksum1 != checksum2) {	mjpeg_debug("iquant_non_intra(" IQUANT_NON_INTRA_PFMT ")",	    IQUANT_NON_INTRA_ARGS);	mjpeg_debug("iquant_non_intra: checksums differ %d != %d",		    checksum1, checksum2);    }    for (i = 0; i < 64; i++) {	if (dstcpy[i] != dst[i]) {	    mjpeg_debug("iquant_non_intra: src[%d]=%d, qmat=%d, "			"dst %d != %d", i, srccpy[i], qmat[i]*mquant,			dstcpy[i], dst[i]);	}    }}#endif /* }}} */#if ALTIVEC_TEST_FUNCTION(iquant_non_intra_m1) /* {{{ */#  ifdef ALTIVEC_VERIFYvoid iquant_non_intra_m1_altivec_verify(IQUANT_NON_INTRA_PDECL){    iquant_non_intra_altivec_verify(IQUANT_NON_INTRA_ARGS,	iquant_non_intra_m1_altivec, ALTIVEC_TEST_WITH(iquant_non_intra_m1));}#  elseALTIVEC_TEST(iquant_non_intra_m1, void, (IQUANT_NON_INTRA_PDECL),    IQUANT_NON_INTRA_PFMT, IQUANT_NON_INTRA_ARGS);#  endif#endif /* }}} */#if ALTIVEC_TEST_FUNCTION(iquant_non_intra_m2) /* {{{ */#  ifdef ALTIVEC_VERIFYvoid iquant_non_intra_m2_altivec_verify(IQUANT_NON_INTRA_PDECL){    iquant_non_intra_altivec_verify(IQUANT_NON_INTRA_ARGS,	iquant_non_intra_m2_altivec, ALTIVEC_TEST_WITH(iquant_non_intra_m2));}#  elseALTIVEC_TEST(iquant_non_intra_m2, void, (IQUANT_NON_INTRA_PDECL),    IQUANT_NON_INTRA_PFMT, IQUANT_NON_INTRA_ARGS);#  endif#endif /* }}} *//* vim:set foldmethod=marker foldlevel=0: */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -