⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 build_sub44_mests.c

📁 Motion JPEG编解码器源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
/* build_sub44_mests.c, this file is part of the * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder * Copyright (C) 2002  James Klicman <james@klicman.org> * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "altivec_motion.h"#include "vectorize.h"#include "../fastintfns.h"#include "../mjpeg_logging.h"#include <math.h>#include <stdlib.h>/* #define AMBER_ENABLE *//* #define AMBER_MAX_TRACES 10 */#include "amber.h"#ifdef HAVE_ALTIVEC_H/* include last to ensure AltiVec type semantics, especially for bool. */#include <altivec.h>#endif#define USE_SMR_PPC#ifdef USE_SMR_PPCextern int sub_mean_reduction_ppc(int len, me_result_set *set, int reduction);#endif/* C skim rule */#define SKIM(weight,threshold) (weight < threshold)/* MMX skim rule *//* #define SKIM(weight,threshold) (weight <= threshold) *//* Rough and-ready absolute distance penalty * NOTE: This penalty is *vital* to correct operation * as otherwise the sub-mean filtering won't work on very * uniform images. *//* C penalty calculation */#define DISTANCE_PENALTY(x,y) (intmax(abs(x - i0),abs(y - j0))<<1)/* MMX penalty calculation *//* #define DISTANCE_PENALTY(x,y) (intmax(abs(x),abs(y))<<2) *//* old MMX penalty calculation *//* #define DISTANCE_PENALTY(x,y) (abs(x)+abs(y)) *//* Do threshold lookahead? This also generates different results than * the C version, also slightly worse output. (should be faster though) */#undef THRESHOLD_LOOKAHEAD#define THRESHOLD#ifdef THRESHOLD#  define UPDATE_THRESHOLD(w,t) (t) = intmin((w) << 2, (t))#else#  define UPDATE_THRESHOLD(w,t)#endif#if 0#undef THRESHOLD#undef SKIM#define SKIM(w,t) 1#undef DISTANCE_PENALTY#define DISTANCE_PENALTY(x,y) 0#endif/* *  s44org % 16 == 0 *  s44blk % 4 == 0 *  h == 2 or 4 *  (rowstride % 16) == 0 *  (ihigh-ilow)+1 % 16 == 0 very often *  (jhigh-jlow)+1 % 16 == 0 very often */#define BUILD_SUB44_MESTS_PDECL /* {{{ */                                    \  me_result_set *sub44set,                                                   \  int ilow, int jlow, int ihigh, int jhigh,                                  \  int i0, int j0,                                                            \  int null_ctl_sad,                                                          \  uint8_t *s44org, uint8_t *s44blk,                                          \  int rowstride, int h,                                                      \  int reduction                                                              \  /* }}} */#define BUILD_SUB44_MESTS_ARGS /* {{{ */                                     \  sub44set,                                                                  \  ilow, jlow, ihigh, jhigh,                                                  \  i0, j0,                                                                    \  null_ctl_sad,                                                              \  s44org, s44blk,                                                            \  rowstride, h,                                                              \  reduction                                                                  \  /* }}} *//* int build_sub44_mests_altivec(BUILD_SUB44_MESTS_PDECL) {{{ */#if defined(ALTIVEC_VERIFY) && ALTIVEC_TEST_FUNCTION(build_sub44_mests)#define VERIFY_BUILD_SUB44_MESTS#define VERIFY_SADS(orgblk,s44blk,rowstride,h,sads,count) if (verify) \      { verify_sads(orgblk,s44blk,rowstride,h,sads,count); }/* declarations */static int _build_sub44_mests_altivec(BUILD_SUB44_MESTS_PDECL, int verify);static void verify_sads(unsigned char *orgblk, unsigned char* s44blk,			int rowstride, int h, unsigned int *sads, int count);int build_sub44_mests_altivec(BUILD_SUB44_MESTS_PDECL){  return _build_sub44_mests_altivec(BUILD_SUB44_MESTS_ARGS, 0 /* no verify */);}static int _build_sub44_mests_altivec(BUILD_SUB44_MESTS_PDECL, int verify)#else#define VERIFY_SADS(orgblk,s44blk,rowstride,h,sads,count) /* no verify */int build_sub44_mests_altivec(BUILD_SUB44_MESTS_PDECL)#endif/* }}} */{    int i, j;    int x, y;    int xlow, xl, x16, xl1, xl2, xl3;    uint8_t *currowblk, *curblk, *nextrowblk;    int threshold;    me_result_s *cres;    me_result_s mres;    vector unsigned char t1, t2, t3, perm;    vector unsigned char shift, shifter, increment;    vector unsigned char vr0, vr1;    vector unsigned char vx0y0, vx16y0,                         vx0y1, vx16y1;    vector signed int sads;    unsigned int *psads;    unsigned int *psad;#ifdef ALTIVEC_DST    DataStreamControl dsc;#endif#ifndef USE_SMR_PPC    int mean_weight;#endif#ifdef AMBER_ENABLE    int stop_amber = 0;#endif#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(s44org))	mjpeg_error_exit1("build_sub44_mests: s44org %% 16 != 0 (0x%X)",	    s44org);    if (((unsigned long)s44blk) & 0x3 != 0)	mjpeg_error_exit1("build_sub44_mests: s44blk %% 4 != 0 (0x%X)",	    s44blk);    if (NOT_VECTOR_ALIGNED(rowstride))	mjpeg_error_exit1("build_sub44_mests: rowstride %% 16 != 0 (%d)",		rowstride);    if (h != 2 && h != 4)	mjpeg_error_exit1("build_sub44_mests: h != [2|4], (%d)", h);#endif /* }}} */#ifdef AMBER_ENABLE    /* enable amber for non-edge bound search radii */    if (((ihigh - ilow) >> 1) == (ihigh - i0) &&        ((jhigh - jlow) >> 1) == (jhigh - j0))    {	stop_amber = 1;	AMBER_START;    }#endif#ifdef ALTIVEC_DST    dsc.control = DATA_STREAM_CONTROL(1,0,0);    dsc.block.count = h;    dsc.block.stride = rowstride;    vec_dst(s44blk, dsc.control, 0);    xl = ((ihigh - ilow) >> 2) + 1;    currowblk = s44org+(ilow>>2)+rowstride*(jlow>>2);    xl1 = (xl + 3 + 15 + ((unsigned long)currowblk & 0xf)) >> 4;    dsc.block.size = xl1;    vec_dst(currowblk, dsc.control, 1);    dsc.block.count = 1;   /* loading one row at a time from now on */#else    xl = ((ihigh - ilow) >> 2) + 1;    currowblk = s44org+(ilow>>2)+rowstride*(jlow>>2);    xl1 = (xl + 3 + 15 + ((unsigned long)currowblk & 0xf)) >> 4;#endif    /* shift = (0x00010203, 0x01020304, 0x02030405, 0x03040506) {{{ */    shift = vec_lvsl(0, (unsigned char*) 0);    /* tmp(shifter) = (0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3) {{{ */    shifter = vec_splat_u8(2);    shifter = vec_sr(shift /* lvsl */, shifter /* (2) */ );    /* }}} */    vu32(shift) = vec_splat(vu32(shift), 0); /* (0x00010203, ...) */    shift = vec_add(shift, shifter);    increment = vec_splat_u8(4);    /* }}} */    threshold = 6*null_ctl_sad / (4*4*reduction);    y     = jlow  - j0;    xlow  = ilow  - i0;    perm = vec_lvsl(0, (unsigned char*)currowblk);    perm = vec_splat(perm, 0);    if (xl1 < 3) {	/* if (xl1 == 1) # of vec_ld = ((yl + h - 1) */	/* if (xl1 == 2) # of vec_ld = ((yl + h - 1) * 2) */	x16 = 16 * (xl1 >> 1); /* if (xl1 == 1) x16=0  if (xl1 == 2) x16=16 */	xl1 = xl;	xl2 = 0;	xl3 = 0;    } else {	int sh;	/* else (xl1 > 2) # of vec_ld = ((yl + h - 1) * (xl1)) */	x16 = 16;	xl1 = 16;	xl2 = xl - 16;	xl3 = (((xl2 & 0xf) + 3) >> 2);	sh = (xl2 >> 4) * 3;	xl2 = (xl2 + 15) >> 4; /* (xl2 + 15) / 16 */	xl3 = (xl3 << sh) | (((unsigned int)(~0) >> (32 - sh)) & 0x4924);    }    xl1 = (xl1 + 3) >> 2;  /* (xl1 + 3) / 4 */    j = ((jhigh - jlow) >> 2) + 1;    vr0 = vec_ld(0, (unsigned char*)s44blk);    vr1 = vec_ld(rowstride, (unsigned char*)s44blk);    t1 = vec_lvsl(0, (unsigned char*)s44blk);    vu32(t1) = vec_splat(vu32(t1), 0);    vr0 = vec_perm(vr0, vr0, t1);    vr1 = vec_perm(vr1, vr1, t1);    vx0y0 = vec_ld(0, (unsigned char*)currowblk);    vx16y0 = vec_ld(x16, (unsigned char*)currowblk);    cres = sub44set->mests;    if (h < 4) {	/* {{{ */	nextrowblk = currowblk + rowstride;	do /* while (--j) */	{	    vx0y1 = vec_ld(0, (unsigned char*)nextrowblk);	    vx16y1 = vec_ld(x16, (unsigned char*)nextrowblk);	    nextrowblk += rowstride;#ifdef ALTIVEC_DST	    vec_dst(nextrowblk, dsc.control, 0);#endif	    shifter = vec_add(shift, perm);	    /* vector align for vec_st */	    psad = psads = (unsigned int*)(((unsigned long)cres + 15) & (~0xf));	    /* calculating sads in the X direction 4 at a time. */	    i = xl1;	    do	    {		sads = vec_splat_s32(0);		t1 = vec_perm(vx0y0, vx16y0, shifter);		t2 = vec_max(t1, vr0);  /* find largest of two      */  		t3 = vec_min(t1, vr0);  /* find smaller of two      */  		t3 = vec_sub(t2, t3);   /* find absolute difference */  		vu32(sads) = vec_sum4s(t3, vu32(sads));		t1 = vec_perm(vx0y1, vx16y1, shifter);		t2 = vec_max(t1, vr1);		t3 = vec_min(t1, vr1);		t3 = vec_sub(t2, t3);		vu32(sads) = vec_sum4s(t3, vu32(sads));		vec_st(vu32(sads), 0, psad);		psad += 4;		/* increment permute for next iteration */		shifter = vec_add(shifter, increment);	    } while (--i);	    if (xl2) {		vector unsigned char vn0y0, vn16y0, vn0y1, vn16y1;		int i2, i3;		curblk = currowblk + 16; /* update to current pointer */		vn16y0 = vec_sld(vx16y0, vx16y0, 0); /* vn16y0 = vx16y0 (VPU) */		vn16y1 = vec_or(vx16y1, vx16y1);    /* vn16y1 = vx16y1 (VALU) */		i = xl2;		i2 = xl3;		do {		    curblk += 16; /* update to next pointer */		    vn0y0 = vec_sld(vn16y0, vn16y0, 0);		    vn16y0 = vec_ld(0, (unsigned char*)curblk);		    vn0y1 = vec_or(vn16y1, vn16y1);		    vn16y1 = vec_ld(rowstride, (unsigned char*)curblk);		    shifter = vec_add(shift, perm);		    i3 = i2 & 0x7;		    i2 >>= 3;		    do		    {			sads = vec_splat_s32(0);			t1 = vec_perm(vn0y0, vn16y0, shifter);			t2 = vec_max(t1, vr0);  /* find largest of two      */  			t3 = vec_min(t1, vr0);  /* find smaller of two      */  			t3 = vec_sub(t2, t3);   /* find absolute difference */  			vu32(sads) = vec_sum4s(t3, vu32(sads));			t1 = vec_perm(vn0y1, vn16y1, shifter);			t2 = vec_max(t1, vr1);			t3 = vec_min(t1, vr1);			t3 = vec_sub(t2, t3);			vu32(sads) = vec_sum4s(t3, vu32(sads));			vec_st(vu32(sads), 0, psad);			psad += 4;			/* increment permute for next iteration */			shifter = vec_add(shifter, increment);		    } while (--i3);		    /*		    vn0y0 = vec_sld(vn16y0, vn16y0, 0);		    vn0y1 = vec_or(vn16y1, vn16y1);		    */		} while (--i);	    }	    #ifdef ALTIVEC_VERIFY	    VERIFY_SADS(currowblk, s44blk, rowstride, h, psads, xl);#endif	    psad = psads;	    mres.y = (int8_t)y;	    x = xlow;	    i = xl >> 2;	    while (i--) {		int w0, w1, w2, w3, tx;		w0 = *psad;		psad++;		w1 = *psad;		psad++;		w2 = *psad;		psad++;		w3 = *psad;		psad++;		if (SKIM(w0, threshold)) {		    UPDATE_THRESHOLD(w0,threshold);		    mres.weight = (uint16_t)(w0 + DISTANCE_PENALTY(x,y));		    mres.x = (int8_t)x;		    *cres = mres;		    cres++;		}		if (SKIM(w1, threshold)) {		    UPDATE_THRESHOLD(w1,threshold);		    tx = x + 4;		    mres.weight = (uint16_t)(w1 + DISTANCE_PENALTY(tx,y));		    mres.x = (int8_t)tx;		    *cres = mres;		    cres++;		}		if (SKIM(w2, threshold)) {		    UPDATE_THRESHOLD(w2,threshold);		    tx = x + 8;		    mres.weight = (uint16_t)(w2 + DISTANCE_PENALTY(tx,y));		    mres.x = (int8_t)tx;		    *cres = mres;		    cres++;		}		if (SKIM(w3, threshold)) {		    UPDATE_THRESHOLD(w3,threshold);		    tx = x + 12;		    mres.weight = (uint16_t)(w3 + DISTANCE_PENALTY(tx,y));		    mres.x = (int8_t)tx;		    *cres = mres;		    cres++;		}		x += 16;	    }	    i = xl & 0x3;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -