⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pred_comp.c

📁 Motion JPEG编解码器源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
/* pred_comp.c, this file is part of the * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder * Copyright (C) 2002  James Klicman <james@klicman.org> * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "altivec_predict.h"#include "vectorize.h"#include "../mjpeg_logging.h"/* #define AMBER_ENABLE */#include "amber.h"#ifdef HAVE_ALTIVEC_H/* include last to ensure AltiVec type semantics, especially for bool. */#include <altivec.h>#endif#define PRED_COMP_PDECL /* {{{ */                                            \    uint8_t *src,                                                            \    uint8_t *dst,                                                            \    int lx,                                                                  \    int w, int h,                                                            \    int x, int y,                                                            \    int dx, int dy,                                                          \    int addflag                                                              \    /* }}} */#define PRED_COMP_ARGS src, dst, lx, w, h, x, y, dx, dy, addflag#define PRED_COMP_PFMT /* {{{ */                                             \    "src=0x%X, dst=0x%X, lx=%d, w=%d, h=%d, x=%d, y=%d,"                     \    " dx=%d, dy=%d, addflag=%d"                                              \    /* }}} *//* low level prediction routine * * src:     prediction source * dst:     prediction destination * lx:      line width (for both src and dst) * x,y:     destination coordinates * dx,dy:   half pel motion vector * w,h:     size of prediction block * addflag: store or add prediction * * src % 16 == 0 * dst % 16 == 0 * lx % 16 == 0 * w == 16 | 8 * h == 16 | 8 | 4 interlaced * w == h | h*2 interlaced */void pred_comp_altivec(PRED_COMP_PDECL){    int xint, xh, yint, yh;    int i, j;    uint8_t *s, *d;    vector unsigned char lS, lS0, lS1, lS2, lS3, lD, perm, perm1;    vector unsigned short s0H, s0L, s1H, s1L, s2H, s2L, s3H, s3L, dH, dL;    vector unsigned char zero, eight;    vector unsigned short one, two;    AMBER_START;    /* half pel scaling */    xint = dx>>1; /* integer part */    xh = dx & 1;  /* half pel flag */    yint = dy>>1;    yh = dy & 1;    /* origins */    s = src + lx*(y+yint) + (x+xint); /* motion vector */    d = dst + lx*y + x;#ifdef ALTIVEC_VERIFY /* {{{ */    if (NOT_VECTOR_ALIGNED(src))	mjpeg_error_exit1("pred_comp: src %% 16 != 0, (0x%X)", src);    if (NOT_VECTOR_ALIGNED(dst))	mjpeg_error_exit1("pred_comp: dst %% 16 != 0, (0x%X)", dst);    if (NOT_VECTOR_ALIGNED(lx))	mjpeg_error_exit1("pred_comp: lx %% 16 != 0, (%d)", lx);    if (w != 16 && w != 8)	mjpeg_error_exit1("pred_comp: w != 16|8, (%d)", w);    if (h != 16 && h != 8 && h != 4)	mjpeg_error_exit1("pred_comp: h != 16|8|4, (%d)", h);    if (w != h && w != (h * 2))	mjpeg_error_exit1("pred_comp: w != h|(h*2), (w=%d, h=%d)", w, h);#ifdef ALTIVEC_DST    if (lx & (~0xffff) != 0)	mjpeg_error_exit1("pred_comp: lx > vec_dst range", lx);#endif    if (((unsigned long)d & 0x7) != 0)	mjpeg_error_exit1("pred_comp: d %% 8 != 0 (0x%X)", d);    if (((unsigned long)d & 0xf) > w)	mjpeg_error_exit1("pred_comp: (d & 0xf) > w (d=0x%X, w=%d)", d, w);#endif /* }}} */    if (xh) {	if  (yh) {	    /* (xh && yh) {{{ */	    if (addflag) {		/* {{{ */		zero = vec_splat_u8(0);		one = vec_splat_u16(1);		two = vec_splat_u16(2);		if (w == 8) {		    /* {{{ */		    eight = vec_splat_u8(8);		    perm = vec_lvsl(0, (unsigned char*)s);		    if (((unsigned long)d & 0xf) == 0) {			/* {{{ */			lS0 = vec_ld(0, (unsigned char*)s);			lS1 = vec_ld(16, (unsigned char*)s);			lS0 = vec_perm(lS0, lS1, perm);			lS1 = vec_slo(lS0, eight);			s += lx;			for (i = 0; i < h; i++) {			    lS2 = vec_ld(0, (unsigned char*)s);			    lS3 = vec_ld(16, (unsigned char*)s);			    lS2 = vec_perm(lS2, lS3, perm);			    lS3 = vec_slo(lS2, eight);			    lD = vec_ld(0, (unsigned char*)d);			    /* (unsigned short[]) s[0-7] */			    vu8(s0H) = vec_mergeh(zero, lS0);			    /* (unsigned short[]) s[1-8] */			    vu8(s1H) = vec_mergeh(zero, lS1);			    /* (unsigned short[]) s+lx[0-7] */			    vu8(s2H) = vec_mergeh(zero, lS2);			    /* (unsigned short[]) s+lx[1-8] */			    vu8(s3H) = vec_mergeh(zero, lS3);			    /* (unsigned short[]) d[0-7] */			    vu8(dH) = vec_mergeh(zero, lD);			    /* (unsigned short[]) d[8-15] */			    vu8(dL) = vec_mergel(zero, lD);			    /* s[i] + s[i+1] */			    s0H = vec_add(s0H, s1H);			    /* s[i+lx] + s[i+lx+1] */			    s2H = vec_add(s2H, s3H);			    /* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */			    s0H = vec_add(s0H, s2H);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */			    s0H = vec_add(s0H, two);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */			    s0H = vec_sra(s0H, two);			    /* d[i] + ((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2) */			    dH = vec_add(dH, s0H);			    /* (d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			     *  + 1			     */			    dH = vec_add(dH, one);			    /* ((d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			     *  + 1) >> 1			     */			    dH = vec_sra(dH, one);			    /* (unsigned char[]) d[0-7], d[8-15] */			    vu8(dH) = vec_packsu(dH, dL);			    vec_st(vu8(dH), 0, (unsigned char*)d);			    lS0 = lS2;			    lS1 = lS3;			    s += lx;			    d += lx;			}			/* }}} */		    } else {			/* {{{ */			/* d is offset 8 bytes, work on dL (low bytes).			 */			lS0 = vec_ld(0, (unsigned char*)s);			lS1 = vec_ld(16, (unsigned char*)s);			lS0 = vec_perm(lS0, lS1, perm);			lS1 = vec_slo(lS0, eight);			s += lx;			for (i = 0; i < h; i++) {			    lS2 = vec_ld(0, (unsigned char*)s);			    lS3 = vec_ld(16, (unsigned char*)s);			    lS2 = vec_perm(lS2, lS3, perm);			    lS3 = vec_slo(lS2, eight);			    lD = vec_ld(0, (unsigned char*)d);			    /* (unsigned short[]) s[0-7] */			    vu8(s0H) = vec_mergeh(zero, lS0);			    /* (unsigned short[]) s[1-8] */			    vu8(s1H) = vec_mergeh(zero, lS1);			    /* (unsigned short[]) s+lx[0-7] */			    vu8(s2H) = vec_mergeh(zero, lS2);			    /* (unsigned short[]) s+lx[1-8] */			    vu8(s3H) = vec_mergeh(zero, lS3);			    /* (unsigned short[]) d[0-7] */			    vu8(dH) = vec_mergeh(zero, lD);			    /* (unsigned short[]) d[8-15] */			    vu8(dL) = vec_mergel(zero, lD);			    /* s[i] + s[i+1] */			    s0H = vec_add(s0H, s1H);			    /* s[i+lx] + s[i+lx+1] */			    s2H = vec_add(s2H, s3H);			    /* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */			    s0H = vec_add(s0H, s2H);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */			    s0H = vec_add(s0H, two);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */			    s0H = vec_sra(s0H, two);			    /* d[i] + ((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2) */			    dL = vec_add(dL, s0H);			    /* (d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			     *  + 1			     */			    dL = vec_add(dL, one);			    /* ((d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			     *  + 1) >> 1			     */			    dL = vec_sra(dL, one);			    /* (unsigned char[]) d[0-7], d[8-15] */			    vu8(dH) = vec_packsu(dH, dL);			    vec_st(vu8(dH), 0, (unsigned char*)d);			    lS0 = lS2;			    lS1 = lS3;			    s += lx;			    d += lx;			}			/* }}} */		    }		    /* }}} */		} else /* w == 16 */ {		    /* {{{ */		    perm = vec_lvsl(0, s);		    perm1 = vec_splat_u8(1);		    perm1 = vec_add(perm, perm1);		    lS = vec_ld(0, (unsigned char*)s);		    lS1 = vec_ld(16, (unsigned char*)s);		    lS0 = vec_perm(lS, lS1, perm);		    lS1 = vec_perm(lS, lS1, perm1);		    s += lx;		    for (i = 0; i < h; i++) {			lS = vec_ld(0, (unsigned char*)s);			lS3 = vec_ld(16, (unsigned char*)s);			lS2 = vec_perm(lS, lS3, perm);			lS3 = vec_perm(lS, lS3, perm1);			lD = vec_ld(0, (unsigned char*)d);			/* (unsigned short[]) s[0-7] */			vu8(s0H) = vec_mergeh(zero, lS0);			/* (unsigned short[]) s[8-15] */			vu8(s0L) = vec_mergel(zero, lS0);			/* (unsigned short[]) s[1-8] */			vu8(s1H) = vec_mergeh(zero, lS1);			/* (unsigned short[]) s[9-16] */			vu8(s1L) = vec_mergel(zero, lS1);			/* (unsigned short[]) s+lx[0-7] */			vu8(s2H) = vec_mergeh(zero, lS2);			/* (unsigned short[]) s+lx[8-15] */			vu8(s2L) = vec_mergel(zero, lS2);			/* (unsigned short[]) s+lx[1-8] */			vu8(s3H) = vec_mergeh(zero, lS3);			/* (unsigned short[]) s+lx[9-16] */			vu8(s3L) = vec_mergel(zero, lS3);			/* (unsigned short[]) d[0-7] */			vu8(dH) = vec_mergeh(zero, lD);			/* (unsigned short[]) d[8-15] */			vu8(dL) = vec_mergel(zero, lD);			/* s[i] + s[i+1] */			s0H = vec_add(s0H, s1H);			s0L = vec_add(s0L, s1L);			/* s[i+lx] + s[i+lx+1] */			s2H = vec_add(s2H, s3H);			s2L = vec_add(s2L, s3L);			/* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */			s0H = vec_add(s0H, s2H);			s0L = vec_add(s0L, s2L);			/* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */			s0H = vec_add(s0H, two);			s0L = vec_add(s0L, two);			/* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */			s0H = vec_sra(s0H, two);			s0L = vec_sra(s0L, two);			/* d[i] + ((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2) */			dH = vec_add(dH, s0H);			dL = vec_add(dL, s0L);			/* (d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			 *  + 1			 */			dH = vec_add(dH, one);			dL = vec_add(dL, one);			/* ((d[i]+((s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2))			 *  + 1) >> 1			 */			dH = vec_sra(dH, one);			dL = vec_sra(dL, one);			/* (unsigned char[]) d[0-7], d[8-15] */			vu8(dH) = vec_packsu(dH, dL);			vec_st(vu8(dH), 0, (unsigned char*)d);			lS0 = lS2;			lS1 = lS3;			s += lx;			d += lx;		    }		    /* }}} */		}#if 0		for (j=0; j<h; j++)		{		    for (i=0; i<w; i++)			d[i] = (d[i] + ((unsigned int)(s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2)>>2)+1)>>1;		    s+= lx;		    d+= lx;		}#endif		/* }}} */	    } else /* !addflag */ {		/* {{{ */		zero = vec_splat_u8(0);		one = vec_splat_u16(1);		two = vec_splat_u16(2);		if (w == 8) {		    /* {{{ */		    eight = vec_splat_u8(8);		    perm = vec_lvsl(0, (unsigned char*)s);		    if (((unsigned long)d & 0xf) == 0) {			/* {{{ */			lS0 = vec_ld(0, (unsigned char*)s);			lS1 = vec_ld(16, (unsigned char*)s);			lS0 = vec_perm(lS0, lS1, perm);			lS1 = vec_slo(lS0, eight);			s += lx;			for (i = 0; i < h; i++) {			    lS2 = vec_ld(0, (unsigned char*)s);			    lS3 = vec_ld(16, (unsigned char*)s);			    lS2 = vec_perm(lS2, lS3, perm);			    lS3 = vec_slo(lS2, eight);			    lD = vec_ld(0, (unsigned char*)d);			    /* (unsigned short[]) s[0-7] */			    vu8(s0H) = vec_mergeh(zero, lS0);			    /* (unsigned short[]) s[1-8] */			    vu8(s1H) = vec_mergeh(zero, lS1);			    /* (unsigned short[]) s+lx[0-7] */			    vu8(s2H) = vec_mergeh(zero, lS2);			    /* (unsigned short[]) s+lx[1-8] */			    vu8(s3H) = vec_mergeh(zero, lS3);			    /* (unsigned short[]) d[8-15] */			    vu8(dL) = vec_mergel(zero, lD);			    /* s[i] + s[i+1] */			    s0H = vec_add(s0H, s1H);			    /* s[i+lx] + s[i+lx+1] */			    s2H = vec_add(s2H, s3H);			    /* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */			    s0H = vec_add(s0H, s2H);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */			    s0H = vec_add(s0H, two);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */			    s0H = vec_sra(s0H, two);			    /* (unsigned char[]) d[0-7], d[8-15] */			    vu8(dH) = vec_packsu(s0H, dL);			    vec_st(vu8(dH), 0, (unsigned char*)d);			    lS0 = lS2;			    lS1 = lS3;			    s += lx;			    d += lx;			}			/* }}} */		    } else {			/* {{{ */			/* d is offset 8 bytes, work on dL (low bytes).			 */			lS0 = vec_ld(0, (unsigned char*)s);			lS1 = vec_ld(16, (unsigned char*)s);			lS0 = vec_perm(lS0, lS1, perm);			lS1 = vec_slo(lS0, eight);			s += lx;			for (i = 0; i < h; i++) {			    lS2 = vec_ld(0, (unsigned char*)s);			    lS3 = vec_ld(16, (unsigned char*)s);			    lS2 = vec_perm(lS2, lS3, perm);			    lS3 = vec_slo(lS2, eight);			    lD = vec_ld(0, (unsigned char*)d);			    /* (unsigned short[]) s[0-7] */			    vu8(s0H) = vec_mergeh(zero, lS0);			    /* (unsigned short[]) s[1-8] */			    vu8(s1H) = vec_mergeh(zero, lS1);			    /* (unsigned short[]) s+lx[0-7] */			    vu8(s2H) = vec_mergeh(zero, lS2);			    /* (unsigned short[]) s+lx[1-8] */			    vu8(s3H) = vec_mergeh(zero, lS3);			    /* (unsigned short[]) d[0-7] */			    vu8(dH) = vec_mergeh(zero, lD);			    /* s[i] + s[i+1] */			    s0H = vec_add(s0H, s1H);			    /* s[i+lx] + s[i+lx+1] */			    s2H = vec_add(s2H, s3H);			    /* (s[i]+s[i+1]) + (s[i+lx]+s[i+lx+1]) */			    s0H = vec_add(s0H, s2H);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]) + 2 */			    s0H = vec_add(s0H, two);			    /* (s[i]+s[i+1]+s[i+lx]+s[i+lx+1]+2) >> 2 */			    s0H = vec_sra(s0H, two);			    /* (unsigned char[]) d[0-7], d[8-15] */			    vu8(dH) = vec_packsu(dH, s0H);			    vec_st(vu8(dH), 0, (unsigned char*)d);			    lS0 = lS2;			    lS1 = lS3;			    s += lx;			    d += lx;			}			/* }}} */		    }		    /* }}} */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -