📄 idct.c
字号:
/* idct.c, this file is part of the
 * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder
 * Copyright (C) 2003 James Klicman <james@klicman.org>
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "altivec_conf.h"
#include "vectorize.h"
#include "../mjpeg_logging.h"

/* #define AMBER_ENABLE */
#include "amber.h"

#ifdef HAVE_ALTIVEC_H
/* include last to ensure AltiVec type semantics, especially for bool.
 */
#include <altivec.h>
#endif

/*
 * Transform constants. Wk = sqrt(2)*cos(k*PI/16); the literals are written
 * out with many digits, the defining expression is given beside each one.
 */
#define W1 1.38703989982604980468750000 /* sqrt(2)*cos(1*PI/16) */
#define W2 1.30656301975250244140625000 /* sqrt(2)*cos(2*PI/16) */
#define W3 1.17587554454803466796875000 /* sqrt(2)*cos(3*PI/16) */
#define W5 0.78569495677947998046875000 /* sqrt(2)*cos(5*PI/16) */
#define W6 0.54119610786437988281250000 /* sqrt(2)*cos(6*PI/16) */
#define W7 0.27589938044548034667968750 /* sqrt(2)*cos(7*PI/16) */
#define SQRT_0_5 0.70710676908493041992187500 /* sqrt(0.5) */
#define DIVBY8 0.125 /* 1/8 */

/*
 * The constants (and the sums/differences the butterflies need) packed four
 * to a vector. The lane order matches the lane indices used by the LD_*
 * macros below: e.g. lane 1 of the first vector is W1-W7 and LD_W1mW7
 * splats lane 1 of cnsts0. The last lane of the third vector is 0 and is
 * not referenced by any LD_* macro in this file section.
 * NOTE(review): presumably cnsts0..cnsts2 are loaded from idctconsts[0..2]
 * inside idct_altivec -- the load is not visible here, confirm.
 */
static vector float idctconsts[3] = {
    (vector float)VCONST( W7, W1-W7, W1+W7, W3 ),
    (vector float)VCONST( W3-W5, W3+W5, W6, W2+W6 ),
    (vector float)VCONST( W2-W6, SQRT_0_5, DIVBY8, 0 )
};

/*
 * Each LD_* macro expands to a vec_splat of one lane of cnsts0, cnsts1 or
 * cnsts2, i.e. it yields a vector with that constant in every lane.
 * Variables named cnsts0, cnsts1 and cnsts2 must be in scope at the point
 * of use.
 */
#define LD_W7 vec_splat(cnsts0, 0)
#define LD_W1mW7 vec_splat(cnsts0, 1)
#define LD_W1pW7 vec_splat(cnsts0, 2)
#define LD_W3 vec_splat(cnsts0, 3)
#define LD_W3mW5 vec_splat(cnsts1, 0)
#define LD_W3pW5 vec_splat(cnsts1, 1)
#define LD_W6 vec_splat(cnsts1, 2)
#define LD_W2pW6 vec_splat(cnsts1, 3)
#define LD_W2mW6 vec_splat(cnsts2, 0)
#define LD_SQRT_0_5 vec_splat(cnsts2, 1)
#define LD_DIVBY8 vec_splat(cnsts2, 2)

/*
 * IDCTROW(b0..b7): four-stage butterfly over eight vectors (the row pass,
 * judging by the name). The arguments are read at the top and overwritten
 * with the results in the fourth stage, so each must be an lvalue.
 *
 * Requirements on the expansion site:
 *   - x0..x8, cnst and mzero must be declared and in scope, together with
 *     cnsts0..cnsts2 (used through the LD_* macros);
 *   - the macro expands to a bare sequence of statements (no do/while(0)
 *     wrapper), so it must not be used as the body of an unbraced if/else
 *     or loop.
 *
 * vec_madd(c, x, mzero) is used as a plain multiply c*x, as the inline
 * comments indicate; vec_nmsub(c, x, y) yields y - c*x.
 * NOTE(review): mzero presumably holds -0.0f in every lane (name suggests
 * so); its initialization is not visible here -- confirm.
 */
#define IDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \
    x0 = b0; \
    x1 = b4; \
    x2 = b6; \
    x3 = b2; \
    x4 = b1; \
    x5 = b7; \
    x6 = b5; \
    x7 = b3; \
    \
    /* first stage */ \
    cnst = LD_W7; \
    x8 = vec_add(x4, x5); \
    x8 = vec_madd(cnst, x8, mzero); /* x8 = W7*(x4+x5); */ \
    cnst = LD_W1mW7; \
    x4 = vec_madd(cnst, x4, x8); /* x4 = x8 + (W1-W7)*x4; */ \
    cnst = LD_W1pW7; \
    x5 = vec_nmsub(cnst, x5, x8); /* x5 = x8 - (W1+W7)*x5; */ \
    cnst = LD_W3; \
    x8 = vec_add(x6, x7); \
    x8 = vec_madd(cnst, x8, mzero); /* x8 = W3*(x6+x7); */ \
    cnst = LD_W3mW5; \
    x6 = vec_nmsub(cnst, x6, x8); /* x6 = x8 - (W3-W5)*x6; */ \
    cnst = LD_W3pW5; \
    x7 = vec_nmsub(cnst, x7, x8); /* x7 = x8 - (W3+W5)*x7; */ \
    \
    /* second stage */ \
    x8 = vec_add(x0, x1); /* x8 = x0 + x1; */ \
    x0 = vec_sub(x0, x1); /* x0 -= x1; */ \
    cnst = LD_W6; \
    x1 = vec_add(x3, x2); \
    x1 = vec_madd(cnst, x1, mzero); /* x1 = W6*(x3+x2); */ \
    cnst = LD_W2pW6; \
    x2 = vec_nmsub(cnst, x2, x1); /* x2 = x1 - (W2+W6)*x2; */ \
    cnst = LD_W2mW6; \
    x3 = vec_madd(cnst, x3, x1); /* x3 = x1 + (W2-W6)*x3; */ \
    x1 = vec_add(x4, x6); /* x1 = x4 + x6; */ \
    x4 = vec_sub(x4, x6); /* x4 -= x6; */ \
    x6 = vec_add(x5, x7); /* x6 = x5 + x7; */ \
    x5 = vec_sub(x5, x7); /* x5 -= x7; */ \
    \
    /* third stage */ \
    x7 = vec_add(x8, x3); /* x7 = x8 + x3; */ \
    x8 = vec_sub(x8, x3); /* x8 -= x3; */ \
    x3 = vec_add(x0, x2); /* x3 = x0 + x2; */ \
    x0 = vec_sub(x0, x2); /* x0 -= x2; */ \
    cnst = LD_SQRT_0_5; \
    x2 = vec_add(x4, x5); \
    x2 = vec_madd(cnst, x2, mzero); /* x2 = SQRT_0_5*(x4+x5); */ \
    x4 = vec_sub(x4, x5); \
    x4 = vec_madd(cnst, x4, mzero); /* x4 = SQRT_0_5*(x4-x5); */ \
    \
    /* fourth stage */ \
    b0 = vec_add(x7, x1); /* x7+x1 */ \
    b1 = vec_add(x3, x2); /* x3+x2 */ \
    b2 = vec_add(x0, x4); /* x0+x4 */ \
    b3 = vec_add(x8, x6); /* x8+x6 */ \
    b4 = vec_sub(x8, x6); /* x8-x6 */ \
    b5 = vec_sub(x0, x4); /* x0-x4 */ \
    b6 = vec_sub(x3, x2); /* x3-x2 */ \
    b7 = vec_sub(x7, x1); /* x7-x1 */ \
    /* }}} */

/*
 * IDCTCOL(b0..b7): same four-stage butterfly as IDCTROW (the column pass,
 * judging by the name), with one difference in the second stage: the sum
 * and difference of x0 and x1 are multiplied by DIVBY8 (1/8).
 *
 * The same requirements apply: x0..x8, cnst, mzero and cnsts0..cnsts2 must
 * be in scope, the arguments must be lvalues, and the expansion is a bare
 * statement sequence (do not use it as an unbraced if/else or loop body).
 *
 * NOTE(review): only the x0+x1 and x0-x1 terms are scaled by 1/8 in this
 * macro; the terms derived from x2..x7 are not scaled here. Confirm that
 * any remaining scaling is applied elsewhere (not visible in this section).
 */
#define IDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */ \
    x0 = b0; \
    x1 = b4; \
    x2 = b6; \
    x3 = b2; \
    x4 = b1; \
    x5 = b7; \
    x6 = b5; \
    x7 = b3; \
    \
    /* first stage */ \
    cnst = LD_W7; \
    x8 = vec_add(x4, x5); \
    x8 = vec_madd(cnst, x8, mzero); /* x8 = W7*(x4+x5); */ \
    cnst = LD_W1mW7; \
    x4 = vec_madd(cnst, x4, x8); /* x4 = (x8+(W1-W7)*x4); */ \
    cnst = LD_W1pW7; \
    x5 = vec_nmsub(cnst, x5, x8); /* x5 = (x8-(W1+W7)*x5); */ \
    cnst = LD_W3; \
    x8 = vec_add(x6, x7); \
    x8 = vec_madd(cnst, x8, mzero); /* x8 = W3*(x6+x7); */ \
    cnst = LD_W3mW5; \
    x6 = vec_nmsub(cnst, x6, x8); /* x6 = (x8-(W3-W5)*x6); */ \
    cnst = LD_W3pW5; \
    x7 = vec_nmsub(cnst, x7, x8); /* x7 = (x8-(W3+W5)*x7); */ \
    \
    /* second stage */ \
    cnst = LD_DIVBY8; \
    x8 = vec_add(x0, x1); \
    x8 = vec_madd(x8, cnst, mzero); /* x8 = DIVBY8*(x0 + x1); */ \
    x0 = vec_sub(x0, x1); \
    x0 = vec_madd(x0, cnst, mzero); /* x0 = DIVBY8*(x0 - x1); */ \
    cnst = LD_W6; \
    x1 = vec_add(x3, x2); \
    x1 = vec_madd(cnst, x1, mzero); /* x1 = W6*(x3+x2); */ \
    cnst = LD_W2pW6; \
    x2 = vec_nmsub(cnst, x2, x1); /* x2 = (x1-(W2+W6)*x2); */ \
    cnst = LD_W2mW6; \
    x3 = vec_madd(cnst, x3, x1); /* x3 = (x1+(W2-W6)*x3); */ \
    x1 = vec_add(x4, x6); /* x1 = x4 + x6; */ \
    x4 = vec_sub(x4, x6); /* x4 -= x6; */ \
    x6 = vec_add(x5, x7); /* x6 = x5 + x7; */ \
    x5 = vec_sub(x5, x7); /* x5 -= x7; */ \
    \
    /* third stage */ \
    x7 = vec_add(x8, x3); /* x7 = x8 + x3; */ \
    x8 = vec_sub(x8, x3); /* x8 -= x3; */ \
    x3 = vec_add(x0, x2); /* x3 = x0 + x2; */ \
    x0 = vec_sub(x0, x2); /* x0 -= x2; */ \
    cnst = LD_SQRT_0_5; \
    x2 = vec_add(x4, x5); \
    x2 = vec_madd(cnst, x2, mzero); /* x2 = SQRT_0_5*(x4+x5); */ \
    x4 = vec_sub(x4, x5); \
    x4 = vec_madd(cnst, x4, mzero); /* x4 = SQRT_0_5*(x4-x5); */ \
    \
    /* fourth stage */ \
    b0 = vec_add(x7, x1); /* x7+x1 */ \
    b1 = vec_add(x3, x2); /* x3+x2 */ \
    b2 = vec_add(x0, x4); /* x0+x4 */ \
    b3 = vec_add(x8, x6); /* x8+x6 */ \
    b4 = vec_sub(x8, x6); /* x8-x6 */ \
    b5 = vec_sub(x0, x4); /* x0-x4 */ \
    b6 = vec_sub(x3, x2); /* x3-x2 */ \
    b7 = vec_sub(x7, x1); /* x7-x1 */ \
    /* }}} */

/*
 * Parameter declaration, argument list and printf-style format fragment
 * for idct_altivec, kept as macros so the three stay in step.
 * NOTE(review): %X is the conversion for unsigned int; if IDCT_PFMT is used
 * to print the block pointer itself, %p with a (void *) cast would be the
 * matching conversion -- confirm at the use site (not visible here).
 */
#define IDCT_PDECL short *block
#define IDCT_ARGS block
#define IDCT_PFMT "block=0x%X"

/* two dimensional inverse discrete cosine transform */
void idct_altivec(IDCT_PDECL)
{
    vector signed short *bp;
    vector float *cp;
    vector float b00, b10, b20, b30, b40, b50, b60, b70;
    vector float b01, b11, b21, b31, b41, b51, b61, b71;
    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -