idct.c

来自「Motion JPEG编解码器源代码」· C语言代码 · 共 420 行 · 第 1/2 页
420 行
/* idct.c, this file is part of the * AltiVec optimized library for MJPEG tools MPEG-1/2 Video Encoder * Copyright (C) 2003  James Klicman <james@klicman.org> * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */#ifdef HAVE_CONFIG_H#include <config.h>#endif#include "altivec_conf.h"#include "vectorize.h"#include "../mjpeg_logging.h"/* #define AMBER_ENABLE */#include "amber.h"#ifdef HAVE_ALTIVEC_H/* include last to ensure AltiVec type semantics, especially for bool. 
*/#include <altivec.h>#endif#define W1        1.38703989982604980468750000 /* sqrt(2)*cos(1*PI/16) */#define W2        1.30656301975250244140625000 /* sqrt(2)*cos(2*PI/16) */#define W3        1.17587554454803466796875000 /* sqrt(2)*cos(3*PI/16) */#define W5        0.78569495677947998046875000 /* sqrt(2)*cos(5*PI/16) */#define W6        0.54119610786437988281250000 /* sqrt(2)*cos(6*PI/16) */#define W7        0.27589938044548034667968750 /* sqrt(2)*cos(7*PI/16) */#define SQRT_0_5  0.70710676908493041992187500 /* sqrt(0.5) */#define DIVBY8    0.125                        /* 1/8 */static vector float idctconsts[3] = {    (vector float)VCONST(       W7,    W1-W7,    W1+W7,       W3 ),    (vector float)VCONST(    W3-W5,    W3+W5,       W6,    W2+W6 ),    (vector float)VCONST(    W2-W6, SQRT_0_5,   DIVBY8,        0 )};#define LD_W7       vec_splat(cnsts0, 0)#define LD_W1mW7    vec_splat(cnsts0, 1)#define LD_W1pW7    vec_splat(cnsts0, 2)#define LD_W3       vec_splat(cnsts0, 3)#define LD_W3mW5    vec_splat(cnsts1, 0)#define LD_W3pW5    vec_splat(cnsts1, 1)#define LD_W6       vec_splat(cnsts1, 2)#define LD_W2pW6    vec_splat(cnsts1, 3)#define LD_W2mW6    vec_splat(cnsts2, 0)#define LD_SQRT_0_5 vec_splat(cnsts2, 1)#define LD_DIVBY8   vec_splat(cnsts2, 2)#define IDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                   \    x0 = b0;                                                         \    x1 = b4;                                                         \    x2 = b6;                                                         \    x3 = b2;                                                         \    x4 = b1;                                                         \    x5 = b7;                                                         \    x6 = b5;                                                         \    x7 = b3;                                                         \                                                                     \    /* first stage */                
                                \    cnst = LD_W7;                                                    \    x8 = vec_add(x4, x5);                                            \    x8 = vec_madd(cnst, x8, mzero);     /* x8 = W7*(x4+x5); */       \    cnst = LD_W1mW7;                                                 \    x4 = vec_madd(cnst, x4, x8);        /* x4 = x8 + (W1-W7)*x4; */  \    cnst = LD_W1pW7;                                                 \    x5 = vec_nmsub(cnst, x5, x8);       /* x5 = x8 - (W1+W7)*x5; */  \    cnst = LD_W3;                                                    \    x8 = vec_add(x6, x7);                                            \    x8 = vec_madd(cnst, x8, mzero);     /* x8 = W3*(x6+x7); */       \    cnst = LD_W3mW5;                                                 \    x6 = vec_nmsub(cnst, x6, x8);       /* x6 = x8 - (W3-W5)*x6; */  \    cnst = LD_W3pW5;                                                 \    x7 = vec_nmsub(cnst, x7, x8);       /* x7 = x8 - (W3+W5)*x7; */  \                                                                     \    /* second stage */                                               \    x8 = vec_add(x0, x1);               /* x8 = x0 + x1; */          \    x0 = vec_sub(x0, x1);               /* x0 -= x1; */              \    cnst = LD_W6;                                                    \    x1 = vec_add(x3, x2);                                            \    x1 = vec_madd(cnst, x1, mzero);     /* x1 = W6*(x3+x2); */       \    cnst = LD_W2pW6;                                                 \    x2 = vec_nmsub(cnst, x2, x1);       /* x2 = x1 - (W2+W6)*x2; */  \    cnst = LD_W2mW6;                                                 \    x3 = vec_madd(cnst, x3, x1);        /* x3 = x1 + (W2-W6)*x3; */  \    x1 = vec_add(x4, x6);               /* x1 = x4 + x6; */          \    x4 = vec_sub(x4, x6);               /* x4 -= x6; */              \    x6 = vec_add(x5, x7);               /* x6 = x5 + x7; */          \    x5 
= vec_sub(x5, x7);               /* x5 -= x7; */              \                                                                     \    /* third stage */                                                \    x7 = vec_add(x8, x3);               /* x7 = x8 + x3; */          \    x8 = vec_sub(x8, x3);               /* x8 -= x3; */              \    x3 = vec_add(x0, x2);               /* x3 = x0 + x2; */          \    x0 = vec_sub(x0, x2);               /* x0 -= x2; */              \    cnst = LD_SQRT_0_5;                                              \    x2 = vec_add(x4, x5);                                            \    x2 = vec_madd(cnst, x2, mzero);     /* x2 = SQRT_0_5*(x4+x5); */ \    x4 = vec_sub(x4, x5);                                            \    x4 = vec_madd(cnst, x4, mzero);     /* x4 = SQRT_0_5*(x4-x5); */ \                                                                     \    /* fourth stage */                                               \    b0 = vec_add(x7, x1);               /* x7+x1 */                  \    b1 = vec_add(x3, x2);               /* x3+x2 */                  \    b2 = vec_add(x0, x4);               /* x0+x4 */                  \    b3 = vec_add(x8, x6);               /* x8+x6 */                  \    b4 = vec_sub(x8, x6);               /* x8-x6 */                  \    b5 = vec_sub(x0, x4);               /* x0-x4 */                  \    b6 = vec_sub(x3, x2);               /* x3-x2 */                  \    b7 = vec_sub(x7, x1);               /* x7-x1 */                  \    /* }}} */#define IDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                   \    x0 = b0;                                                         \    x1 = b4;                                                         \    x2 = b6;                                                         \    x3 = b2;                                                         \    x4 = b1;                                                         \    x5 = b7;                      
                                   \    x6 = b5;                                                         \    x7 = b3;                                                         \                                                                     \    /* first stage */                                                \    cnst = LD_W7;                                                    \    x8 = vec_add(x4, x5);                                            \    x8 = vec_madd(cnst, x8, mzero);     /* x8 = W7*(x4+x5); */       \    cnst = LD_W1mW7;                                                 \    x4 = vec_madd(cnst, x4, x8);        /* x4 = (x8+(W1-W7)*x4); */  \    cnst = LD_W1pW7;                                                 \    x5 = vec_nmsub(cnst, x5, x8);       /* x5 = (x8-(W1+W7)*x5); */  \    cnst = LD_W3;                                                    \    x8 = vec_add(x6, x7);                                            \    x8 = vec_madd(cnst, x8, mzero);     /* x8 = W3*(x6+x7); */       \    cnst = LD_W3mW5;                                                 \    x6 = vec_nmsub(cnst, x6, x8);       /* x6 = (x8-(W3-W5)*x6); */  \    cnst = LD_W3pW5;                                                 \    x7 = vec_nmsub(cnst, x7, x8);       /* x7 = (x8-(W3+W5)*x7); */  \                                                                     \    /* second stage */                                               \    cnst = LD_DIVBY8;                                                \    x8 = vec_add(x0, x1);                                            \    x8 = vec_madd(x8, cnst, mzero);     /* x8 = (x0 + x1); */        \    x0 = vec_sub(x0, x1);                                            \    x0 = vec_madd(x0, cnst, mzero);     /* x0 = (x0 - x1); */        \    cnst = LD_W6;                                                    \    x1 = vec_add(x3, x2);                                            \    x1 = vec_madd(cnst, x1, mzero);     /* x1 = W6*(x3+x2); */       \    
cnst = LD_W2pW6;                                                 \    x2 = vec_nmsub(cnst, x2, x1);       /* x2 = (x1-(W2+W6)*x2); */  \    cnst = LD_W2mW6;                                                 \    x3 = vec_madd(cnst, x3, x1);        /* x3 = (x1+(W2-W6)*x3); */  \    x1 = vec_add(x4, x6);               /* x1 = x4 + x6; */          \    x4 = vec_sub(x4, x6);               /* x4 -= x6; */              \    x6 = vec_add(x5, x7);               /* x6 = x5 + x7; */          \    x5 = vec_sub(x5, x7);               /* x5 -= x7; */              \                                                                     \    /* third stage */                                                \    x7 = vec_add(x8, x3);               /* x7 = x8 + x3; */          \    x8 = vec_sub(x8, x3);               /* x8 -= x3; */              \    x3 = vec_add(x0, x2);               /* x3 = x0 + x2; */          \    x0 = vec_sub(x0, x2);               /* x0 -= x2; */              \    cnst = LD_SQRT_0_5;                                              \    x2 = vec_add(x4, x5);                                            \    x2 = vec_madd(cnst, x2, mzero);     /* x2 = SQRT_0_5*(x4+x5); */ \    x4 = vec_sub(x4, x5);                                            \    x4 = vec_madd(cnst, x4, mzero);     /* x4 = SQRT_0_5*(x4-x5); */ \                                                                     \    /* fourth stage */                                               \    b0 = vec_add(x7, x1); /* x7+x1 */                                \    b1 = vec_add(x3, x2); /* x3+x2 */                                \    b2 = vec_add(x0, x4); /* x0+x4 */                                \    b3 = vec_add(x8, x6); /* x8+x6 */                                \    b4 = vec_sub(x8, x6); /* x8-x6 */                                \    b5 = vec_sub(x0, x4); /* x0-x4 */                                \    b6 = vec_sub(x3, x2); /* x3-x2 */                                \    b7 = vec_sub(x7, x1); /* x7-x1 */       
                         \    /* }}} */#define IDCT_PDECL short *block#define IDCT_ARGS block#define IDCT_PFMT "block=0x%X"/* two dimensional inverse discrete cosine transform */void idct_altivec(IDCT_PDECL){    vector signed short *bp;    vector float *cp;    vector float b00, b10, b20, b30, b40, b50, b60, b70;    vector float b01, b11, b21, b31, b41, b51, b61, b71;    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
idct.c - 源码说明

本页面展示了「Motion JPEG编解码器源代码」中的 idct.c 源码文件，该文件采用 C 语言编写，共 420 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包，用于本地学习和开发。
虫虫下载站收录了大量与Motion相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?