// aan_new.c
//;***************************************************************************/
//;* Copyright (c) 1998 Intel Corporation.
//;* All rights reserved.
//;***************************************************************************/
//
//
//* aan.cpp - 2d idct using aan algorithm
#include <math.h>
//#include "Gen_math.h"
// Element type used for every input, intermediate and output value of the
// line transforms below.
typedef short var;
// Returns floor(f + 0.5) converted to short; defined further down.
short int_val(float f);
// Multipliers used by aan_line() (the inverse-transform butterfly).
#define B1 1.41421
#define B2 2.61313
#define B3 1.41421
#define B4 1.08239
#define B5 0.76537
// Multipliers used by dct_aan_line() (the forward-transform butterfly).
#define A1 0.70711
#define A2 0.54120
#define A3 0.70711
#define A4 1.30658
#define A5 0.38268
// Fixed-point parameters for the MMX(tm) Technology DCT. NMUL is 1<<NSHIFT and
// WA1..WA5 are the A1..A5 values multiplied by NMUL and converted to short.
// None of the macros from NSHIFT to WA5 is referenced by the code visible in
// this file. The extra numbers in the trailing comments look like previously
// tried settings - TODO confirm.
#define NSHIFT 14 //8 //13 //14
#define SIMNSHIFT 14 //8 //13 //14 // 8 // simulate pmulh
#define PRESHIFT 2 // 3
#define NMUL (1<<NSHIFT)
#define WA1 (short)(0.70711*(double)NMUL)
#define WA2 (short)(0.54120*(double)NMUL)
#define WA3 (short)(0.70711*(double)NMUL)
#define WA4 (short)(1.30658*(double)NMUL)
#define WA5 (short)(0.38268*(double)NMUL)
// Declared here, defined elsewhere; not referenced by the code visible in this
// file.
extern int max_error9;
extern int max_error10;
double sqrt1(double n)
{
return sqrt(n); //israelh. patch
}
// Forward declarations for helpers that are defined later in this file but
// used before their definitions.
static int coeff_initinalizer(float y[64]);
short clip(short a, short b, short c);
short int_val(float f)
{
return (short)floor(f+0.5);
}
// Scale tables shared by the transforms in this file.
//   ps  - filled by coeff_initinalizer() when init_aan() passes it in; read by
//         prescale().
//   ps1 - filled by coeff_initinalizer(); read by postscale() and
//         postscale_transpose().
//   ps3 - filled by coeff_initinalizer(); not read by the code visible here.
//   ps4 - not referenced by the code visible here.
float ps[64],ps1[64],ps3[8],ps4[6];
// Disabled variant that tried to fill the tables from a file-scope initializer;
// init_aan() does the same call at run time instead.
//int dummy=coeff_initinalizer(ps); // auto initialize ps..
// Fills the scale tables by running coeff_initinalizer() on ps.
// The helper's return value is always 0, so it is discarded.
void init_aan(void)
{
    (void)coeff_initinalizer(ps);
}
// Computes the scale tables used by the transforms in this file.
//
//   y - receives 64 factors, y[i*8+j] = e(i)*e(j)*cos(i*pi/16)*cos(j*pi/16),
//       with e(0) = 1/sqrt(8) and e(k) = 0.5 for k > 0.
//
// Side effects: also fills the file-scope tables
//   ps3[i]     = 4*c(i) / cos(i*pi/16)
//   ps1[i*8+j] = 16*c(i)*c(j) / (cos(i*pi/16)*cos(j*pi/16))
// with c(0) = 1/(8*sqrt(2)) and c(k) = 1/16 for k > 0, and finally forces
// ps1[63] to 1.
//
// Always returns 0.
static int coeff_initinalizer(float y[64])
{
    // M_PI is not part of ISO C (it is missing under -std=c11 on glibc and on
    // MSVC without _USE_MATH_DEFINES), so use a local constant with the same
    // value instead.
    const double pi = 3.14159265358979323846;
    float ei, ej, ci, cj;
    int i, j;

    for (i = 0; i < 8; i++) {
        // Depends only on the row index, so compute it once per row.
        const double cos_i = cos(i * pi / 16);

        ei = i == 0 ? 1 / sqrt(8.) : 0.5;
        ci = i == 0 ? 1 / (8. * sqrt(2.)) : 1.0 / 16.0;
        // Original author's note: this is table 1 from the AAN paper; note the
        // trick of dividing by 8 or by 16.
        ps3[i] = 2.0 * 2.0 * ci / cos_i;

        for (j = 0; j < 8; j++) {
            const double cos_j = cos(j * pi / 16);

            ej = j == 0 ? 1 / sqrt(8.) : 0.5;
            cj = j == 0 ? 1 / (8 * sqrt(2.)) : 1.0 / 16.0;
            y[i * 8 + j] = ei * ej * cos_i * cos_j;
            // Original author's note: "patch the first 4.0?"
            ps1[i * 8 + j] = 4.0 * 4.0 * ci * cj / (cos_i * cos_j);
        }
    }
    // Overrides the value just computed for the last entry; the source does not
    // record why.
    ps1[63] = 1;
    return 0;
}
// Multiplies each of the 64 entries of v by the matching entry of the ps table
// (single-precision product) and stores the result, rounded with int_val(),
// back into v.
static void prescale(var v[64])
{
    int k;

    for (k = 0; k < 64; k++) {
        const float scaled = (float)v[k] * ps[k];

        v[k] = int_val(scaled);
    }
}
// Multiplies each of the 64 entries of v by the matching entry of the ps1
// table (single-precision product) and stores the result, rounded with
// int_val(), back into v.
static void postscale(var v[64])
{
    int k;

    for (k = 0; k < 64; k++) {
        const float scaled = (float)v[k] * ps1[k];

        v[k] = int_val(scaled);
    }
}
// One 8-point pass of the inverse transform.
//
// Reads the eight samples in_var[start + k*step] (k = 0..7), runs the even/odd
// butterfly below and writes the eight results to out_var[start + k*step].
// All eight inputs are read before any output is written. Every intermediate
// is held in a `var`; products with the B1..B5 multipliers are rounded with
// int_val().
void aan_line(var in_var[64], var out_var[64], int start, int step)
{
    var s[8];
    var sum04, diff04, sum26, diff26, rot26, mid26;
    var even0, even1, even2, even3;
    var sum17, diff17, sum53, diff53, diff_sums, diff_diffs;
    var scaled53, scaled_sums, scaled17, scaled_dd;
    var low_mix, high_mix;
    var odd0, odd1, odd2, odd3;
    int k;

    // Gather the line.
    for (k = 0; k < 8; k++)
        s[k] = in_var[start + k * step];

    // Even part: samples 0, 2, 4, 6.
    diff26 = s[2] - s[6];
    sum26 = s[2] + s[6];
    rot26 = int_val((float)(diff26 * B1));
    sum04 = s[0] + s[4];
    diff04 = s[0] - s[4];
    mid26 = rot26 - sum26;
    even0 = sum04 + sum26;
    even1 = diff04 + mid26;
    even2 = diff04 - mid26;
    even3 = sum04 - sum26;

    // Odd part: samples 1, 3, 5, 7.
    diff53 = s[5] - s[3];
    sum17 = s[1] + s[7];
    diff17 = s[1] - s[7];
    sum53 = s[5] + s[3];
    diff_sums = sum17 - sum53;
    odd0 = sum17 + sum53;
    diff_diffs = diff53 - diff17;
    scaled53 = int_val((float)(diff53 * B2));
    scaled_sums = int_val((float)(diff_sums * B3));
    scaled17 = int_val((float)(diff17 * B4));
    scaled_dd = int_val((float)(diff_diffs * B5));
    low_mix = scaled_dd - scaled53;
    high_mix = scaled17 - scaled_dd;
    odd1 = high_mix - odd0;
    odd2 = scaled_sums - odd1;
    odd3 = low_mix + odd2; // kept as a sum so no unary minus is needed

    // Output butterfly: even[k] +/- odd[k] lands on outputs k and 7-k
    // (outputs 3 and 4 use the opposite signs).
    out_var[start] = even0 + odd0;
    out_var[start + 1 * step] = even1 + odd1;
    out_var[start + 2 * step] = even2 + odd2;
    out_var[start + 3 * step] = even3 - odd3;
    out_var[start + 4 * step] = even3 + odd3;
    out_var[start + 5 * step] = even2 - odd2;
    out_var[start + 6 * step] = even1 - odd1;
    out_var[start + 7 * step] = even0 - odd0;
}
// 8x8 inverse transform (the file header describes it as a 2-D IDCT using the
// AAN algorithm).
//
//   s_in  - 64 input values; each is clipped to [-2040, 2040] before use.
//   s_out - receives the 64 results.
//
// Steps: clip, prescale() with the ps table, aan_line() over the eight
// stride-8 lines (columns), then aan_line() over the eight contiguous lines
// (rows). The scale tables are filled by init_aan().
void aan(short *s_in, short *s_out)
{
    var clipped[64], pass1[64], pass2[64];
    int k;

    for (k = 0; k < 64; k++)
        clipped[k] = clip(s_in[k], -2040, 2040);

    prescale(clipped);

    for (k = 0; k < 8; k++) // columns
        aan_line(clipped, pass1, k, 8);
    for (k = 0; k < 8; k++) // then rows
        aan_line(pass1, pass2, k * 8, 1);

    for (k = 0; k < 64; k++)
        s_out[k] = int_val((float)pass2[k]);
}
// Limits a to the range [b, c]: returns b when a < b, otherwise c when a > c,
// otherwise a. The lower bound is tested first, so it wins if b > c.
short clip(short a, short b, short c)
{
    short bounded = a;

    if (a < b) {
        bounded = b;
    } else if (a > c) {
        bounded = c;
    }
    return bounded;
}
// One 8-point pass of the forward transform.
//
// Reads the eight samples in_var[start + k*step] (k = 0..7), runs the
// butterfly stages below and writes the eight results to
// out_var[start + k*step]. All eight inputs are read before any output is
// written.
//
// Every intermediate is held in a `var`. Products with the A1..A5 multipliers
// are evaluated in double and converted to `var` by plain assignment, i.e. the
// fraction is discarded rather than rounded with int_val().
void dct_aan_line(var in_var[64], var out_var[64], int start, int step)
{
    var s[8];
    var sum07, diff07, sum16, diff16, sum25, diff25, sum34, diff34;
    var ee0, ee1, ee2, ee3;
    var oo4, oo5, oo6;
    var y0, y1, y2, y3, y4, y5, y6, y7;
    var pre2, shared, rot2, rot4, rot5, rot6, mix5, mix7;
    int k;

    // Gather the line.
    for (k = 0; k < 8; k++)
        s[k] = in_var[start + k * step];

    // Stage 1: mirror-pair sums and differences.
    sum07 = s[0] + s[7];
    diff07 = s[0] - s[7];
    sum16 = s[1] + s[6];
    diff16 = s[1] - s[6];
    sum25 = s[2] + s[5];
    diff25 = s[2] - s[5];
    sum34 = s[3] + s[4];
    diff34 = s[3] - s[4];

    // Stage 2, low half (from the sums).
    ee0 = sum07 + sum34;
    ee3 = sum07 - sum34;
    ee1 = sum16 + sum25;
    ee2 = sum16 - sum25;

    // Stage 2, high half (from the differences); diff07 passes through as is.
    oo6 = diff16 + diff07;
    oo5 = diff25 + diff16;
    oo4 = -(diff34 + diff25);

    // Stage 3.
    y0 = ee0 + ee1;
    y4 = ee0 - ee1;
    pre2 = ee2 + ee3;
    shared = (oo4 + oo6) * A5; // A5 term shared by rot4 and rot6

    // Stage 4: multiplications.
    rot2 = pre2 * A1;
    rot4 = -(oo4 * A2 + shared);
    rot6 = oo6 * A4 - shared;
    rot5 = oo5 * A3;

    // Stage 5.
    y2 = rot2 + ee3;
    y6 = ee3 - rot2;
    mix5 = diff07 + rot5;
    mix7 = diff07 - rot5;

    // Stage 6.
    y5 = rot4 + mix7;
    y3 = mix7 - rot4;
    y1 = mix5 + rot6;
    y7 = mix5 - rot6;

    // Scatter the results.
    out_var[start] = y0;
    out_var[start + 1 * step] = y1;
    out_var[start + 2 * step] = y2;
    out_var[start + 3 * step] = y3;
    out_var[start + 4 * step] = y4;
    out_var[start + 5 * step] = y5;
    out_var[start + 6 * step] = y6;
    out_var[start + 7 * step] = y7;
}
// 8x8 forward transform built from dct_aan_line().
//
//   s_in  - 64 input values; each is clipped to [-2040, 2040] before use.
//   s_out - receives the 64 results.
//
// Steps: clip, dct_aan_line() over the eight stride-8 lines (columns), then
// over the eight contiguous lines (rows), then postscale() with the ps1 table.
// The scale tables are filled by init_aan().
void dct_aan(short *s_in, short *s_out)
{
    var clipped[64], pass1[64], pass2[64];
    int k;

    for (k = 0; k < 64; k++)
        clipped[k] = clip(s_in[k], -2040, 2040);

    for (k = 0; k < 8; k++) // columns
        dct_aan_line(clipped, pass1, k, 8);
    for (k = 0; k < 8; k++) // then rows
        dct_aan_line(pass1, pass2, k * 8, 1);

    postscale(pass2);

    for (k = 0; k < 64; k++)
        s_out[k] = int_val((float)pass2[k]);
}
// NOTE(review): this is a second definition of init_aan(); a definition with
// the same body already appears earlier in this file. C permits only one
// definition of a function per translation unit, so one of the two copies has
// to be removed before this file can compile.
//
// Fills the scale tables by running coeff_initinalizer() on ps; the returned
// value is stored in a local that is never read.
void init_aan(void)
{
int dummy;
dummy=coeff_initinalizer(ps); // auto initialize ps..
}
// NOTE(review): this is a second definition of int_val(); a definition with
// the same body already appears earlier in this file. C permits only one
// definition of a function per translation unit, so one of the two copies has
// to be removed before this file can compile.
//
// Returns floor(f + 0.5) converted to short.
short int_val(float f)
{
return (short)floor(f+0.5);
}
// Scales and transposes an 8x8 block in place.
//
// For every row/column pair the entry v[row*8+col] is multiplied by
// ps1[row*8+col] (single-precision product), rounded with int_val(), shifted
// right by one bit, and stored at the transposed position v[col*8+row].
// All results are computed from the original contents of v before v is
// overwritten.
void postscale_transpose(short *v)
{
    short result[64];
    int row, col, k;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            const float scaled = (float)v[row * 8 + col] * ps1[row * 8 + col];

            result[col * 8 + row] = int_val(scaled) >> 1;
        }
    }

    for (k = 0; k < 64; k++)
        v[k] = result[k];
}
// Transposes the 8x8 block stored row by row in v[0..63], in place:
// afterwards v[col*8+row] holds what v[row*8+col] held before.
void transpose(short *v)
{
    int row, col;

    // Swap each element above the diagonal with its mirror below it.
    for (row = 0; row < 8; row++) {
        for (col = row + 1; col < 8; col++) {
            const short upper = v[row * 8 + col];

            v[row * 8 + col] = v[col * 8 + row];
            v[col * 8 + row] = upper;
        }
    }
}
/* (Code-viewer page residue removed here: keyboard-shortcut help text that is
   not part of the source file.) */