📄 jp5.txt
字号:
/*
 * Fixed-point rotation constants used by the ROTATE2 steps of jpeg_idct().
 *
 * The active branch holds precomputed integer literals; the disabled #else
 * branch shows the formulas they were derived from. FX(x) scales x by
 * (1 << FIX) and rounds to nearest.
 *
 * NOTE(review): the literals match the formulas for FIX == 16
 * (e.g. 0.5412 * 65536 ~= 35468); FIX is defined outside this excerpt,
 * so confirm it is 16 before changing either branch.
 */
#if 1
/* ROT6: c2-c6, 2*c6, 2*c2 */
#define ROT6_C 35468
#define ROT6_SmC 50159
#define ROT6_SpC 121095
/* ROT17: c1+c7, 2*c7, 2*c1 */
#define ROT17_C 77062
#define ROT17_SmC 25571
#define ROT17_SpC 128553
/* ROT37: (c3-c7)/c4, 2*(c5+c7), 2*(c1-c3) */
#define ROT37_C 58981
#define ROT37_SmC 98391
#define ROT37_SpC 19571
/* ROT13: (c1+c3)/c4, 2*(c3+c7), 2*(c1+c5) */
#define ROT13_C 167963
#define ROT13_SmC 134553
#define ROT13_SpC 201373
#else
/*
 * Reference derivation (not compiled). cN = cos(N*pi/16).
 * NOTE(review): file-scope initializers that call cos() are not constant
 * expressions in C, so this branch is documentation unless built as C++.
 */
#define FX(x) ( (int)floor((x)*(1<<FIX) + .5 ) )
static const double c1 = cos(1.*M_PI/16);
static const double c2 = cos(2.*M_PI/16);
static const double c3 = cos(3.*M_PI/16);
static const double c4 = cos(4.*M_PI/16);
static const double c5 = cos(5.*M_PI/16);
static const double c6 = cos(6.*M_PI/16);
static const double c7 = cos(7.*M_PI/16);
/* Trailing comments give the real (unscaled) value of each constant. */
static const int ROT6_C = FX(c2-c6); // 0.541
static const int ROT6_SmC = FX(2*c6); // 0.765
static const int ROT6_SpC = FX(2*c2); // 1.847
static const int ROT17_C = FX(c1+c7); // 1.175
static const int ROT17_SmC = FX(2*c7); // 0.390
static const int ROT17_SpC = FX(2*c1); // 1.961
static const int ROT37_C = FX((c3-c7)/c4); // 0.899
static const int ROT37_SmC = FX(2*(c5+c7)); // 1.501
static const int ROT37_SpC = FX(2*(c1-c3)); // 0.298
static const int ROT13_C = FX((c1+c3)/c4); // 2.562
static const int ROT13_SmC = FX(2*(c3+c7)); // 2.053
static const int ROT13_SpC = FX(2*(c1+c5)); // 3.072
#endif
#define TYPE SHORT

/*
 * Saturate a reconstructed sample to the 0..255 range.
 * Works on int so the caller can clamp BEFORE narrowing to TYPE.
 */
static int idct_clamp_pixel(int value)
{
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return value;
}

/*
 * In-place 2-D inverse DCT of one 64-element block.
 *
 * p_table  not referenced by this implementation (kept for the shared
 *          jpeg_idct interface).
 * In       64 coefficients; overwritten with the result. After the second
 *          pass every element is offset by +128 and saturated to 0..255.
 *
 * Pass 1 transforms eight groups of 8 consecutive elements and stores an
 * intermediate result shifted right by FIX-IPASS. Pass 2 transforms the
 * eight stride-8 groups and shifts right by FIX+IPASS+3.
 *
 * ROTATE2, BUTF, LOAD_BUTF, SHIFTL, SHIFTR, HALF, FIX and IPASS are defined
 * outside this excerpt. NOTE(review): HALF(n) is presumably the rounding
 * bias 1 << (n-1) added before the matching SHIFTR -- confirm.
 *
 * The statement order inside each pass is significant: the BUTF calls reuse
 * mm0 as scratch once its value has been stored.
 */
void jpeg_idct( p_jpeg_quality_table p_table, SHORT* In )
{
    register TYPE *pIn;
    register int i;
    int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;

    (void)p_table; /* unused in this implementation */

    /* Pass 1: groups of 8 consecutive elements. */
    pIn = In;
    for (i=8; i>0; --i)
    {
        // odd
        mm4 = (int)pIn[7];
        mm5 = (int)pIn[5];
        mm6 = (int)pIn[3];
        mm7 = (int)pIn[1];
        mm2 = mm4 + mm6;
        mm3 = mm5 + mm7;
        ROTATE2(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm1);
        ROTATE2(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm1);
        ROTATE2(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm1);
        mm4 += mm2;
        mm5 += mm3;
        mm6 += mm2;
        mm7 += mm3;
        // even
        mm3 = (int)pIn[2];
        mm2 = (int)pIn[6];
        ROTATE2(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, mm1);
        LOAD_BUTF(mm0, mm1, 0, 4, Spill, pIn);
        mm0 = SHIFTL(mm0, FIX) + HALF(FIX-IPASS);
        mm1 = SHIFTL(mm1, FIX) + HALF(FIX-IPASS);
        BUTF(mm0, mm3, Spill);
        BUTF(mm1, mm2, Spill);
        BUTF(mm0, mm7, Spill);
        pIn[0] = SHIFTR(mm0, FIX-IPASS);
        pIn[7] = SHIFTR(mm7, FIX-IPASS);
        BUTF(mm1, mm6, mm0);
        pIn[1] = SHIFTR(mm1, FIX-IPASS);
        pIn[6] = SHIFTR(mm6, FIX-IPASS);
        BUTF(mm2, mm5, mm0);
        pIn[2] = SHIFTR(mm2, FIX-IPASS);
        pIn[5] = SHIFTR(mm5, FIX-IPASS);
        BUTF(mm3, mm4, mm0);
        pIn[3] = SHIFTR(mm3, FIX-IPASS);
        pIn[4] = SHIFTR(mm4, FIX-IPASS);
        pIn += 8;
    }

    /* Pass 2: stride-8 groups, followed by the +128 offset and saturation. */
    pIn = In;
    for (i=8; i>0; --i)
    {
        // odd
        mm4 = (int)pIn[7*8];
        mm5 = (int)pIn[5*8];
        mm6 = (int)pIn[3*8];
        mm7 = (int)pIn[1*8];
        mm2 = mm4 + mm6;
        mm3 = mm5 + mm7;
        ROTATE2(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm1);
        ROTATE2(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm1);
        ROTATE2(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm1);
        mm4 += mm2;
        mm5 += mm3;
        mm6 += mm2;
        mm7 += mm3;
        // even
        mm3 = (int)pIn[2*8];
        mm2 = (int)pIn[6*8];
        ROTATE2(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, mm1);
        LOAD_BUTF(mm0, mm1, 0*8, 4*8, Spill, pIn);
        mm0 = SHIFTL(mm0, FIX) + HALF(FIX+IPASS+3);
        mm1 = SHIFTL(mm1, FIX) + HALF(FIX+IPASS+3);
        BUTF(mm0, mm3, Spill);
        BUTF(mm1, mm2, Spill);
        BUTF(mm0, mm7, Spill);
        /*
         * Clamp while the value is still an int. Narrowing to TYPE first
         * lets an out-of-range sample wrap and saturate to the wrong bound.
         */
        pIn[8*0] = (TYPE) idct_clamp_pixel(SHIFTR(mm0, FIX+IPASS+3) + 128);
        pIn[8*7] = (TYPE) idct_clamp_pixel(SHIFTR(mm7, FIX+IPASS+3) + 128);
        BUTF(mm1, mm6, mm0);
        pIn[8*1] = (TYPE) idct_clamp_pixel(SHIFTR(mm1, FIX+IPASS+3) + 128);
        pIn[8*6] = (TYPE) idct_clamp_pixel(SHIFTR(mm6, FIX+IPASS+3) + 128);
        BUTF(mm2, mm5, mm0);
        pIn[8*2] = (TYPE) idct_clamp_pixel(SHIFTR(mm2, FIX+IPASS+3) + 128);
        pIn[8*5] = (TYPE) idct_clamp_pixel(SHIFTR(mm5, FIX+IPASS+3) + 128);
        BUTF(mm3, mm4, mm0);
        pIn[8*3] = (TYPE) idct_clamp_pixel(SHIFTR(mm3, FIX+IPASS+3) + 128);
        pIn[8*4] = (TYPE) idct_clamp_pixel(SHIFTR(mm4, FIX+IPASS+3) + 128);
        pIn++;
    }
}
/*
 * Quality-table preparation hook paired with jpeg_idct().
 * This implementation performs no work and leaves p_table untouched.
 */
void jpeg_idct_prepare_qualitytable( p_jpeg_quality_table p_table )
{
    (void)p_table; /* intentionally unused: nothing to prepare here */
}
新一篇: Windows输入法设计的一个遗憾 | 旧一篇: 苏泊尔耗的JPEG解码器[四]
[最终话]最惊心动魄的单元了,IDCT变换。近代图像处理技术的灵魂。本作可使用两种算法。AA&N和LLM算法。其中LLM算法的代码是在网站上找来的,仅可供教学用途。AA&N算法是偶整理的(当然,还是免不了参考别人的代码。)
介于各网站上基本都是抄来的文章,没有详细讲解的,偶就多写一点了。偶数学也不好,花了很多时间来学,理解上可能还是有不少问题,还请多包涵了^^b。
DCT算法是一个矩阵的乘法运算,并且是可逆的。因此,正向变换和反向变换可使用非常类似的算法。
JPEG的发明者曾经在FFT和DCT之间做出取舍,最终选择了DCT,是因为它有很多快速算法。
其基本的优化是,将8*8矩阵的乘法分解成两次矩阵乘法(即人们常说的二维IDCT分解为两次一维IDCT)。公式如下:
$$Z = A X A^{T}$$
其中 $A^{T}$ 表示 A 的转置。
X是8*8的输入矩阵。这样,计算起来,就先对X的每一列和A的行进行计算,结果是一列,然后这一列再和A(t)的相对应行进行计算,结果又成为一行。由于每一列或一行的每一个元素计算包括8次乘法和7次加法,所以一共有8*8*8*2次乘法和7*8*8*2次加法。(大概是这么多,偶数学也不咋滴-___-b)
然后,一维DCT还可以进一步优化,分为奇数列/行和偶数列/行:
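$$
\begin{pmatrix} Y[0] \\ Y[1] \\ Y[2] \\ Y[3] \end{pmatrix}
=
\begin{pmatrix} a & c & a & f \\ a & f & -a & -c \\ a & -f & -a & c \\ a & -c & a & -f \end{pmatrix}
\begin{pmatrix} X[0] \\ X[2] \\ X[4] \\ X[6] \end{pmatrix}
+
\begin{pmatrix} b & d & e & g \\ d & -g & -b & -e \\ e & -b & g & d \\ g & -e & d & -b \end{pmatrix}
\begin{pmatrix} X[1] \\ X[3] \\ X[5] \\ X[7] \end{pmatrix}
$$
$$
\begin{pmatrix} Y[7] \\ Y[6] \\ Y[5] \\ Y[4] \end{pmatrix}
=
\begin{pmatrix} a & c & a & f \\ a & f & -a & -c \\ a & -f & -a & c \\ a & -c & a & -f \end{pmatrix}
\begin{pmatrix} X[0] \\ X[2] \\ X[4] \\ X[6] \end{pmatrix}
-
\begin{pmatrix} b & d & e & g \\ d & -g & -b & -e \\ e & -b & g & d \\ g & -e & d & -b \end{pmatrix}
\begin{pmatrix} X[1] \\ X[3] \\ X[5] \\ X[7] \end{pmatrix}
$$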
其中Y[0]-Y[7]都是1*8的矩阵,X[0]-X[7]也都是1*8的矩阵。
$$\{a, b, c, d, e, f, g\} = \tfrac{1}{2}\left\{ \cos\tfrac{\pi}{4},\ \cos\tfrac{\pi}{16},\ \cos\tfrac{\pi}{8},\ \cos\tfrac{3\pi}{16},\ \cos\tfrac{5\pi}{16},\ \cos\tfrac{3\pi}{8},\ \cos\tfrac{7\pi}{16} \right\}$$
在这之后的优化算法,就是各有千秋了,比较著名的有ChenDCT,LeeDCT,AA&N算法和LLM算法。其中AA&N算法只需要29次加法和5次乘法。(注意,它是指每次一维运算要29次加法和5次乘法,一共是需要29*8*2次加法和5*8*2次乘法的)。但它的条件是要对输入的矩阵首先各乘以一个因子。因为在矩阵从哈夫曼解开后,是游程码,游程码解开后,要进行反量化,这一次乘法是省不了的,所以把因子先乘到量化表上,就可以省去这些时间了(2007/1/26: 原来写成4次了,经Mr.Chen提醒现改正)。
本作因考虑移植性,使用的AA&N算法是整数算法,对小数进行了乘以256的操作。本作中的任何地方都不会用到浮点数。
LLM算法的速度和AA&N差不多(可能是偶写得太差了?-___-b)
jpegidct.h(这个头文件需要包含,以下两个c文件只能任选一个加到工程中。)
************************************************************************************************************
/**************************************************************************************************
superarhow's JPEG decoder
by superarhow(superarhow@hotmail.com). All rights reserved.
**************************************************************************************************/
#pragma once
#include "jpegdec2.h"
/*
 * 2-D IDCT transform.
 *
 * p_table  quality table associated with the block. The AA&N implementation
 *          reads p_table->values; the LLM implementation does not reference it.
 * in       pointer to the block's SHORT coefficients.
 *          NOTE(review): the LLM implementation works on 64 elements and
 *          overwrites them in place -- confirm the same holds for AA&N.
 */
void jpeg_idct( p_jpeg_quality_table p_table, SHORT* in );
/*
 * Prepares a quality table for use with jpeg_idct().
 * NOTE(review): the accompanying article says the AA&N scale factors are
 * folded into the quantization table; the only definition visible here has
 * an empty body -- confirm what each implementation does.
 */
void jpeg_idct_prepare_qualitytable( p_jpeg_quality_table p_table );
*******************************************************************************************************
jpegidct.c(AA&N算法)
********************************************************************************************************
#include "jpegidct.h"
#include "memory.h"
/*
 * AA&N inverse-DCT arithmetic implementation
 * {a, b, c, d, e, f, g} = 1/2 { cos(pi/4), cos(pi/16), cos(pi/8), cos(3pi/16), cos(5pi/16), cos(3pi/8), cos(7pi/16) }
 * if we let: (out[8][8] is the temporary place to hold our first 1D-DCT data)
 * X[0] = ( in[0, 0], in[1, 0], in[2, 0] ... in[7, 0] )
 * X[1] = ( in[0, 1], in[1, 1], in[2, 1] ... in[7, 1] )
 * ...
 * X[7] = ( in[0, 7], in[1, 7], in[2, 7] ... in[7, 7] )
 * Y[0] = ( out[0, 0], out[1, 0], out[2, 0] ... out[7, 0] )
 * Y[1] = ( out[0, 1], out[1, 1], out[2, 1] ... out[7, 1] )
 * ...
 * Y[7] = ( out[0, 7], out[1, 7], out[2, 7] ... out[7, 7] )
 * we'll have:
 *
 * / Y[0] \   / a  c  a  f \ / X[0] \   / b  d  e  g \ / X[1] \
 * | Y[1] | = | a  f -a -c | | X[2] | + | d -g -b -e | | X[3] |
 * | Y[2] |   | a -f -a  c | | X[4] |   | e -b  g  d | | X[5] |
 * \ Y[3] /   \ a -c  a -f / \ X[6] /   \ g -e  d -b / \ X[7] /
 *
 * / Y[7] \   / a  c  a  f \ / X[0] \   / b  d  e  g \ / X[1] \
 * | Y[6] | = | a  f -a -c | | X[2] | - | d -g -b -e | | X[3] |
 * | Y[5] |   | a -f -a  c | | X[4] |   | e -b  g  d | | X[5] |
 * \ Y[4] /   \ a -c  a -f / \ X[6] /   \ g -e  d -b / \ X[7] /
 *
 */
/*
 * Fixed-point constants: real value * 256 (8 fractional bits), matching the
 * division by 256 in FIX_MULDIV. The name suffix is the real value * 1000.
 */
#define FIX_1414 362 /* 1.414; used as 2*c4 in jpeg_idct */
#define FIX_1847 473 /* 1.847; used as 2*c2 */
#define FIX_1082 277 /* 1.082; used as 2*(c2-c6) */
#define FIX_2613 669 /* 2.613; negated at the call site for -2*(c2+c6) */
/*
 * Multiply p by the fixed-point constant q and drop the 8 fractional bits.
 * The product is computed as INT32; integer division truncates toward zero.
 */
#define FIX_MULDIV(p, q) ((INT32)(p) * (q) / 256)
void jpeg_idct( p_jpeg_quality_table p_table, SHORT* in )
{
BYTE i;
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
INT32 tmp10, tmp11, tmp12, tmp13;
INT32 z5, z10, z11, z12, z13;
INT32 work_maze[64];
SHORT *p_row, *p_out;
INT32 *p_col, *p_work;
DWORD *p_dw_value;
p_row = in;
p_work = work_maze;
p_dw_value = p_table->values;
#define ROW(n) ((INT32)p_row[n*8] * p_dw_value[n*8])
#define COL(n) p_col[n]
/*
* first 1-D IDCT col->row
*/
for ( i = 0; i < 8; ++i ) {
if (p_row[1*8] == 0 && p_row[2*8] == 0 && p_row[3*8] == 0 &&
p_row[4*8] == 0 && p_row[5*8] == 0 && p_row[6*8] == 0 &&
p_row[7*8] == 0) {
p_work[0*8] = p_work[1*8] = p_work[2*8] = p_work[3*8]
= p_work[4*8] = p_work[5*8] = p_work[6*8] = p_work[7*8] = ROW(0);
/* next col */
++p_work;
++p_row;
++p_dw_value;
continue;
}
/* Even part */
tmp0 = ROW(0);
tmp1 = ROW(2);
tmp2 = ROW(4);
tmp3 = ROW(6);
tmp4 = ROW(1);
tmp5 = ROW(3);
tmp6 = ROW(5);
tmp7 = ROW(7);
tmp10 = tmp0 + tmp2; /* phase 3 */
tmp11 = tmp0 - tmp2;
tmp13 = tmp1 + tmp3; /* phases 5-3 */
tmp12 = FIX_MULDIV(tmp1 - tmp3, FIX_1414) - tmp13; /* 2*c4 */
tmp0 = tmp10 + tmp13; /* phase 2 */
tmp3 = tmp10 - tmp13;
tmp1 = tmp11 + tmp12;
tmp2 = tmp11 - tmp12;
/* Odd part */
z13 = tmp6 + tmp5; /* phase 6 */
z10 = tmp6 - tmp5;
z11 = tmp4 + tmp7;
z12 = tmp4 - tmp7;
tmp7 = z11 + z13; /* phase 5 */
tmp11 = FIX_MULDIV(z11 - z13, FIX_1414); /* 2*c4 */
z5 = FIX_MULDIV(z10 + z12, FIX_1847); /* 2*c2 */
tmp10 = FIX_MULDIV(z12, FIX_1082) - z5; /* 2*(c2-c6) */
tmp12 = FIX_MULDIV(z10, -FIX_2613) + z5; /* -2*(c2+c6) */
tmp6 = tmp12 - tmp7; /* phase 2 */
tmp5 = tmp11 - tmp6;
tmp4 = tmp10 + tmp5;
p_work[0*8] = tmp0 + tmp7;
p_work[7*8] = tmp0 - tmp7;
p_work[1*8] = tmp1 + tmp6;
p_work[6*8] = tmp1 - tmp6;
p_work[2*8] = tmp2 + tmp5;
p_work[5*8] = tmp2 - tmp5;
p_work[4*8] = tmp3 + tmp4;
p_work[3*8] = tmp3 - tmp4;
/* next col */
++p_work;
++p_row;
++p_dw_value;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -