📄 fdct_aan_ref.c
字号:
//////////////////////////////////////////////////////////////////////////////////////////////////////// Title: fdct_aan_ref.sc (C Reference code for FDCT8x8+Quantization)//// Notice: COPYRIGHT (C) STREAM PROCESSORS, INC. 2005-2007// THIS PROGRAM IS PROVIDED UNDER THE TERMS OF THE SPI// END-USER LICENSE AGREEMENT (EULA). THE PROGRAM MAY ONLY// BE USED IN A MANNER EXPLICITLY SPECIFIED IN THE EULA,// WHICH INCLUDES LIMITATIONS ON COPYING, MODIFYING,// REDISTRIBUTION AND WARANTIES. UNAUTHORIZED USE OF THIS// PROGRAM IS STRICTLY PROHIBITED. YOU MAY OBTAIN A COPY OF// THE EULA FROM WWW.STREAMPROCESSORS.COM. // ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #includes ////////////////////////////////////////////////////////////////////////////////////////////////////#include "spi_common.h"#include "jpege_context.h"//////////////////////////////////////////////////////////////////////////////////////////////////////// Implementation details. This code is modified from IJG code. File jfdctfst.c//// jfdctfst.c// Copyright (C) 1994-1996, Thomas G. Lane.// This file is part of the Independent JPEG Group's software.// For conditions of distribution and use, see the accompanying README file.// // This file contains a fast, not so accurate integer implementation of the// forward DCT (Discrete Cosine Transform).// // A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT// on each column. Direct algorithms are also available, but they are// much more complex and seem not to be any faster when reduced to code.// // This implementation is based on Arai, Agui, and Nakajima's algorithm for// scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in// Japanese, but the algorithm is described in the Pennebaker & Mitchell// JPEG textbook (see REFERENCES section in file README). The following code// is based directly on figure 4-8 in P&M.// While an 8-point DCT cannot be done in less than 11 multiplies, it is// possible to arrange the computation so that many of the multiplies are// simple scalings of the final outputs. These multiplies can then be// folded into the multiplications or divisions by the JPEG quantization// table entries. The AA&N method leaves only 5 multiplies and 29 adds// to be done in the DCT itself.// The primary disadvantage of this method is that with fixed-point math,// accuracy is lost due to imprecise representation of the scaled// quantization values. The smaller the quantization table entry, the less// precise the scaled value, so this implementation does worse with high-// quality-setting files than with low-quality ones.// // This module is specialized to the case DCTSIZE = 8.// Scaling decisions are generally the same as in the LL&M algorithm;// see jfdctint.c for more details. However, we choose to descale// (right shift) multiplication products as soon as they are formed,// rather than carrying additional fractional bits into subsequent additions.// This compromises accuracy slightly, but it lets us save a few shifts.// More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)// everywhere except in the multiplications proper; this saves a good deal// of work on 16-bit-int machines.// // Again to save a few shifts, the intermediate results between pass 1 and// pass 2 are not upscaled, but are represented only to integral precision.// // A final compromise is to represent the multiplicative constants to only// 8 fractional bits, rather than 13. This saves some shifting work on some// machines, and may also reduce the cost of multiplication (since there// are fewer one-bits in the constants).// // Some C compilers fail to reduce "FIX(constant)" at compile time, thus// causing a lot of useless floating-point operations at run time.// To get around this we use the following pre-calculated constants.// If you change CONST_BITS you may want to add appropriate values.// (With a reasonable C compiler, you can just rely on the FIX() macro...)////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// Constants////////////////////////////////////////////////////////////////////////////////#if AAN_TCONST_BITS == 15#define FIX_0_382683433 ((int) 12540) // 0x30FC30FC //((int) 12540) // FIX(0.382683433) Q1.15#define FIX_0_541196100 ((int) 17734) // 0x45464546 //((int) 17734) // FIX(0.541196100) Q1.15#define FIX_0_707106781 ((int) 23170) // 0x5A825A82 //((int) 23170) // FIX(0.707106781) Q1.15#define FIX_1_306562965 ((int) 21407) // FIX(1.306562965) Q2.14#elif AAN_TCONST_BITS == 14#define FIX_0_382683433 ((int) 6270) // FIX(0.382683433)#define FIX_0_541196100 ((int) 8867) // FIX(0.541196100)#define FIX_0_707106781 ((int) 11585) // FIX(0.707106781)#define FIX_1_306562965 ((int) 21407) // FIX(1.306562965)#elif AAN_TCONST_BITS == 12#define FIX_0_382683433 ((int) 1567) //(0.382683433 * (1 << AAN_TCONST_BITS)) FIX(0.382683433) #define FIX_0_541196100 ((int) 2217) // FIX(0.541196100) #define FIX_0_707106781 ((int) 2896) // FIX(0.707106781) #define FIX_1_306562965 ((int) 5352) // FIX(1.306562965) #elif AAN_TCONST_BITS == 8#define FIX_0_382683433 ((int) 98) // FIX(0.382683433) #define FIX_0_541196100 ((int) 139) // FIX(0.541196100) #define FIX_0_707106781 ((int) 181) // FIX(0.707106781) #define FIX_1_306562965 ((int) 334) // FIX(1.306562965) #else#define FIX_0_382683433 FIX(0.382683433)#define FIX_0_541196100 FIX(0.541196100)#define FIX_0_707106781 FIX(0.707106781)#define FIX_1_306562965 FIX(1.306562965)#endif///////////////////////////////////////////////////////////////////////short quantize_ref ( short data, unsigned short scale)// Description: Input 'data' is quantized by using the 'scale' value // (quantization divisor). The result is rounded and shifted// to retain the most precise part of the result.//// Returns: short.///////////////////////////////////////////////////////////////////////{ if (data < 0) { data = -data; data = (data * (short)scale + DIV_CONST_HALF) >> DIV_CONST_BITS; data = -data; } else { data = (data * (short)scale + DIV_CONST_HALF) >> DIV_CONST_BITS; } return data;}///////////////////////////////////////////////////////////////////////void fdct_and_quantize_aan_ref ( char *p_in, short *p_out, unsigned short *p_quant_divisor)// Description: From Pennebaker/Mitchell, pg. 50-52. See also Arai, Agui,// Nakajima. This algorithm is based on the 16-pt DFT. Basically,// the 8-pt DCT can be calculated by scaling the real parts of the// output of the 16-pt DFT.//// Returns: Nothing.///////////////////////////////////////////////////////////////////////{ short a0, a1, a2, a3, a4, a5, a6, a7; short b0, b1, b2, b3; short z1, z2, z3, z4, z5, z11, z13; char *p_in_ptr; short *p_out_ptr; unsigned short *p_divisor; int i, j, k; // Pass 1: process columns to match with the kernel code // Pass 1: process rows. p_in_ptr = p_in; p_out_ptr = p_out; for (i = 0; i < BLOCK_WIDTH; i++) { a0 = p_in_ptr[BLOCK_WIDTH * 0] + p_in_ptr[BLOCK_WIDTH * 7]; a7 = p_in_ptr[BLOCK_WIDTH * 0] - p_in_ptr[BLOCK_WIDTH * 7]; a1 = p_in_ptr[BLOCK_WIDTH * 1] + p_in_ptr[BLOCK_WIDTH * 6]; a6 = p_in_ptr[BLOCK_WIDTH * 1] - p_in_ptr[BLOCK_WIDTH * 6]; a2 = p_in_ptr[BLOCK_WIDTH * 2] + p_in_ptr[BLOCK_WIDTH * 5]; a5 = p_in_ptr[BLOCK_WIDTH * 2] - p_in_ptr[BLOCK_WIDTH * 5]; a3 = p_in_ptr[BLOCK_WIDTH * 3] + p_in_ptr[BLOCK_WIDTH * 4]; a4 = p_in_ptr[BLOCK_WIDTH * 3] - p_in_ptr[BLOCK_WIDTH * 4]; // Even part b0 = a0 + a3; // phase 2 b3 = a0 - a3; b1 = a1 + a2; b2 = a1 - a2; p_out_ptr[BLOCK_WIDTH * 0] = b0 + b1; // phase 3 p_out_ptr[BLOCK_WIDTH * 4] = b0 - b1; z1 = (((b2 + b3) * FIX_0_707106781) + 0x4000) >> AAN_TCONST_BITS; // c4 p_out_ptr[BLOCK_WIDTH * 2] = b3 + z1; // phase 5 p_out_ptr[BLOCK_WIDTH * 6] = b3 - z1; // Odd part b0 = a4 + a5; // phase 2 b1 = a5 + a6; b2 = a6 + a7; // The following changes were necessary to maintain bit-exactness b/w streamC and Reference C. // Constant 0x4000 added to the multiplication result is simulate spi_vmulra16i() rounding. // FIX_1_306562965 is in Q2.14 with the rest of the co-efficients in Q1.15. // An additional left shift is seen to convert the Q2.14 result back into Q1.15. // The rotator is modified from fig 4-8 to avoid extra negations. z5 = (((b0 - b2) * FIX_0_382683433) + 0x4000) >> AAN_TCONST_BITS; // c6 z2 = (((b0 * FIX_0_541196100) + 0x4000) >> AAN_TCONST_BITS) + z5; // c2-c6 z4 = ((((b2 * FIX_1_306562965) + 0x4000) >> AAN_TCONST_BITS) << 1) + z5; // c2+c6 z3 = ((b1 * FIX_0_707106781) + 0x4000) >> AAN_TCONST_BITS; // c4 z11 = a7 + z3; // phase 5 z13 = a7 - z3; p_out_ptr[BLOCK_WIDTH*5] = z13 + z2; // phase 6 p_out_ptr[BLOCK_WIDTH*3] = z13 - z2; p_out_ptr[BLOCK_WIDTH*1] = z11 + z4; p_out_ptr[BLOCK_WIDTH*7] = z11 - z4; p_in_ptr++; p_out_ptr++; // advance pointer to next column } p_out_ptr = p_out; for (i = 0; i < BLOCK_HEIGHT; i++) { a0 = p_out_ptr[0] + p_out_ptr[7]; a7 = p_out_ptr[0] - p_out_ptr[7]; a1 = p_out_ptr[1] + p_out_ptr[6]; a6 = p_out_ptr[1] - p_out_ptr[6]; a2 = p_out_ptr[2] + p_out_ptr[5]; a5 = p_out_ptr[2] - p_out_ptr[5]; a3 = p_out_ptr[3] + p_out_ptr[4]; a4 = p_out_ptr[3] - p_out_ptr[4]; // Even part b0 = a0 + a3; // phase 2 b3 = a0 - a3; b1 = a1 + a2; b2 = a1 - a2; p_out_ptr[0] = b0 + b1; // phase 3 p_out_ptr[4] = b0 - b1; z1 = (((b2 + b3) * FIX_0_707106781) + 0x4000) >> AAN_TCONST_BITS; // c4 p_out_ptr[2] = b3 + z1; // phase 5 p_out_ptr[6] = b3 - z1; // Odd part b0 = a4 + a5; // phase 2 b1 = a5 + a6; b2 = a6 + a7; // The rotator is modified from fig 4-8 to avoid extra negations. z5 = (((b0 - b2) * FIX_0_382683433) + 0x4000) >> AAN_TCONST_BITS; // c6 z2 = (((b0 * FIX_0_541196100) + 0x4000) >> AAN_TCONST_BITS) + z5; // c2-c6 z4 = ((((b2 * FIX_1_306562965) + 0x4000) >> AAN_TCONST_BITS) << 1) + z5; // c2+c6 z3 = ((b1 * FIX_0_707106781) + 0x4000) >> AAN_TCONST_BITS; // c4 z11 = a7 + z3; // phase 5 z13 = a7 - z3; p_out_ptr[5] = z13 + z2; // phase 6 p_out_ptr[3] = z13 - z2; p_out_ptr[1] = z11 + z4; p_out_ptr[7] = z11 - z4; p_out_ptr += BLOCK_WIDTH; // advance pointer to next row } // Pass 3: quantize. p_out_ptr = p_out; p_divisor = p_quant_divisor; for (i = 0; i < BLOCK_WIDTH; i++) { for (j = 0; j < BLOCK_HEIGHT; j++) { k = i + j * BLOCK_WIDTH; p_out_ptr[k] = quantize_ref (p_out_ptr[k], p_divisor[k]); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -