📄 tonyjpegencoder.cpp
字号:
*( py[((y>>3)<<1) + (x>>3)] ++ ) =
((m_RToY[ r ] + m_GToY[ g ] + m_BToY[ b ] )>>16) -128;
// Equal to: (( x%2 == 0 )&&( y%2 == 0 ))
if( (!(y & 1L)) && (!(x & 1L)) )
{
*(pcb++) =
((m_RToCb[ r ] + m_GToCb[ g ] + m_BToCb[ b ])>>16) -128;
*(pcr++) =
((m_RToCr[ r ] + m_GToCr[ g ] + m_BToCr[ b ])>>16) -128;
}
}
}
}
////////////////////////////////////////////////////////////////////////////////
/**************************************************************************
* (1) Direct DCT algorithms:
* These are also available, but they are much more complex and seem not to
* be any faster when reduced to code.
*
*************************************************************************
* (2) LL&M dct algorithm:
* This implementation is based on an algorithm described in
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
* The primary algorithm described there uses 11 multiplies and 29 adds.
* We use their alternate method with 12 multiplies and 32 adds.
*
***************************************************************************
* (3) AA&N DCT algorithm:
* This implementation is based on Arai, Agui, and Nakajima's algorithm for
* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
* Japanese, but the algorithm is described in the Pennebaker & Mitchell
* JPEG textbook (see REFERENCES section in file README). The following
* code is based directly on figure 4-8 in P&M.
*
* The AA&N method needs only 5 multiplies and 29 adds.
*
* The primary disadvantage of this method is that with fixed-point math,
* accuracy is lost due to imprecise representation of the scaled
* quantization values. The smaller the quantization table entry, the less
* precise the scaled value, so this implementation does worse with high-
* quality-setting files than with low-quality ones.
***************************************************************************
*/
// AA&N DCT algorithm implementation
////////////////////////////////////////////////////////////////////////////
// Fixed-point support macros used by the DCT below.
// Each real constant is scaled by 1<<8 and rounded to the nearest integer,
// e.g. 0.382683433 * (1<<8) = 97.97 -> 98
#define FIX_0_382683433 ((int)98) /* FIX(0.382683433) */
#define FIX_0_541196100 ((int)139) /* FIX(0.541196100) */
#define FIX_0_707106781 ((int)181) /* FIX(0.707106781) */
#define FIX_1_306562965 ((int)334) /* FIX(1.306562965) */
// Replaces a floating-point multiply by an integer multiply and a right
// shift of 8 bits: MULTIPLY(a, FIX_0_707106781) is roughly 0.707106781 * a
#define MULTIPLY(var,cons) (int)(((cons) * (var)) >> 8 )
////////////////////////////////////////////////////////////////////////////

// One 8-point, 1-D pass of the AA&N butterfly.
//   src  - first of the eight input samples
//   dst  - first of the eight output slots
//   step - distance (in ints) between consecutive samples, for src and dst
// All eight inputs are read before anything is stored, so src and dst may
// refer to the same memory (the column pass relies on this).
static void TonyFdctAan8( const int* src, int* dst, int step )
{
    const int in0 = src[step*0];
    const int in1 = src[step*1];
    const int in2 = src[step*2];
    const int in3 = src[step*3];
    const int in4 = src[step*4];
    const int in5 = src[step*5];
    const int in6 = src[step*6];
    const int in7 = src[step*7];

    // Mirror-pair sums feed the even outputs, differences feed the odd ones.
    const int sum07 = in0 + in7;
    const int dif07 = in0 - in7;
    const int sum16 = in1 + in6;
    const int dif16 = in1 - in6;
    const int sum25 = in2 + in5;
    const int dif25 = in2 - in5;
    const int sum34 = in3 + in4;
    const int dif34 = in3 - in4;

    /* Even part */
    const int evenOuterSum = sum07 + sum34; /* phase 2 */
    const int evenOuterDif = sum07 - sum34;
    const int evenInnerSum = sum16 + sum25;
    const int evenInnerDif = sum16 - sum25;

    dst[step*0] = evenOuterSum + evenInnerSum; /* phase 3 */
    dst[step*4] = evenOuterSum - evenInnerSum;

    const int evenRot = MULTIPLY(evenInnerDif + evenOuterDif, FIX_0_707106781); /* c4 */
    dst[step*2] = evenOuterDif + evenRot; /* phase 5 */
    dst[step*6] = evenOuterDif - evenRot;

    /* Odd part */
    const int oddA = dif34 + dif25; /* phase 2 */
    const int oddB = dif25 + dif16;
    const int oddC = dif16 + dif07;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    const int rotShared = MULTIPLY(oddA - oddC, FIX_0_382683433); /* c6 */
    const int rotLow = MULTIPLY(oddA, FIX_0_541196100) + rotShared; /* c2-c6 */
    const int rotHigh = MULTIPLY(oddC, FIX_1_306562965) + rotShared; /* c2+c6 */
    const int rotMid = MULTIPLY(oddB, FIX_0_707106781); /* c4 */

    const int oddPlus = dif07 + rotMid; /* phase 5 */
    const int oddMinus = dif07 - rotMid;

    dst[step*5] = oddMinus + rotLow; /* phase 6 */
    dst[step*3] = oddMinus - rotLow;
    dst[step*1] = oddPlus + rotHigh;
    dst[step*7] = oddPlus - rotHigh;
}

// Forward 8x8 DCT: a 1-D pass over every row of data (results stored in
// coef), followed by a 1-D pass over every column of coef, done in place.
void CTonyJpegEncoder::ForwardDct(
int* data, //source data, length is 64
int* coef //output dct coefficients
)
{
    const int kBlockDim = 8;

    /* Pass 1: process rows (data -> coef). */
    for( int row = 0; row < kBlockDim; ++row )
    {
        TonyFdctAan8( data + row * kBlockDim, coef + row * kBlockDim, 1 );
    }

    /* Pass 2: process columns (coef is both input and output). */
    for( int col = 0; col < kBlockDim; ++col )
    {
        TonyFdctAan8( coef + col, coef + col, kBlockDim );
    }
}
////////////////////////////////////////////////////////////////////////////////
// Divides each of the 64 coefficients by its quantization step, rounding to
// the nearest integer with halves rounded away from zero. Works in place.
void CTonyJpegEncoder::Quantize(
int* coef, //coef is both in and out
int iBlock //block id; Y: 0,1,2,3; Cb: 4; Cr: 5
)
{
    // Block ids below 4 use m_qtblY; every other id uses m_qtblCbCr.
    unsigned short *stepTable = m_qtblCbCr;
    if( iBlock < 4 )
        stepTable = m_qtblY;

    /* DIVIDE_BY(a,b): a /= b for nonnegative operands.
     * In most files a large share of the quantized values are zero, so the
     * zero case should be fast. A comparison is usually much cheaper than a
     * divide, and since both operands are nonnegative, a < b is enough to
     * know that a/b is 0.
     */
#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0

    for (int idx = 0; idx < 64; ++idx)
    {
        const unsigned short step = stepTable[idx];
        const int original = coef[idx];

        /* Work on the absolute value: the dividend is forced positive so the
         * result does not depend on how negative quotients are rounded. */
        int magnitude = (original < 0) ? -original : original;

        magnitude += step>>1; /* for rounding */
        DIVIDE_BY(magnitude, step);

        // Put the sign back.
        coef[idx] = (original < 0) ? -magnitude : magnitude;
    }
}
////////////////////////////////////////////////////////////////////////////////
bool CTonyJpegEncoder::HuffmanEncode(
int* pCoef, // DCT coefficients
int iBlock // 0,1,2,3:Y; 4:Cb; 5:Cr;
)
{
/*
* jpeg_natural_order[i] is the natural-order position of the i'th element
* of zigzag order.
*
* When reading corrupted data, the Huffman decoders could attempt
* to reference an entry beyond the end of this array (if the decoded
* zero run length reaches past the end of the block). To prevent
* wild stores without adding an inner-loop test, we put some extra
* "63"s after the real entries. This will cause the extra coefficient
* to be stored in location 63 of the block, not somewhere random.
* The worst case would be a run-length of 15, which means we need 16
* fake entries.
*/
static const int jpeg_natural_order[64+16] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63,
63, 63, 63, 63, 63, 63, 63, 63,//extra entries for safety
63, 63, 63, 63, 63, 63, 63, 63
};
int temp, temp2, nbits, k, r, i;
int *block = pCoef;
int *pLastDc = &m_dcY;
HUFFMAN_TABLE *dctbl, *actbl;
if( iBlock < 4 )
{
dctbl = & m_htblYDC;
actbl = & m_htblYAC;
// pLastDc = &m_dcY;
}
else
{
dctbl = & m_htblCbCrDC;
actbl = & m_htblCbCrAC;
if( iBlock == 4 )
pLastDc = &m_dcCb;
else
pLastDc = &m_dcCr;
}
/* Encode the DC coefficient difference per section F.1.2.1 */
temp = temp2 = block[0] - (*pLastDc);
*pLastDc = block[0];
if (temp < 0) {
temp = -temp; /* temp is abs value of input */
/* For a negative input, want temp2 = bitwise complement of abs(input) */
/* This code assumes we are on a two's complement machine */
temp2 --;
}
/* Find the number of bits needed for the magnitude of the coefficient */
nbits = 0;
while (temp) {
nbits ++;
temp >>= 1;
}
// Write category number
if (! EmitBits( dctbl->code[nbits], dctbl->size[nbits] ))
return FALSE;
// Write category offset
if (nbits) /* EmitBits rejects calls with size 0 */
{
if (! EmitBits( (unsigned int) temp2, nbits ))
return FALSE;
}
////////////////////////////////////////////////////////////////////////////
/* Encode the AC coefficients per section F.1.2.2 */
r = 0; /* r = run length of zeros */
for (k = 1; k < 64; k++)
{
if ((temp = block[jpeg_natural_order[k]]) == 0)
{
r++;
}
else
{
/* if run length > 15, must emit special run-length-16 codes (0xF0) */
while (r > 15) {
if (! EmitBits( actbl->code[0xF0], actbl->size[0xF0] ))
return FALSE;
r -= 16;
}
temp2 = temp;
if (temp < 0) {
temp = -temp; /* temp is abs value of input */
/* This code assumes we are on a two's complement machine */
temp2--;
}
/* Find the number of bits needed for the magnitude of the coefficient */
nbits = 1; /* there must be at least one 1 bit */
while ((temp >>= 1))
nbits++;
/* Emit Huffman symbol for run length / number of bits */
i = (r << 4) + nbits;
if (! EmitBits( actbl->code[i], actbl->size[i] ))
return FALSE;
// Write Category offset
if (! EmitBits( (unsigned int) temp2, nbits ))
return FALSE;
r = 0;
}
}
//If all the left coefs were zero, emit an end-of-block code
if (r > 0)
{
if (! EmitBits( actbl->code[0], actbl->size[0] ))
return FALSE;
}
return TRUE;
}
////////////////////////////////////////////////////////////////////////////////
/* Outputting bits to the file */
/* Only the low 24 bits of the bit buffer are used; the pending bits are
 * left-justified within those 24 bits. At most 16 bits should be passed to
 * EmitBits in one call, and fewer than 8 bits are retained in m_nPutVal
 * between calls, so 24 bits are sufficient.
 *
 * Appends the low `size` bits of `code` to the output bit stream, writes out
 * every completed byte through emit_byte (followed by a stuffed zero byte
 * after each 0xFF), and stores the remaining bits in m_nPutVal / m_nPutBits.
 *
 * Returns false, leaving the state untouched, when `size` is not positive
 * or when the new bits would not fit in the 24-bit window; true otherwise.
 */
inline bool CTonyJpegEncoder::EmitBits(
unsigned int code, //Huffman code
int size //Size in bits of the Huffman code
)
{
    /* This routine is heavily used, so it's worth coding tightly. */
    int put_bits = m_nPutBits;

    /* size 0 means the caller used an invalid Huffman table entry. A negative
     * size, or one that overflows the 24-bit window, would turn one of the
     * shifts below into undefined behaviour, so reject those as well. */
    if (size <= 0 || put_bits + size > 24)
        return false;

    /* Mask off any extra bits in code. The mask is applied while the value is
     * still unsigned, so a code built from a negative number never passes
     * through a signed conversion; the result fits in 24 bits. */
    int put_buffer = (int) (code & ((1u << size) - 1u));

    put_bits += size; /* new number of bits in buffer */
    put_buffer <<= 24 - put_bits; /* align incoming bits */
    put_buffer |= (m_nPutVal & 0xFFFFFF); /* and merge with old buffer contents */

    // Write out every complete byte
    unsigned char uc;
    while (put_bits >= 8)
    {
        uc = (unsigned char) ((put_buffer >> 16) & 0xFF);
        emit_byte(uc);
        if (uc == 0xFF) { //need to stuff a zero byte?
            emit_byte(0);
        }
        /* Drop the byte just written before shifting, so the buffer never
         * grows past 24 bits and the signed shift cannot overflow. */
        put_buffer = (put_buffer & 0xFFFF) << 8;
        put_bits -= 8;
    }

    m_nPutVal = put_buffer; /* update state variables */
    m_nPutBits = put_bits;
    return true;
}
////////////////////////////////////////////////////////////////////////////////
// Flushes the bits still pending in the bit buffer: seven 1 bits are
// appended through EmitBits, and if that call succeeds the buffer state
// (m_nPutVal, m_nPutBits) is cleared. On failure the state is left as is.
inline void CTonyJpegEncoder::EmitLeftBits(void)
{
    const bool padded = EmitBits(0x7F, 7); /* fill 7 bits with ones */
    if (padded)
    {
        m_nPutVal = 0;
        m_nPutBits = 0;
    }
}
////////////////////////////////////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -