📄 tonyjpegencoder.cpp

📁 About JPEG, executable on Visual C++
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
			*( py[((y>>3)<<1) + (x>>3)] ++ ) = 
				((m_RToY[ r ]  + m_GToY[ g ]  + m_BToY[ b ] )>>16) -128;	
			
			//	Equal to: (( x%2 == 0 )&&( y%2 == 0 ))
			if( (!(y & 1L)) && (!(x & 1L)) )
			{
				*(pcb++) = 
					((m_RToCb[ r ] + m_GToCb[ g ] + m_BToCb[ b ])>>16) -128;
				*(pcr++) = 
					((m_RToCr[ r ] + m_GToCr[ g ] + m_BToCr[ b ])>>16) -128;
			}
		}
	}
}

////////////////////////////////////////////////////////////////////////////////

/************************************************************************** 
 * (1)	Direct DCT algorithms:
 *	Direct 2-D DCT algorithms are also available, but they are much more
 *	complex and seem not to be any faster when reduced to code.
 *
 *************************************************************************
 * (2)  LL&M dct algorithm:
 *	This implementation is based on an algorithm described in
 *  C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *  Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *  Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 *	The primary algorithm described there uses 11 multiplies and 29 adds.
 *	We use their alternate method with 12 multiplies and 32 adds.
 *
 ***************************************************************************
 * (3)	AA&N DCT algorithm:
 * This implementation is based on Arai, Agui, and Nakajima's algorithm for
 * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
 * Japanese, but the algorithm is described in the Pennebaker & Mitchell
 * JPEG textbook (see REFERENCES section in file README).  The following 
 * code is based directly on figure 4-8 in P&M.
 *
 * The AA&N method needs only 5 multiplies and 29 adds. 
 *
 * The primary disadvantage of this method is that with fixed-point math,
 * accuracy is lost due to imprecise representation of the scaled
 * quantization values.  The smaller the quantization table entry, the less
 * precise the scaled value, so this implementation does worse with high-
 * quality-setting files than with low-quality ones.
 ***************************************************************************
 */

//	AA&N DCT algorithm implementation


////////////////////////////////////////////////////////////////////////////
//	Fixed-point helpers used by the AA&N transform below.
//
//	Every constant is the real factor scaled up by 1<<8 and truncated,
//	e.g. (int)(0.382683433 * (1<<8)) = 98.
#define FIX_0_382683433  ((int)98)		/* FIX(0.382683433) */
#define FIX_0_541196100  ((int)139)		/* FIX(0.541196100) */
#define FIX_0_707106781  ((int)181)		/* FIX(0.707106781) */
#define FIX_1_306562965  ((int)334)		/* FIX(1.306562965) */

//	Integer stand-in for a float multiply: multiply by the scaled constant,
//	then shift right by 8 to drop the scale again.
#define MULTIPLY(var,cons)  (int)(((cons) * (var)) >> 8 )

////////////////////////////////////////////////////////////////////////////
//	One 8-point AA&N pass.
//
//	Reads the eight samples src[0], src[step], ... src[7*step] and stores the
//	eight results at the same offsets of dst.  Every input is loaded before
//	the first store, so src and dst may point at the same samples (this is
//	how the column pass works in place).
static void TonyAanDct8( const int* src, int* dst, int step )
{
	const int s0 = src[step*0];
	const int s1 = src[step*1];
	const int s2 = src[step*2];
	const int s3 = src[step*3];
	const int s4 = src[step*4];
	const int s5 = src[step*5];
	const int s6 = src[step*6];
	const int s7 = src[step*7];

	//	First butterfly stage: pair each sample with its mirror image.
	//	The sums drive the even outputs, the differences the odd outputs.
	const int sum07 = s0 + s7;
	const int dif07 = s0 - s7;
	const int sum16 = s1 + s6;
	const int dif16 = s1 - s6;
	const int sum25 = s2 + s5;
	const int dif25 = s2 - s5;
	const int sum34 = s3 + s4;
	const int dif34 = s3 - s4;

	/* Even part */

	const int even10 = sum07 + sum34;	/* phase 2 */
	const int even13 = sum07 - sum34;
	const int even11 = sum16 + sum25;
	const int even12 = sum16 - sum25;

	dst[step*0] = even10 + even11;		/* phase 3 */
	dst[step*4] = even10 - even11;

	const int rot4 = MULTIPLY(even12 + even13, FIX_0_707106781);	/* c4 */
	dst[step*2] = even13 + rot4;		/* phase 5 */
	dst[step*6] = even13 - rot4;

	/* Odd part */

	const int odd10 = dif34 + dif25;	/* phase 2 */
	const int odd11 = dif25 + dif16;
	const int odd12 = dif16 + dif07;

	/* The rotator is modified from fig 4-8 of P&M to avoid extra negations. */
	const int z5 = MULTIPLY(odd10 - odd12, FIX_0_382683433);	/* c6 */
	const int z2 = MULTIPLY(odd10, FIX_0_541196100) + z5;		/* c2-c6 */
	const int z4 = MULTIPLY(odd12, FIX_1_306562965) + z5;		/* c2+c6 */
	const int z3 = MULTIPLY(odd11, FIX_0_707106781);			/* c4 */

	const int z11 = dif07 + z3;			/* phase 5 */
	const int z13 = dif07 - z3;

	dst[step*5] = z13 + z2;				/* phase 6 */
	dst[step*3] = z13 - z2;
	dst[step*1] = z11 + z4;
	dst[step*7] = z11 - z4;
}

////////////////////////////////////////////////////////////////////////////
//	Forward 8x8 DCT (AA&N fast integer variant).
//
//	data : 64 input samples, treated as 8 rows of 8 values; only read.
//	coef : 64 output values, same layout; receives the row-pass results and
//	       is then transformed column by column in place.
//
//	NOTE(review): AA&N yields *scaled* coefficients; presumably the
//	quantization tables absorb the remaining scale factors - confirm against
//	the code that builds them.
void CTonyJpegEncoder::ForwardDct( 
		int* data,	//source data, length is 64 
		int* coef	//output dct coefficients
		)
{
	static const int DCTSIZE = 8;
	int line;

	/* Pass 1: process rows (data -> coef, consecutive samples). */
	for( line = 0; line < DCTSIZE; line++ )
	{
		TonyAanDct8( data + line * DCTSIZE, coef + line * DCTSIZE, 1 );
	}

	/* Pass 2: process columns (coef -> coef, samples DCTSIZE apart). */
	for( line = 0; line < DCTSIZE; line++ )
	{
		TonyAanDct8( coef + line, coef + line, DCTSIZE );
	}
}


////////////////////////////////////////////////////////////////////////////////

//	Quantize one block of 64 DCT coefficients in place.
//
//	coef   : 64 coefficients; each is replaced by coef / step, rounded to the
//	         nearest integer with the sign preserved.
//	iBlock : block id; 0..3 select the Y table (m_qtblY), anything else the
//	         chroma table (m_qtblCbCr).
void CTonyJpegEncoder::Quantize( 
		int* coef,	//coef is both in and out
		int iBlock	//block id; Y: 0,1,2,3; Cb: 4; Cr: 5
		)
{
	/* Division helper.  The dividend is always made non-negative first, so
	 * "a < b" is enough to know the quotient is 0 and the divide can be
	 * skipped; with typical tables most outputs are zero, so the cheap
	 * comparison usually wins.  (The plain "a /= b" FAST_DIVIDE variant of
	 * the reference code is intentionally not used here.)
	 */
#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0

	const unsigned short *pSteps = ( iBlock < 4 ) ? m_qtblY : m_qtblCbCr;

	for (int idx = 0; idx < 64; ++idx)
	{
		const unsigned short step = pSteps[idx];

		/* Work on the magnitude only: the rounding direction of a negative
		 * quotient is not something we want to depend on.
		 */
		const bool isNegative = ( coef[idx] < 0 );
		int magnitude = isNegative ? -coef[idx] : coef[idx];

		magnitude += step>>1;	/* add half a step so the divide rounds */
		DIVIDE_BY(magnitude, step);

		coef[idx] = isNegative ? -magnitude : magnitude;
	}
}



////////////////////////////////////////////////////////////////////////////////

//	Huffman-encode one quantized 8x8 block through EmitBits().
//
//	pCoef  : 64 quantized coefficients in natural (row-major) order.
//	iBlock : 0..3 -> Y tables and m_dcY predictor; 4 -> chroma tables and
//	         m_dcCb; anything else -> chroma tables and m_dcCr.
//
//	Returns false as soon as an EmitBits() call fails, true otherwise.
//	The DC predictor of the selected component is updated to pCoef[0]
//	before anything is emitted.
bool CTonyJpegEncoder::HuffmanEncode( 
		int* pCoef,				//	DCT coefficients
		int iBlock				//	0,1,2,3:Y; 4:Cb; 5:Cr;
		)
{	
	/*
	* jpeg_natural_order[i] is the natural-order position of the i'th element
	* of zigzag order.
	*
	* The 16 trailing "63" entries are padding inherited from the reference
	* table (there they protect a decoder that runs past the end of a block
	* on corrupted data); this routine only ever indexes entries 1..63.
	*/
	static const int jpeg_natural_order[64+16] = {
			 0,  1,  8, 16,  9,  2,  3, 10,
			17, 24, 32, 25, 18, 11,  4,  5,
			12, 19, 26, 33, 40, 48, 41, 34,
			27, 20, 13,  6,  7, 14, 21, 28,
			35, 42, 49, 56, 57, 50, 43, 36,
			29, 22, 15, 23, 30, 37, 44, 51,
			58, 59, 52, 45, 38, 31, 39, 46,
			53, 60, 61, 54, 47, 55, 62, 63,
			63, 63, 63, 63, 63, 63, 63, 63,//extra entries for safety
			63, 63, 63, 63, 63, 63, 63, 63
	};

	//	Pick the Huffman tables and the DC predictor for this component.
	const bool isLuma = ( iBlock < 4 );
	HUFFMAN_TABLE *dctbl = isLuma ? & m_htblYDC : & m_htblCbCrDC;
	HUFFMAN_TABLE *actbl = isLuma ? & m_htblYAC : & m_htblCbCrAC;

	int *pLastDc;
	if( isLuma )
		pLastDc = &m_dcY;
	else if( iBlock == 4 )
		pLastDc = &m_dcCb;
	else
		pLastDc = &m_dcCr;

	int nbits;

	/* Encode the DC coefficient difference per section F.1.2.1 */

	const int dcDiff = pCoef[0] - (*pLastDc);
	*pLastDc = pCoef[0];

	int magnitude = dcDiff;		/* becomes abs(dcDiff) */
	int extraBits = dcDiff;		/* low nbits of this are the appended bits */
	if (dcDiff < 0) {
		magnitude = -dcDiff;
		/* For a negative input the appended bits are the bitwise complement
		 * of abs(input); on a two's complement machine that is input - 1.
		 */
		extraBits = dcDiff - 1;
	}

	/* Number of bits needed for the magnitude (0 when the difference is 0) */
	for (nbits = 0; magnitude != 0; magnitude >>= 1)
		++nbits;

	//	Write category number
	if (! EmitBits( dctbl->code[nbits], dctbl->size[nbits] ))
		return false;

	//	Write category offset; skipped for category 0 because EmitBits
	//	rejects calls with size 0
	if (nbits != 0 && ! EmitBits( (unsigned int) extraBits, nbits ))
		return false;

	////////////////////////////////////////////////////////////////////////////
	/* Encode the AC coefficients per section F.1.2.2 */

	int zeroRun = 0;		/* zero coefficients seen since the last non-zero */

	for (int k = 1; k < 64; ++k) 
	{
		const int value = pCoef[jpeg_natural_order[k]];
		if (value == 0) 
		{
			++zeroRun;
			continue;
		}

		/* A run longer than 15 needs special run-length-16 codes (0xF0) */
		for (; zeroRun > 15; zeroRun -= 16) {
			if (! EmitBits( actbl->code[0xF0], actbl->size[0xF0] ))
				return false;
		}

		magnitude = value;
		extraBits = value;
		if (value < 0) {
			magnitude = -value;
			/* Same two's complement trick as for the DC difference */
			extraBits = value - 1;
		}

		/* Bits needed for the magnitude; at least one, value is non-zero */
		for (nbits = 1; (magnitude >>= 1) != 0; ++nbits)
		{
		}

		/* Emit Huffman symbol for run length / number of bits */
		const int symbol = (zeroRun << 4) + nbits;
		if (! EmitBits( actbl->code[symbol], actbl->size[symbol] ))
			return false;

		//	Write Category offset
		if (! EmitBits( (unsigned int) extraBits, nbits ))
			return false;

		zeroRun = 0;
	}

	//	Trailing zeros are not coded one by one: a single end-of-block
	//	symbol (table entry 0) stands for all of them
	if (zeroRun > 0 && ! EmitBits( actbl->code[0], actbl->size[0] ))
		return false;

	return true;
}


////////////////////////////////////////////////////////////////////////////////

/* Outputting bits to the file */

/* Only the right 24 bits of put_buffer are used; the valid bits are
 * left-justified in this part.  At most 16 bits can be passed to EmitBits
 * in one call, and we never retain more than 7 bits in put_buffer
 * between calls, so 24 bits are sufficient.
 */

//	Append the low `size` bits of `code` to the output bit stream.
//
//	Pending bits live left-justified in the low 24 bits of m_nPutVal, and
//	m_nPutBits counts them.  Every completed byte is handed to emit_byte();
//	a 0xFF byte is followed by a stuffed 0 byte.
//
//	Returns false (emitting nothing, state untouched) when size is 0, which
//	means the caller used an invalid Huffman table entry; true otherwise.
inline bool CTonyJpegEncoder::EmitBits(
		unsigned int code,		//Huffman code
		int size				//Size in bits of the Huffman code
		)
{
	/* This routine is heavily used, so it's worth coding tightly. */
	if (size == 0)
		return false;

	int accum = (int) code;
	accum &= (((int)1)<<size) - 1;		/* keep only the requested bits */

	int pending = m_nPutBits;
	pending += size;					/* bit count including the new bits */

	accum <<= 24 - pending;				/* slot new bits right below the old */
	accum |= m_nPutVal;					/* and merge with old buffer contents */

	//	Drain whole bytes from the top of the 24-bit window
	for (; pending >= 8; pending -= 8, accum <<= 8) 
	{
		const unsigned char outByte = (unsigned char) ((accum >> 16) & 0xFF);
		emit_byte(outByte);

		if (outByte == 0xFF) {	//need to stuff a zero byte?
			emit_byte(0);
		}
	}

	/* save what is left (fewer than 8 bits) for the next call */
	m_nPutVal	= accum;
	m_nPutBits	= pending;

	return true;
}

////////////////////////////////////////////////////////////////////////////////

//	Flush the bit buffer at the end of the entropy-coded data.
//
//	Seven 1-bits are pushed through EmitBits(), which completes and writes
//	out a partially filled byte if there is one; the buffer state is then
//	cleared.  If EmitBits() reports failure the state is left as it is.
inline void CTonyJpegEncoder::EmitLeftBits(void)
{
	if (EmitBits(0x7F, 7)) /* fill 7 bits with ones */
	{
		//	Whatever is still in the buffer is padding only; discard it
		m_nPutVal  = 0;
		m_nPutBits = 0;
	}
}


////////////////////////////////////////////////////////////////////////////////
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -