📄 decoder.c

📁 傅立叶变换和小波变换是图像压缩的重要工具。该代码是利用小波变换进行图像压缩。
💻 C
字号:
#include "decoder.h"
#include "wavelet.h"
#include "transform.h"
#include "quantize.h"
#include "coder.h"
#include "mathutil.h"
#include "dpcm.h"
#include <crblib/tsc.h>
#include "yuv.h"

/***************

<> todo : try an all int version ?
	@@ would save us loads of time in the ftoi()

(remember to use the cdfxhaar for timing)

------------------

<> BTW the rung ladder is 16k - rather a large part of our cache usage
	we should only touch half of it (the "zero is MPS" half) - about 8k

------------------
(these times are "decode" only, not counting the YUV->RGB & blit at the end)

on a 256x256 ; trans = CDF22xHaar
	0.039886 secs = 182.6 cycles / pel = 1643066 pels /sec

	(on K7 and P2 takes about 133 cycles/pel ; = 62 fps at 500 Mhz!!)

on the K7-500 :
DecodePlane : 0.016255 secs = 136.4 cycles / pel = 4031628 pels /sec

on a 512x512 we have exactly the same cycles/pel = excellent!

the old decoder :
256:	0.070820 secs = 324.2 cycles / pel = 925386 pels /sec
512:	0.330963 secs = 378.8 cycles / pel = 792064 pels /sec
is about twice as slow, with bad scaling

*******************/

/*
 * DecodeIt
 *
 * Decodes one arithmetic-coded wavelet image from compPtr and emits the final
 * picture two rows at a time, either to Driver or as bytes into BGRplane.
 *
 *	w        : wavelet object; its quantizers are set up from Q here, and its
 *	           trees, levels, dpcmLoss, ari, TransBufL/H and CodeBuffer members are used
 *	compPtr  : compressed stream; stored in w->compPtr and given to arithDecodeInit
 *	BGRplane : output bytes, written by YUV_122toBGRbytes only when Driver is NULL;
 *	           advanced imwidth*6 per row pair, so it must hold imwidth*imheight*3 bytes
 *	fPlanes  : three float work planes.  Planes 0 and 1 are decoded at half width
 *	           and half height with one level fewer; plane 2 is decoded at full size.
 *	           Planes 0 and 1 are then read back as the V and U inputs of the
 *	           YUV conversion.
 *	imwidth,
 *	imheight : image dimensions
 *	           (NOTE(review): the >>1 / >>2 arithmetic presumably expects even
 *	            sizes - confirm against the callers)
 *	Q        : passed to Wavelet_SetupQuantizers
 *	Driver   : when non-NULL, rows are handed to YUV_122toDriver and BGRplane
 *	           is not written
 *
 * Uses the file-external hTransform / vTransform for the inverse transforms.
 * There is no error return; inconsistencies are caught by assert() only.
 */
void DecodeIt(Wavelet *w,ubyte *compPtr,ubyte * BGRplane,float *fPlanes[3],int imwidth,int imheight,float Q,
					Driver_Window * Driver)
{
RowAbstract tp;
RowBuffer ** DCBs;
RowBuffer *tbL,*tbH;
transformFunc hTranser;
transformRowsFunc vRowTranser;
float *Vptr,*Uptr;
ubyte * BGRptr;
int p;
int untransRowBase = 0;

	//------------------------------------
	// make the wavelet object & get it ready

	Wavelet_SetupQuantizers(w,Q);

	w->compPtr = compPtr;
	arithDecodeInit(w->ari,w->compPtr);

	//--------------------------------------------
	// prepare for the inverse transform & TCB's

	hTranser    = hTransform->inverse;
	vRowTranser = vTransform->inverseRows;

	assert( hTranser && vRowTranser );

	tbL = w->TransBufL;
	tbH = w->TransBufH;
	DCBs= w->CodeBuffer;

	assert( tbL && tbH && DCBs );

	//------------------------------------
	// the small row buffer that receives the final level of the last plane
	//	(the 4 is presumably its row count - see the "<= 4" assert below)

	tp.IsPlaneBuffer = false;
	RowBuffer_Init(&(tp.RB),imwidth,4);

	//------------------------------------
	// read/write cursors for the YUV -> output conversion

	Vptr = fPlanes[0];
	Uptr = fPlanes[1];
	BGRptr = BGRplane;

	//------------------------------------

	for(p=0;p<3;p++)
	{
	SubBandTree * sbt;
	SubBand *sb;
	float *fPlaneEnd;
	int sbtwidth,sbtheight,levels,l;

		sbt = w->trees + p;

		//------------------------------------
		// prep the LL and decode the lowest LL

		sbtwidth = imwidth;
		sbtheight= imheight;
		levels = w->levels;
		if ( p != 2 )
		{
			// planes 0 and 1 are half size and stop one level early
			sbtwidth >>= 1;
			sbtheight>>= 1;
			levels --;
		}

		// every band of this plane is addressed backwards from the plane's end
		fPlaneEnd = fPlanes[p] + (sbtwidth * sbtheight);

		sb = &(sbt->LL);

		sb->band = fPlaneEnd - (sb->width * sb->height);
		sb->stride = sb->width;

		dpcmCode(sb,levels,w->dpcmLoss,w->ari,false);

		SubBand_Quantize(sb,false);

		//------------------------------------
		// start doing 3 bands at a time : decode,dequant & make a new LL

		for(l=0;l<levels;l++)
		{
		SubBand *bands[3];
		LapInfo li[3]; // the coder states
		float *LLptr;
		int y,width,height,i;
		int finalLevel;

			// Only reachable for the last plane : planes 0 and 1 run with
			//	levels == w->levels - 1, so l never gets this far for them.
			finalLevel = ( l == (w->levels - 1) );

			//-----------------------
			// prepare for coding

			bands[0] = &(sbt->bands[0][l]);
			bands[1] = &(sbt->bands[1][l]);
			bands[2] = &(sbt->bands[2][l]);

			height = bands[0]->height;
			width  = bands[0]->width;

			// start up the codebuffer:
			for(i=0;i<3;i++)
			{
				bands[i]->band = NULL; // make sure noone touches this

				RowBuffer_Clear(DCBs[i],width);

				LapInfo_Reset(&li[i]);
			}

			//--------------------------
			// set up the target for the transform :

			tp.width = width<<1;
			tp.halfh = height;

			tp.PB.halfh = height;
			tp.PB.halfw = width; // the target is twice as big as our bands
			tp.PB.height = tp.PB.halfh<<1;
			tp.PB.width = tp.PB.halfw <<1;
			tp.PB.plane = fPlaneEnd - (tp.PB.width * tp.PB.height);
			tp.PB.stride = tp.PB.width;
			tp.PB.untransptr = tp.PB.plane;

			if ( finalLevel )
			{
				// it's a RB
				tp.IsPlaneBuffer = false;
				tp.RB.rowCenter = tp.RB.numRows;
			}
			else
			{
				tp.IsPlaneBuffer = true;
			}

			// Record where the not-yet-converted rows start only AFTER the reset
			//	above, so the base always equals the row buffer's starting
			//	rowCenter instead of whatever RowBuffer_Init left there.
			//	(the value is only used on the final level)
			untransRowBase = tp.RB.rowCenter;

			//-------------------
			// do the rows of the LH,HL,HH

			LLptr = fPlaneEnd - (width * height);

			/****************
			*
			*	(original author's notes on the row loop below)
			*
			*	This row-flow method uses a minimum amount of "hot" memory -
			*		only 6k cache is used for a 256-wide image
			*
			*	<> On larger cache machines, we may do better by adding some rows
			*		to the DCB and TCB so that we can do :
			*			1. decode several rows to the DCB
			*			2. transform to several rows in the TCB
			*			3. do vtransform several times on the TCB's
			*		this gives us better register coherence & lets us stay
			*			in one function longer (big deal on K7)
			*
			*	---------------
			*
			*	we do :
			*		on 3 bands :
			*			decode row
			*			dequant row
			*			(touches 3 DCB rows = 3*width)
			*		de-htransform up & down
			*			(touches the LL row = width)
			*			(touches 2 target TCB rows = 4*width)
			*		de-vtransform
			*			(touches 2 target rows = 2*width)
			*
			*		total = 10*subb_width floats
			*
			*		= 5*next_width*4 bytes
			*
			*		<= 5k for a 256x256 image
			*
			*	total touched *= subb_height
			*
			*		= 10 * next_width * next_height bytes
			*		= 10 bytes per image pel
			*
			*	at all times :
			*		the 3 DCB rows should be in cache	(3*4/2 * next_width)
			*		the 2*2 TCB rows should be in cache (4*4 * next_width)
			*		= 5.5 K (for 256 width)
			*	the only hot memory is :
			*		one LL row in						(4/2 * next_width)
			*		two LL rows out						(4*2 * next_width)
			*		= 2.5 K (for 256 width)
			*		or 1/4 of the image bytes in additional overhead, per level
			*	adding all levels :
			*		(1/4) + (1/16) + .. = 4/3 of image bytes
			*		= 87k total overhead touched (on 256x256)
			*
			******************/

			for(y=0;y<height;y++)
			{
			uint * dPtr[3];
			float * fPtr[3];

				for(i=0;i<3;i++)
				{
					// decode a row from this band
					dPtr[i] = decodeRow(bands[i],w->ari,&li[i]);

					// advance the circular buffer
					DCBs[i]->rowCenter ++;

					// get a temp work space; this is the row that will be filled by the next decode
					fPtr[i] = RowBuffer_Row(DCBs[i],0);

					// dequantize to the temp workspace
					dequantizerow( dPtr[i], fPtr[i], width, bands[i]->quantizer);
				}

				// old LL + new LH -> low vert circular buffer

				hTranser( RowBuffer_Row(tbL,0) , LLptr, fPtr[0], width);
				LLptr += width;

				// LH + HL -> high vert circular buffer
				hTranser( RowBuffer_Row(tbH,0) , fPtr[1], fPtr[2], width);

				// now vTrans to the new LL from the two circ DCBs

				vRowTranser( &tp, tbL, tbH, y);

				if ( finalLevel )
				{
					//-- this is it; we're untransforming into a little rowbuffer
					//	and doing a YUV -> final image from that rowbuffer

					// the converter must never fall more than 4 rows behind
					assert( (tp.RB.rowCenter - untransRowBase) <= 4 );

					// flush every complete pair of finished rows
					while( tp.RB.rowCenter >= (untransRowBase + 2) )
					{
						if ( Driver )
						{
							YUV_122toDriver(	RowBuffer_RawRow(&(tp.RB),untransRowBase),
												RowBuffer_RawRow(&(tp.RB),untransRowBase+1),
												Uptr,Vptr,Driver,imwidth);
						}
						else
						{
							YUV_122toBGRbytes(	RowBuffer_RawRow(&(tp.RB),untransRowBase),
												RowBuffer_RawRow(&(tp.RB),untransRowBase+1),
												Uptr,Vptr,BGRptr,imwidth);
							BGRptr += imwidth*6; // two rows of 3 bytes per pel
						}

						// one half-width U/V row serves each pair of output rows
						Uptr += (imwidth>>1);
						Vptr += (imwidth>>1);
						untransRowBase += 2;
					}
				}

				tbL->rowCenter ++;
				tbH->rowCenter ++;
			}

			// on the final level every produced row must have been converted
			assert( !finalLevel || tp.RB.rowCenter == untransRowBase );
		}
	}

	// all output and both half-size planes must have been consumed exactly
	assert( Driver || (BGRptr == (BGRplane + imwidth*imheight*3)) );
	assert( Uptr == (fPlanes[1] + (imwidth*imheight>>2)) );
	assert( Vptr == (fPlanes[0] + (imwidth*imheight>>2)) );

	//------------------------------------
	// finish up

	RowBuffer_DeInit(&(tp.RB));

	arithDecodeDone(w->ari);
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -