📄 huffman2.c
字号:
/*
* notez : there's kind of a bug in FastDecode :
* if the last code in decodes to more than one char, you might
* skip more bits than were written
*
* this can fuck you up in merge-bit-stream cases
*
*/
/*
* notez : you must PackCodeLens BEFORE BuildEncodeTable
* in order to set Min & Max CodeLen
* if you don't PackCodeLens, call SetMinMaxCodeLen
*/
/*
*
* return values for BuildCodeLens :
* 1 = all ok
* 0 = error
* -1 = GotNumSymbols < 2 , don't do Huffman !
*
*/
/*
*
* Huffman2 : 7-5-96
*
* not a general purpose Huffman utility
* does not generate Huffman codes. Codes have same lengths, but
* values are ordered thus:
* for each code length, values are ordered by alphabetic order
*
* for general purpose Huffman, use Huffman.c routines
*
* for deferred-build Huffman (no count transmission, one-pass staticHuff2)
* use the old Huffman.c routines
*
* ---
*
* Encode BuildTree time is nearly identical to old Huffman
* Decode BuildTree time is MUCH shorter!
* EncodeC time is identical
* DecodeC time is shorter
*
* ---
*
* 7-6-95 :
* FastDecode ! uses table lookup for multi-symbol-at-a-time operation!
*
* 7-7-95
* I just peeked the IJG JPEG source & discovered that they use
* this method with MaxNumMadeChars = 1
*
*/
#define DEBUG
/* DEBUG toggles some tests which
garauntee proper operation, but take some time to do */
#define DO_MEMCLEARS
/* DO_MEMCLEARS toggles some unnecessary MemClears
probably a good idea to turn it on for debug purposes */
#define PACKCODES_TOPSYM
/* if this is on, then in PackCodeLens, the value of the highest
non-zero-count symbol is written, and codes are only written up
to that symbol ; this helps compression a good deal in most cases */
/*
*
* Modularized Static Huffman encoding
*
* Notes: works on a LBitIOInfo structure
* this structure must be independently allocated and freed
*
* Uses the O(n) Huffman algorithm with QuickSort, RadixSort, or NoSort
* NOTEZ: RadixSort assumes counts are bounded in the range [0,MaxCharCount]
* if AlphaSize > 64 or so, use the RadixSort
*
* You are responsible for calling
* LBitIO_FlushWrite(HI->BII);
* yourself. It is no longer done in EncodeCDone
*
* You must also call
* LBitIO_InitRead(HI->BII);
* yourself before doing any reads. It is no longer done in DecodeCInit
*
* 7-4-95 new stuff:
* 1. charcounts are now packed as codelengths only (much better packing)
* 2. this creates a problem : two chars with different counts but the
* same code length are no longer distinguishable by the decoder,
* but they were distinguished by the encoder!
* 3. result : Huffman2 : uses manual code assignment
*
*
*/
#include <limits.h>
#include <crbinc/inc.h>
#include <crbinc/memutil.h>
#include <crbinc/lbitio.h>
#include <crbinc/intmath.h>
#ifdef DEBUG
#include <stdio.h>
#endif
//structs,defines:
struct Huff2CodeNode
{
uword Char;
uword Count;
struct Huff2CodeNode * Up;
struct Huff2CodeNode * Down;
/* could use uword indexes instead of pointers */
};
#define HUFF2_CODE_BRANCH 0xFFFF
#define FD_MAXCHARSMADE 4
#define FD_MAXCHARSMADE_SUBONE 3
/* see ' !<>! ' marker */
struct Huff2Info
{
long NumSymbols;
struct LBitIOInfo * BII;
long MinCodeLen,MaxCodeLen;
ulong * CodePrefixByLen;
long * NumCodesOfLen;
long * CodeLenTable;
void * EnDe_codeTable;
long EnDe_codeTableLen;
/* FastDecode buffer */
long NumCharsWaiting;
#if FD_MAXCHARSMADE_SUBONE > 0
uword CharsWaiting[FD_MAXCHARSMADE_SUBONE];
#endif
/* BuildCodeLen stuff */
struct Huff2CodeNode * NodeBase;
struct Huff2CodeNode ** NodeWork;
struct Huff2CodeNode ** MadeNodeWork;
struct Huff2CodeNode * CodeNodeHunk;
long CodeNodeHunkI;
ulong * StackArray;
long SortType;
void * SortWork;
long MaxCharCount;
};
#define HUFF2_SORT_NONE 1 // data must be pre-sorted !
#define HUFF2_SORT_RADIX 2
#define HUFF2_SORT_QSORT 3
struct FastDecodeItem
{
ubyte NumBitsUsed;
ubyte NumCharsMade;
uword CharsMade[FD_MAXCHARSMADE];
};
//protos:
struct Huff2Info * Huff2_Init(long NumSymbols,struct LBitIOInfo * BII,long SortType);
void Huff2_CleanUp(struct Huff2Info *HI);
void Huff2_GetMaxCharCount(struct Huff2Info *HI,long * CharCounts);
void Huff2_ScaleCounts(struct Huff2Info *HI,long * CharCounts,long MaxVal);
long Huff2_BuildCodeLens(struct Huff2Info *HI,long *CharCounts);
bool Huff2_BuildEncodeTable(struct Huff2Info *HI);
bool Huff2_BuildDecodeTable(struct Huff2Info *HI);
bool Huff2_BuildFastDecodeTable(struct Huff2Info *HI);
void Huff2_EncodeC(struct Huff2Info *HI,uword C);
uword Huff2_DecodeC(struct Huff2Info *HI);
uword Huff2_FastDecodeC(struct Huff2Info *HI);
bool Huff2_FastDecodeArray_Ubyte(struct Huff2Info *HI,ubyte * Array,long ArrayLen);
void Huff2_SetMinMaxCodeLen(struct Huff2Info *HI);
void Huff2_PackCodeLens(struct Huff2Info *HI);
void Huff2_UnPackCodeLens(struct Huff2Info *HI);
void Huff2_PackCodeLens_Delta(struct Huff2Info *HI,long * LastCodeLens);
void Huff2_UnPackCodeLens_Delta(struct Huff2Info *HI,long * LastCodeLens);
//sort protos:
bool Huff2RadixSort(struct Huff2CodeNode ** BucketArray,
long MaxCharCount,struct Huff2CodeNode ** Array,long ArraySize);
void Huff2QuickSort(long Left,long Right);
struct Huff2CodeNode ** Huff2QuickSortArray; //global for recursion non-stacking
//functions:
/*
*/
struct Huff2Info * Huff2_Init(long NumSymbols,struct LBitIOInfo * BII,long SortType)
{
struct Huff2Info * HI;
if ( (HI = AllocMem(sizeof(struct Huff2Info),MEMF_ANY|MEMF_CLEAR)) == NULL )
return(NULL);
HI->NumSymbols = NumSymbols;
HI->MaxCharCount = LONG_MAX;
HI->BII = BII;
HI->SortType = SortType;
HI->CodeNodeHunkI = 0;
if ( (HI->CodeLenTable = AllocMem(HI->NumSymbols*sizeof(long),MEMF_ANY|MEMF_CLEAR)) == NULL )
{
Huff2_CleanUp(HI);
return(NULL);
}
if ( (HI->NumCodesOfLen = AllocMem(32*sizeof(long),MEMF_ANY|MEMF_CLEAR)) == NULL )
{
Huff2_CleanUp(HI);
return(NULL);
}
if ( (HI->CodePrefixByLen = AllocMem(32*sizeof(ulong),MEMF_ANY|MEMF_CLEAR)) == NULL )
{
Huff2_CleanUp(HI);
return(NULL);
}
return(HI);
}
/*
*/
void Huff2_CleanUp(struct Huff2Info * HI)
{
SmartFree(HI->NodeWork,sizeofpointer*HI->NumSymbols);
SmartFree(HI->MadeNodeWork,sizeofpointer*HI->NumSymbols*2);
SmartFree(HI->CodeLenTable,HI->NumSymbols*sizeof(long));
SmartFree(HI->NumCodesOfLen,32*sizeof(long));
SmartFree(HI->CodePrefixByLen,32*sizeof(ulong));
SmartFree(HI->CodeNodeHunk,sizeof(struct Huff2CodeNode)*2*HI->NumSymbols);
SmartFree(HI->StackArray,2*(HI->NumSymbols)*sizeof(ulong));
SmartFree(HI->EnDe_codeTable,HI->EnDe_codeTableLen);
if ( HI->SortType == HUFF2_SORT_RADIX )
{
SmartFree(HI->SortWork,sizeofpointer*(HI->MaxCharCount + 1));
}
FreeMem(HI,sizeof(struct Huff2Info));
}
/*
* Compute the codelens into Huff2Info from the CharCounts array
*
NOTEZ: MadeNodeWork is large enough, but you may have the problem of looping
i.e. ( (MadeNodeWorkIn - MadeNodeWorkOut) < NumSymbols ) is just fine,
the problem is ( MadeNodeWorkIn < NumSymbols ) isn't true
thus all made-array lookups are done: [MadeNodeWorkIn % NumSymbols]
10-15-95 : alloced MadeNodeWork to double length so as to eliminate this.
*/
/*
*
* return values :
* 1 = all ok
* 0 = error
* -1 = GotNumSymbols == 1 , don't do Huffman !
* call PackCodeLens
* -2 = GotNumSymbols == 0 , don't do Huffman !
* don't call PackCodeLens
*
*/
long Huff2_BuildCodeLens(struct Huff2Info *HI,long *CharCounts)
{
register struct Huff2CodeNode * CurNode;
register struct Huff2CodeNode ** NodeWork;
register long i,NumSymbols,GotNumSymbols;
/*long swapchar;*/
if ( !HI->CodeNodeHunk || !HI->NodeWork || !HI->MadeNodeWork )
{
if ( HI->CodeNodeHunk || HI->NodeWork || HI->MadeNodeWork )
{
return(0);
}
HI->NodeWork = AllocMem(sizeofpointer*HI->NumSymbols,MEMF_ANY);
HI->MadeNodeWork = AllocMem(sizeofpointer*2*HI->NumSymbols,MEMF_ANY);
HI->CodeNodeHunk = AllocMem(sizeof(struct Huff2CodeNode)*2*HI->NumSymbols,MEMF_ANY);
if ( !HI->CodeNodeHunk || !HI->NodeWork || !HI->MadeNodeWork )
{
return(0);
}
}
/* reset, if this is an already-used Huff2Info */
if ( HI->NodeBase )
{
HI->NodeBase = NULL;
HI->CodeNodeHunkI = 0;
#ifdef DO_MEMCLEARS
MemClearFast((long *)HI->CodeNodeHunk,sizeof(struct Huff2CodeNode)*HI->NumSymbols);
MemClearFast((long *)HI->NodeWork,HI->NumSymbols);
MemClearFast((long *)HI->MadeNodeWork,HI->NumSymbols);
#endif // DO_MEMCLEARS
MemClearFast((long *)HI->CodeLenTable,HI->NumSymbols);
MemClearFast((long *)HI->NumCodesOfLen,32);
HI->MinCodeLen = HI->MaxCodeLen = 0;
}
NodeWork = HI->NodeWork;
NumSymbols = HI->NumSymbols;
/*
* Read the chars into NodeWork
* packing out the NULLs
*
*/
GotNumSymbols=0;
for(i=0;i<NumSymbols;i++) /* $ 12% !! */
{
if ( CharCounts[i] > 0 )
{
NodeWork[GotNumSymbols] = HI->CodeNodeHunk + HI->CodeNodeHunkI; HI->CodeNodeHunkI++;
NodeWork[GotNumSymbols]->Char = i;
NodeWork[GotNumSymbols]->Count = CharCounts[i];
NodeWork[GotNumSymbols]->Up = NodeWork[GotNumSymbols]->Down = NULL;
GotNumSymbols++;
}
}
/* !!!! you should not do any huffman coding if this ever happens ! */
/* NOTEZ: BuildEncodeTable WILL fail ! */
/* PackCodeLens & UnPackCodeLens will work */
if ( GotNumSymbols < 2 )
{
if ( GotNumSymbols == 0 )
{
HI->NodeBase = HI->CodeNodeHunk;
HI->NodeBase->Char = 0;
HI->NodeBase->Count = 1;
HI->NodeBase->Up = HI->NodeBase->Down = NULL;
return(-2);
}
else
{
HI->NodeBase = NodeWork[0];
HI->CodeLenTable[HI->NodeBase->Char] = 1;
/* call PackCodeLens anyway, so the decoder knows which char to copy */
return(-1);
}
}
/*
* sort NodeWork in order of ascending counts
*
*/
switch(HI->SortType)
{
case HUFF2_SORT_NONE:
break;
case HUFF2_SORT_RADIX:
if ( ! HI->SortWork )
{
if ( HI->MaxCharCount == LONG_MAX )
{
return(0);
}
if ( (HI->SortWork = AllocMem(sizeofpointer*(HI->MaxCharCount + 1),MEMF_ANY)) == NULL )
{
return(0);
}
}
if ( ! Huff2RadixSort((struct Huff2CodeNode **)HI->SortWork,
HI->MaxCharCount,NodeWork,GotNumSymbols) ) return(0);
break;
case HUFF2_SORT_QSORT:
Huff2QuickSortArray = NodeWork;
Huff2QuickSort(0,GotNumSymbols-1);
break;
default:
return(0);
break;
}
/* sort puts lowest value of ->Count first */
/* register the tree-building variables */
{
register long MadeNodeWorkIn,MadeNodeWorkOut,NodeWorkOut;
register struct Huff2CodeNode ** MadeNodeWork;
MadeNodeWork = HI->MadeNodeWork;
NodeWorkOut=0;
MadeNodeWorkIn = 0;
MadeNodeWorkOut = 0;
/*
* Build the tree while there are more leaf-nodes to go
*
*/
while( NodeWorkOut < (GotNumSymbols-1) ) /* $ 38 % */
{
CurNode = HI->CodeNodeHunk + HI->CodeNodeHunkI; HI->CodeNodeHunkI++;
CurNode->Char = HUFF2_CODE_BRANCH;
#ifdef DEBUG
if ( HI->CodeNodeHunkI >= (NumSymbols*2) )
{
errputs(" HI->CodeNodeHunkI >= (NumSymbols*2) !");
exit(1);
}
#endif // DEBUG
/*
* get the lowest two
* if counts are equal, use a madenode before a leafnode
*
*/
if ( MadeNodeWorkOut < MadeNodeWorkIn )
{
if ( NodeWork[NodeWorkOut]->Count < MadeNodeWork[MadeNodeWorkOut]->Count )
{
CurNode->Down = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
else
{
CurNode->Down = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
}
if ( MadeNodeWorkOut < MadeNodeWorkIn )
{
if ( NodeWork[NodeWorkOut]->Count < MadeNodeWork[MadeNodeWorkOut]->Count )
{
CurNode->Up = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
else
{
CurNode->Up = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
}
}
else
{
CurNode->Up = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
}
else
{
CurNode->Down = NodeWork[NodeWorkOut]; NodeWorkOut++;
CurNode->Up = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
/* ->Down gets the lower valued count & the lower valued character */
/* make sure lower char is on the Down pointer */
/* notez: this is only needed in encode ! */
/* don't bother swapping the counts, they're not needed anymore */
/*
if ( CurNode->Down->Char != HUFF2_CODE_BRANCH &&
CurNode->Up->Char != HUFF2_CODE_BRANCH )
{
if ( CurNode->Down->Char > CurNode->Up->Char )
{
swapchar = CurNode->Down->Char;
CurNode->Down->Char = CurNode->Up->Char;
CurNode->Up->Char = swapchar;
}
}
*/
/*
* put them into a MadeNode
*
*/
MadeNodeWork[MadeNodeWorkIn] = CurNode; MadeNodeWorkIn++;
CurNode->Count = CurNode->Down->Count + CurNode->Up->Count;
}
/*
* we stop the prior loop when there are less than 2 nodework symbols left
* if there is only one left, this loops until it is eaten off
*
*/
while ( NodeWorkOut < GotNumSymbols && MadeNodeWorkOut < MadeNodeWorkIn )
{
CurNode = HI->CodeNodeHunk + HI->CodeNodeHunkI; HI->CodeNodeHunkI++;
CurNode->Char = HUFF2_CODE_BRANCH;
#ifdef DEBUG
if ( HI->CodeNodeHunkI >= (NumSymbols*2) )
{
errputs(" HI->CodeNodeHunkI >= (NumSymbols*2) !");
exit(1);
}
#endif // DEBUG
if ( NodeWork[NodeWorkOut]->Count < MadeNodeWork[MadeNodeWorkOut]->Count )
{
CurNode->Down = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
else
{
CurNode->Down = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
}
if ( MadeNodeWorkOut < MadeNodeWorkIn )
{
if ( NodeWorkOut < GotNumSymbols )
{
if ( NodeWork[NodeWorkOut]->Count < MadeNodeWork[MadeNodeWorkOut]->Count )
{
CurNode->Up = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
else
{
CurNode->Up = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
}
}
else
{
CurNode->Up = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
}
}
else
{
CurNode->Up = NodeWork[NodeWorkOut]; NodeWorkOut++;
}
MadeNodeWork[MadeNodeWorkIn] = CurNode; MadeNodeWorkIn++;
CurNode->Count = CurNode->Down->Count + CurNode->Up->Count;
}
/*
* there may still be extras in MadeNodeWork
*
*/
while ( MadeNodeWorkOut < (MadeNodeWorkIn-1) )
{
CurNode = HI->CodeNodeHunk + HI->CodeNodeHunkI; HI->CodeNodeHunkI++;
CurNode->Char = HUFF2_CODE_BRANCH;
CurNode->Down = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
CurNode->Up = MadeNodeWork[MadeNodeWorkOut]; MadeNodeWorkOut++;
#ifdef DEBUG
if ( HI->CodeNodeHunkI >= (NumSymbols*2) )
{
errputs(" HI->CodeNodeHunkI >= (NumSymbols*2) !");
exit(1);
}
#endif // DEBUG
MadeNodeWork[MadeNodeWorkIn] = CurNode; MadeNodeWorkIn++;
CurNode->Count = CurNode->Down->Count + CurNode->Up->Count;
}
/*
* grab the base node out
*
*/
HI->NodeBase = MadeNodeWork[(MadeNodeWorkIn-1)];
if ( HI->NodeBase == NULL )
{
return(0); /* debugger catch point */
}
}
/* end tree-building registered variables */
/* now read out the codelens into an array */
{
long * CodeLenTable;
long * NumCodesOfLen;
register struct Huff2CodeNode * CurNode;
register ulong CurCodeLen;
register ulong GotC;
register ulong * StackArray;
register long StackI;
if ( ! HI->StackArray )
{
if ( (HI->StackArray = AllocMem(2*(HI->NumSymbols)*sizeof(ulong),MEMF_ANY)) == NULL )
return(0);
}
NumCodesOfLen = HI->NumCodesOfLen;
CodeLenTable = HI->CodeLenTable;
StackArray = HI->StackArray;
StackArray[0] = (ulong) HI->NodeBase;
StackArray[1] = 0;
StackI = 2;
while ( StackI > 0 ) /* $ 15% */
{
StackI--;
CurCodeLen = StackArray[StackI];
StackI--;
CurNode = (struct Huff2CodeNode *) StackArray[StackI];
/* traverse all Up branches w/o Stack bumping
remember the Down branches as you go
once you reach the end of an Up, go back and get the
most recent Down & traverse it
*/
while ( CurNode )
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -