📄 huff.c
字号:
fatal_error( "Error reading byte counts\n" ); else nodes[ i ].count = (unsigned int) c; if ( ( first = getc( input->file ) ) == EOF ) fatal_error( "Error reading byte counts\n" ); if ( first == 0 ) break; if ( ( last = getc( input->file ) ) == EOF ) fatal_error( "Error reading byte counts\n" ); } nodes[ END_OF_STREAM ].count = 1;}/* * This routine counts the frequency of occurence of every byte in * the input file. It marks the place in the input stream where it * started, counts up all the bytes, then returns to the place where * it started. In most C implementations, the length of a file * cannot exceed an unsigned long, so this routine should always * work. */#ifndef SEEK_SET#define SEEK_SET 0#endifvoid count_bytes( input, counts )FILE *input;unsigned long *counts;{ long input_marker; int c; input_marker = ftell( input ); while ( ( c = getc( input )) != EOF ) counts[ c ]++; fseek( input, input_marker, SEEK_SET );}/* * In order to limit the size of my Huffman codes to 16 bits, I scale * my counts down so they fit in an unsigned char, and then store them * all as initial weights in my NODE array. The only thing to be * careful of is to make sure that a node with a non-zero count doesn't * get scaled down to 0. Nodes with values of 0 don't get codes. */void scale_counts( counts, nodes )unsigned long *counts;NODE *nodes;{ unsigned long max_count; int i; max_count = 0; for ( i = 0 ; i < 256 ; i++ ) if ( counts[ i ] > max_count ) max_count = counts[ i ]; if ( max_count == 0 ) { counts[ 0 ] = 1; max_count = 1; } max_count = max_count / 255; max_count = max_count + 1; for ( i = 0 ; i < 256 ; i++ ) { nodes[ i ].count = (unsigned int) ( counts[ i ] / max_count ); if ( nodes[ i ].count == 0 && counts[ i ] != 0 ) nodes[ i ].count = 1; } nodes[ END_OF_STREAM ].count = 1;}/* * Building the Huffman tree is fairly simple. All of the active nodes * are scanned in order to locate the two nodes with the minimum * weights. These two weights are added together and assigned to a new * node. The new node makes the two minimum nodes into its 0 child * and 1 child. The two minimum nodes are then marked as inactive. * This process repeats until their is only one node left, which is the * root node. The tree is done, and the root node is passed back * to the calling routine. * * Node 513 is used here to arbitratily provide a node with a guaranteed * maximum value. It starts off being min_1 and min_2. After all active * nodes have been scanned, I can tell if there is only one active node * left by checking to see if min_1 is still 513. */int build_tree( nodes )NODE *nodes;{ int next_free; int i; int min_1; int min_2; nodes[ 513 ].count = 0xffff; for ( next_free = END_OF_STREAM + 1 ; ; next_free++ ) { min_1 = 513; min_2 = 513; for ( i = 0 ; i < next_free ; i++ ) if ( nodes[ i ].count != 0 ) { if ( nodes[ i ].count < nodes[ min_1 ].count ) { min_2 = min_1; min_1 = i; } else if ( nodes[ i ].count < nodes[ min_2 ].count ) min_2 = i; } if ( min_2 == 513 ) break; nodes[ next_free ].count = nodes[ min_1 ].count + nodes[ min_2 ].count; nodes[ min_1 ].saved_count = nodes[ min_1 ].count; nodes[ min_1 ].count = 0; nodes[ min_2 ].saved_count = nodes[ min_2 ].count; nodes[ min_2 ].count = 0; nodes[ next_free ].child_0 = min_1; nodes[ next_free ].child_1 = min_2; } next_free--; nodes[ next_free ].saved_count = nodes[ next_free ].count; return( next_free );}/* * Since the Huffman tree is built as a decoding tree, there is * no simple way to get the encoding values for each symbol out of * it. This routine recursively walks through the tree, adding the * child bits to each code until it gets to a leaf. When it gets * to a leaf, it stores the code value in the CODE element, and * returns. */void convert_tree_to_code( nodes, codes, code_so_far, bits, node )NODE *nodes;CODE *codes;unsigned int code_so_far;int bits;int node;{ if ( node <= END_OF_STREAM ) { codes[ node ].code = code_so_far; codes[ node ].code_bits = bits; return; } code_so_far <<= 1; bits++; convert_tree_to_code( nodes, codes, code_so_far, bits, nodes[ node ].child_0 ); convert_tree_to_code( nodes, codes, code_so_far | 1, bits, nodes[ node ].child_1 );}/* * If the -d command line option is specified, this routine is called * to print out some of the model information after the tree is built. * Note that this is the only place that the saved_count NODE element * is used for anything at all, and in this case it is just for * diagnostic information. By the time I get here, and the tree has * been built, every active element will have 0 in its count. */void print_model( nodes, codes )NODE *nodes;CODE *codes;{ int i; for ( i = 0 ; i < 513 ; i++ ) { if ( nodes[ i ].saved_count != 0 ) { printf( "node=" ); print_char( i ); printf( " count=%3d", nodes[ i ].saved_count ); printf( " child_0=" ); print_char( nodes[ i ].child_0 ); printf( " child_1=" ); print_char( nodes[ i ].child_1 ); if ( codes && i <= END_OF_STREAM ) { printf( " Huffman code=" ); FilePrintBinary( stdout, codes[ i ].code, codes[ i ].code_bits ); } printf( "\n" ); } }}/* * The print_model routine uses this function to print out node numbers. * The catch is, if it is a printable character, it gets printed out * as a character. Makes the debug output a little easier to read. */void print_char( c )int c;{ if ( c >= 0x20 && c < 127 ) printf( "'%c'", c ); else printf( "%3d", c );}/* * Once the tree gets built, and the CODE table is built, compressing * the data is a breeze. Each byte is read in, and its corresponding * Huffman code is sent out. */void compress_data( input, output, codes )FILE *input;BIT_FILE *output;CODE *codes;{ int c; while ( ( c = getc( input ) ) != EOF ) OutputBits( output, (unsigned long) codes[ c ].code, codes[ c ].code_bits ); OutputBits( output, (unsigned long) codes[ END_OF_STREAM ].code, codes[ END_OF_STREAM ].code_bits );}/* * Expanding compressed data is a little harder than the compression * phase. As each new symbol is decoded, the tree is traversed, * starting at the root node, reading a bit in, and taking either the * child_0 or child_1 path. Eventually, the tree winds down to a * leaf node, and the corresponding symbol is output. If the symbol * is the END_OF_STREAM symbol, it doesn't get written out, and * instead the whole process terminates. */void expand_data( input, output, nodes, root_node )BIT_FILE *input;FILE *output;NODE *nodes;int root_node;{ int node; for ( ; ; ) { node = root_node; do { if ( InputBit( input ) ) node = nodes[ node ].child_1; else node = nodes[ node ].child_0; } while ( node > END_OF_STREAM ); if ( node == END_OF_STREAM ) break; if ( ( putc( node, output ) ) != node ) fatal_error( "Error trying to write expanded byte to output" ); }}
void statistical_info(unsigned long * counts, CODE *codes)
{
int i;
double total=0;
double * probability;
double entropy=0;
double averwordlen=0;
probability = (double *) calloc ( 256, sizeof(double));
for(i=0;i<256;i++)
total+=counts[i];
printf("The total number of all symbol occurrences is: %f\n" ,total);
printf("\n");
for(i=0;i<256;i++)
{
probability[i] =(counts[i]/total);
if(counts[i]!=0)
{
printf("the number of occurrence of symbol[%d] is: %d\n" ,i,counts[i]);
printf("the probability of symbol[%d] is: %f\n" ,i,probability[i]);
printf("\n");
entropy+=(-probability[i])*log(probability[i]);
averwordlen+=probability[i]*codes[i].code_bits;
}
}
printf("\nEntropy is: %f\n" ,entropy);
printf("Average codeword length is: %f\n" ,averwordlen);
}
/*************************** End of HUFF.C **************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -