📄 ahuff.c
字号:
/************************** Start of AHUFF.C ************************* * * This is the adaptive Huffman coding module used in Chapter 4. * Compile with BITIO.C, ERRHAND.C, and either MAIN-C.C or MAIN-E.C */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "bitio.h"#include "errhand.h"char *CompressionName = "Adaptive Huffman coding, with escape codes";char *Usage = "infile outfile [ -d ]";#define END_OF_STREAM 256#define ESCAPE 257#define SYMBOL_COUNT 258#define NODE_TABLE_COUNT ( ( SYMBOL_COUNT * 2 ) - 1 )#define ROOT_NODE 0#define MAX_WEIGHT 0x8000#define TRUE 1#define FALSE 0/* * This data structure is all that is needed to maintain an adaptive * Huffman tree for both encoding and decoding. The leaf array is a * set of indices into the nodes that indicate which node is the * parent of a symbol. For example, to encode 'A', we would find the * leaf node by way of leaf[ 'A' ]. The next_free_node index is used * to tell which node is the next one in the array that can be used. * Since nodes are allocated when characters are read in for the first * time, this pointer keeps track of where we are in the node array. * Finally, the array of nodes is the actual Huffman tree. The child * index is either an index pointing to a pair of children, or an * actual symbol value, depending on whether 'child_is_leaf' is true * or false. */typedef struct tree { int leaf[ SYMBOL_COUNT ]; int next_free_node; struct node { unsigned int weight; int parent; int child_is_leaf; int child; } nodes[ NODE_TABLE_COUNT ];} TREE;/* * The Tree used in this program is a global structure. Under other * circumstances it could just as well be a dynamically allocated * structure built when needed, since all routines here take a TREE * pointer as an argument. */TREE Tree;/* * Function prototypes for both ANSI C compilers and their K&R brethren. */#ifdef __STDC__void CompressFile( FILE *input, BIT_FILE *output, int argc, char *argv[] );void ExpandFile( BIT_FILE *input, FILE *output, int argc, char *argv[] );void InitializeTree( TREE *tree );void EncodeSymbol( TREE *tree, unsigned int c, BIT_FILE *output );int DecodeSymbol( TREE *tree, BIT_FILE *input );void UpdateModel( TREE *tree, int c );void RebuildTree( TREE *tree );void swap_nodes( TREE *tree, int i, int j );void add_new_node( TREE *tree, int c );void PrintTree( TREE *tree );void print_codes( TREE *tree );void print_code( TREE *tree, int c );void calculate_rows( TREE *tree, int node, int level );int calculate_columns( TREE *tree, int node, int starting_guess );int find_minimum_column( TREE *tree, int node, int max_row );void rescale_columns( int factor );void print_tree( TREE *tree, int first_row, int last_row );void print_connecting_lines( TREE *tree, int row );void print_node_numbers( int row );void print_weights( TREE *tree, int row );void print_symbol( TREE *tree, int row );#elsevoid CompressFile();void ExpandFile();void InitializeTree();void EncodeSymbol();int DecodeSymbol();void UpdateModel();void RebuildTree();void swap_nodes();void add_new_node();void PrintTree();void print_codes();void print_code();void calculate_rows();int calculate_columns();void rescale_columns();void print_tree();void print_connecting_lines();void print_node_numbers();void print_weights();void print_symbol();#endif/* * The high level view of the compression routine is very simple. * First, we initialize the Huffman tree, with just the ESCAPE and * END_OF_STREAM symbols. Then, we sit in a loop, encoding symbols, * and adding them to the model. When there are no more characters * to send, the special END_OF_STREAM symbol is encoded. The decoder * will later be able to use this symbol to know when to quit. * * This routine will accept a single additional argument. If the user * passes a "-d" argument, the function will dump out the Huffman tree * to stdout when the program is complete. The code to accomplish this * is thrown in as a bonus., and not documented in the book. */void CompressFile( input, output, argc, argv )FILE *input;BIT_FILE *output;int argc; char *argv[];{ int c; InitializeTree( &Tree ); while ( ( c = getc( input ) ) != EOF ) { EncodeSymbol( &Tree, c, output ); UpdateModel( &Tree, c ); } EncodeSymbol( &Tree, END_OF_STREAM, output ); while ( argc-- > 0 ) { if ( strcmp( *argv, "-d" ) == 0 ) PrintTree( &Tree ); else printf( "Unused argument: %s\n", *argv ); argv++; }}/* * The Expansion routine looks very much like the compression routine. * It first initializes the Huffman tree, using the same routine as * the compressor did. It then sits in a loop, decoding characters and * updating the model until it reads in an END_OF_STREAM symbol. At * that point, it is time to quit. * * This routine will accept a single additional argument. If the user * passes a "-d" argument, the function will dump out the Huffman tree * to stdout when the program is complete. */void ExpandFile( input, output, argc, argv )BIT_FILE *input;FILE *output;int argc;char *argv[];{ int c; InitializeTree( &Tree ); while ( ( c = DecodeSymbol( &Tree, input ) ) != END_OF_STREAM ) { if ( putc( c, output ) == EOF ) fatal_error( "Error writing character" ); UpdateModel( &Tree, c ); } while ( argc-- > 0 ) { if ( strcmp( *argv, "-d" ) == 0 ) PrintTree( &Tree ); else printf( "Unused argument: %s\n", *argv ); argv++; }}/* * When performing adaptive compression, the Huffman tree starts out * very nearly empty. The only two symbols present initially are the * ESCAPE symbol and the END_OF_STREAM symbol. The ESCAPE symbol has to * be included so we can tell the expansion prog that we are transmitting a * previously unseen symbol. The END_OF_STREAM symbol is here because * it is greater than eight bits, and our ESCAPE sequence only allows for * eight bit symbols following the ESCAPE code. * * In addition to setting up the root node and its two children, this * routine also initializes the leaf array. The ESCAPE and END_OF_STREAM * leaf elements are the only ones initially defined, the rest of the leaf * elements are set to -1 to show that they aren't present in the * Huffman tree yet. */void InitializeTree( tree )TREE *tree;{ int i; tree->nodes[ ROOT_NODE ].child = ROOT_NODE + 1; tree->nodes[ ROOT_NODE ].child_is_leaf = FALSE; tree->nodes[ ROOT_NODE ].weight = 2; tree->nodes[ ROOT_NODE ].parent = -1; tree->nodes[ ROOT_NODE + 1 ].child = END_OF_STREAM; tree->nodes[ ROOT_NODE + 1 ].child_is_leaf = TRUE; tree->nodes[ ROOT_NODE + 1 ].weight = 1; tree->nodes[ ROOT_NODE + 1 ].parent = ROOT_NODE; tree->leaf[ END_OF_STREAM ] = ROOT_NODE + 1; tree->nodes[ ROOT_NODE + 2 ].child = ESCAPE; tree->nodes[ ROOT_NODE + 2 ].child_is_leaf = TRUE; tree->nodes[ ROOT_NODE + 2 ].weight = 1; tree->nodes[ ROOT_NODE + 2 ].parent = ROOT_NODE; tree->leaf[ ESCAPE ] = ROOT_NODE + 2; tree->next_free_node = ROOT_NODE + 3; for ( i = 0 ; i < END_OF_STREAM ; i++ ) tree->leaf[ i ] = -1;}/* * This routine is responsible for taking a symbol, and converting * it into the sequence of bits dictated by the Huffman tree. The * only complication is that we are working are way up from the leaf * to the root, and hence are getting the bits in reverse order. This * means we have to rack up the bits in an integer and then send them * out after they are all accumulated. In this version of the program, * we keep our codes in a long integer, so the maximum count is set * to an arbitray limit of 0x8000. It could be set as high as 65535 * if desired. */void EncodeSymbol( tree, c, output )TREE *tree;unsigned int c;BIT_FILE *output;{ unsigned long code; unsigned long current_bit; int code_size; int current_node; code = 0; current_bit = 1; code_size = 0; current_node = tree->leaf[ c ]; if ( current_node == -1 ) current_node = tree->leaf[ ESCAPE ]; while ( current_node != ROOT_NODE ) { if ( ( current_node & 1 ) == 0 ) code |= current_bit; current_bit <<= 1; code_size++; current_node = tree->nodes[ current_node ].parent; }; OutputBits( output, code, code_size ); if ( tree->leaf[ c ] == -1 ) { OutputBits( output, (unsigned long) c, 8 ); add_new_node( tree, c ); }}/* * Decoding symbols is easy. We start at the root node, then go down * the tree until we reach a leaf. At each node, we decide which * child to take based on the next input bit. After getting to the * leaf, we check to see if we read in the ESCAPE code. If we did, * it means that the next symbol is going to come through in the next * eight bits, unencoded. If that is the case, we read it in here, * and add the new symbol to the table. */int DecodeSymbol( tree, input )TREE *tree;BIT_FILE *input;{ int current_node; int c; current_node = ROOT_NODE; while ( !tree->nodes[ current_node ].child_is_leaf ) { current_node = tree->nodes[ current_node ].child; current_node += InputBit( input ); } c = tree->nodes[ current_node ].child; if ( c == ESCAPE ) { c = (int) InputBits( input, 8 ); add_new_node( tree, c ); } return( c );}/* * UpdateModel is called to increment the count for a given symbol. * After incrementing the symbol, this code has to work its way up * through the parent nodes, incrementing each one of them. That is * the easy part. The hard part is that after incrementing each * parent node, we have to check to see if it is now out of the proper * order. If it is, it has to be moved up the tree into its proper * place. */void UpdateModel( tree, c )TREE *tree;int c;{ int current_node; int new_node; if ( tree->nodes[ ROOT_NODE].weight == MAX_WEIGHT ) RebuildTree( tree ); current_node = tree->leaf[ c ]; while ( current_node != -1 ) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -