📄 huff.c
字号:
/************************** Start of HUFF.C ************************* * * This is the Huffman coding module used in Chapter 3. * Compile with BITIO.C, ERRHAND.C, and either MAIN-C.C or MAIN-E.C */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>
#include <math.h>#include "bitio.h"#include "errhand.h"#include "main.h"
/* * The NODE structure is a node in the Huffman decoding tree. It has a * count, which is its weight in the tree, and the node numbers of its * two children. The saved_count member of the structure is only * there for debugging purposes, and can be safely taken out at any * time. It just holds the intial count for each of the symbols, since * the count member is continually being modified as the tree grows. */typedef struct tree_node { unsigned int count; unsigned int saved_count; int child_0; int child_1;} NODE;/* * A Huffman tree is set up for decoding, not encoding. When encoding, * I first walk through the tree and build up a table of codes for * each symbol. The codes are stored in this CODE structure. */typedef struct code { unsigned int code; int code_bits;} CODE;/* * The special EOS symbol is 256, the first available symbol after all * of the possible bytes. When decoding, reading this symbols * indicates that all of the data has been read in. */#define END_OF_STREAM 256/* * Local function prototypes, defined with or without ANSI prototypes. */#ifdef __STDC__void count_bytes( FILE *input, unsigned long *long_counts );void scale_counts( unsigned long *long_counts, NODE *nodes );int build_tree( NODE *nodes );void convert_tree_to_code( NODE *nodes, CODE *codes, unsigned int code_so_far, int bits, int node );void output_counts( BIT_FILE *output, NODE *nodes );void input_counts( BIT_FILE *input, NODE *nodes );void print_model( NODE *nodes, CODE *codes );void compress_data( FILE *input, BIT_FILE *output, CODE *codes );void expand_data( BIT_FILE *input, FILE *output, NODE *nodes, int root_node );void print_char( int c );
void statistical_info(unsigned long *, CODE *);#else /* __STDC__ */void count_bytes();void scale_counts();int build_tree();void convert_tree_to_code();void output_counts();void input_counts();void print_model();void compress_data();void expand_data();void print_char();
void statistical_info();#endif /* __STDC__ *//* * These two strings are used by MAIN-C.C and MAIN-E.C to print * messages of importance to the user of the program. */char *CompressionName = "static order 0 model with Huffman coding";char *Usage ="infile outfile [-d]\n\nSpecifying -d will dump the modeling data\n";/* * CompressFile is the compression routine called by MAIN-C.C. It * looks for a single additional argument to be passed to it from * the command line: "-d". If a "-d" is present, it means the * user wants to see the model data dumped out for debugging * purposes. * * This routine works in a fairly straightforward manner. First, * it has to allocate storage for three different arrays of data. * Next, it counts all the bytes in the input file. The counts * are all stored in long int, so the next step is scale them down * to single byte counts in the NODE array. After the counts are * scaled, the Huffman decoding tree is built on top of the NODE * array. Another routine walks through the tree to build a table * of codes, one per symbol. Finally, when the codes are all ready, * compressing the file is a simple matter. After the file is * compressed, the storage is freed up, and the routine returns. * */void CompressFile( input, output, argc, argv )FILE *input;BIT_FILE *output;int argc;char *argv[];{ unsigned long *counts; NODE *nodes; CODE *codes; int root_node; counts = (unsigned long *) calloc( 256, sizeof( unsigned long ) ); if ( counts == NULL ) fatal_error( "Error allocating counts array\n" ); if ( ( nodes = (NODE *) calloc( 514, sizeof( NODE ) ) ) == NULL ) fatal_error( "Error allocating nodes array\n" ); if ( ( codes = (CODE *) calloc( 257, sizeof( CODE ) ) ) == NULL ) fatal_error( "Error allocating codes array\n" ); count_bytes( input, counts ); scale_counts( counts, nodes ); output_counts( output, nodes ); root_node = build_tree( nodes ); convert_tree_to_code( nodes, codes, 0, 0, root_node );
statistical_info(counts,codes);
if ( argc > 0 && strcmp( argv[ 0 ], "-d" ) == 0 ) print_model( nodes, codes ); compress_data( input, output, codes ); free( (char *) counts ); free( (char *) nodes ); free( (char *) codes );}/* * ExpandFile is the routine called by MAIN-E.C to expand a file that * has been compressed with order 0 Huffman coding. This routine has * a simpler job than that of the Compression routine. All it has to * do is read in the counts that have been stored in the compressed * file, then build the Huffman tree. The data can then be expanded * by reading in a bit at a time from the compressed file. Finally, * the node array is freed and the routine returns. * */void ExpandFile( input, output, argc, argv )BIT_FILE *input;FILE *output;int argc;char *argv[];{ NODE *nodes; int root_node; if ( ( nodes = (NODE *) calloc( 514, sizeof( NODE ) ) ) == NULL ) fatal_error( "Error allocating nodes array\n" ); input_counts( input, nodes ); root_node = build_tree( nodes ); if ( argc > 0 && strcmp( argv[ 0 ], "-d" ) == 0 ) print_model( nodes, 0 ); expand_data( input, output, nodes, root_node ); free( (char *) nodes );}/* * In order for the compressor to build the same model, I have to store * the symbol counts in the compressed file so the expander can read * them in. In order to save space, I don't save all 256 symbols * unconditionally. The format used to store counts looks like this: * * start, stop, counts, start, stop, counts, ... 0 * * This means that I store runs of counts, until all the non-zero * counts have been stored. At this time the list is terminated by * storing a start value of 0. Note that at least 1 run of counts has * to be stored, so even if the first start value is 0, I read it in. * It also means that even in an empty file that has no counts, I have * to pass at least one count. * * In order to efficiently use this format, I have to identify runs of * non-zero counts. Because of the format used, I don't want to stop a * run because of just one or two zeros in the count stream. So I have * to sit in a loop looking for strings of three or more zero values in * a row. * * This is simple in concept, but it ends up being one of the most * complicated routines in the whole program. A routine that just * writes out 256 values without attempting to optimize would be much * simpler, but would hurt compression quite a bit on small files. * */void output_counts( output, nodes )BIT_FILE *output;NODE *nodes;{ int first; int last; int next; int i; first = 0; while ( first < 255 && nodes[ first ].count == 0 ) first++;/* * Each time I hit the start of the loop, I assume that first is the * number for a run of non-zero values. The rest of the loop is * concerned with finding the value for last, which is the end of the * run, and the value of next, which is the start of the next run. * At the end of the loop, I assign next to first, so it starts in on * the next run. */ for ( ; first < 256 ; first = next ) { last = first + 1; for ( ; ; ) { for ( ; last < 256 ; last++ ) if ( nodes[ last ].count == 0 ) break; last--; for ( next = last + 1; next < 256 ; next++ ) if ( nodes[ next ].count != 0 ) break; if ( next > 255 ) break; if ( ( next - last ) > 3 ) break; last = next; };/* * Here is where I output first, last, and all the counts in between. */ if ( putc( first, output->file ) != first ) fatal_error( "Error writing byte counts\n" ); if ( putc( last, output->file ) != last ) fatal_error( "Error writing byte counts\n" ); for ( i = first ; i <= last ; i++ ) { if ( putc( nodes[ i ].count, output->file ) != (int) nodes[ i ].count ) fatal_error( "Error writing byte counts\n" ); } } if ( putc( 0, output->file ) != 0 ) fatal_error( "Error writing byte counts\n" );}/* * When expanding, I have to read in the same set of counts. This is * quite a bit easier that the process of writing them out, since no * decision making needs to be done. All I do is read in first, check * to see if I am all done, and if not, read in last and a string of * counts. */void input_counts( input, nodes )BIT_FILE *input;NODE *nodes;{ int first; int last; int i; int c; for ( i = 0 ; i < 256 ; i++ ) nodes[ i ].count = 0; if ( ( first = getc( input->file ) ) == EOF ) fatal_error( "Error reading byte counts\n" ); if ( ( last = getc( input->file ) ) == EOF ) fatal_error( "Error reading byte counts\n" ); for ( ; ; ) { for ( i = first ; i <= last ; i++ ) if ( ( c = getc( input->file ) ) == EOF )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -