📄 060-073.html
字号:
<!DOCTYPE HTML PUBLIC "html.dtd"><HTML><HEAD><TITLE>The Data Compression Book-:The Dawn Age: Minimum Redundancy Coding</TITLE><META NAME="ROBOTS" CONTENT="NOINDEX, NOFOLLOW"><SCRIPT><!--function displayWindow(url, width, height) { var Win = window.open(url,"displayWindow",'width=' + width +',height=' + height + ',resizable=1,scrollbars=yes');}//--></SCRIPT></HEAD><BODY BGCOLOR="#FFFFFF" VLINK="#DD0000" TEXT="#000000" LINK="#DD0000" ALINK="#FF0000"><TD WIDTH="540" VALIGN="TOP"><!-- <CENTER><TABLE><TR><TD><FORM METHOD="GET" ACTION="http://search.itknowledge.com/excite/cgi-bin/AT-foldocsearch.cgi"><INPUT NAME="search" SIZE="20" VALUE=""><BR><CENTER><INPUT NAME="searchButton" TYPE="submit" VALUE="Glossary Search"></CENTER><INPUT NAME="source" TYPE="hidden" VALUE="local" CHECKED> <INPUT NAME="bltext" TYPE="hidden" VALUE="Back to Search"><INPUT NAME="sp" TYPE="hidden" VALUE="sp"></FORM></TD><TD><IMG SRC="http://www.itknowledge.com/images/dotclear.gif" WIDTH="15" HEIGHT="1"></TD><TD><FORM METHOD="POST" ACTION="http://search.itknowledge.com/excite/cgi-bin/AT-subscriptionsearch.cgi"><INPUT NAME="search" SIZE="20" VALUE=""><BR><CENTER><INPUT NAME="searchButton" TYPE="submit" VALUE=" Book Search "></CENTER><INPUT NAME="source" TYPE="hidden" VALUE="local" CHECKED> <INPUT NAME="backlink" TYPE="hidden" VALUE="http://search.itknowledge.com:80/excite/AT-subscriptionquery.html"><INPUT NAME="bltext" TYPE="hidden" VALUE="Back to Search"><INPUT NAME="sp" TYPE="hidden" VALUE="sp"></FORM></TD></TR></TABLE></CENTER> --><!-- ISBN=1558514341//--><!-- TITLE=The Data Compression Book-//--><!-- AUTHOR=Mark Nelson//--><!-- PUBLISHER=IDG Books Worldwide, Inc.//--><!-- IMPRINT=M & T Books//--><!-- CHAPTER=3//--><!-- PAGES=060-073//--><!-- UNASSIGNED1//--><!-- UNASSIGNED2//--><CENTER><TABLE BORDER><TR><TD><A HREF="058-060.html">Previous</A></TD><TD><A HREF="../ewtoc.html">Table of Contents</A></TD><TD><A HREF="074-074.html">Next</A></TD></TR></TABLE></CENTER><P><BR></P><H3><A NAME="Heading15"></A><FONT COLOR="#000077">The Compression Code</FONT></H3><P>The code for Huffman compression and decompression is shown in the listing below. This single file, HUFF.C, is about 500 lines long, of which probably 30 percent is comments. So we are able to implement a static dictionary Huffman compressor in only about 300 lines of code. The actual amount of code could easily be crunched down to a number much less than that. The small code and storage requirements make Huffman coding ideal for applications where both memory and CPU storage are at a premium.</P><!-- CODE //--><PRE>/********************** Start of HUFF.C *************************/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "bitio.h"#include "errhand.h"#include "main.h"/** The NODE structure is a node in the Huffman decoding tree. It has a* count, which is its weight in the tree, and the node numbers of its* two children. The saved_count member of the structure is only* there for debugging purposes, and can be safely taken out at any* time. It just holds the intial count for each of the symbols, since* the count member is continually being modified as the tree grows.*/typedef struct tree_node { unsigned int count; unsigned int saved_count; int child_0; int child_1} NODE;/** A Huffman tree is set up for decoding, not encoding. When encoding,* I first walk through the tree and build up a table of codes for* each symbol. The codes are stored in this CODE structure./*typedef struct code { unsigned int code; int code_bits;} CODE;/** The special EOS symbol is 256, the first available symbol after all* of the possible bytes. When decoding, reading this symbol* indicates that all of the data has been read in.*/#define END_OF_STREAM 256/** Local function prototypes, defined with or without ANSI prototypes.*/#ifdef __STDC__void count_bytes( FILE *input, unsigned long *long_counts );void scale_counts( unsigned long *long_counts, NODE *nodes );int build_tree( NODE *nodes );void convert_tree_to_code( NODE *nodes, CODE *codes, unsigned int code_so_far, int bits, int node );void output_counts( BIT_FILE *output, NODE *nodes );void input_counts( BIT_FILE *input, NODE *nodes );void print_model( NODE *nodes, CODE *codes );void compress_data( FILE *input, BIT_FILE *output, CODE *codes );void expand_data( BIT_FIle *input, FILE *output, NODE *nodes, int root_node );void print_char( int c );#else /* __STDC__ */void count_bytes();void scale_counts();int build_tree();void convert_tree_to_code();void output_counts();void input_counts();void print_model();void compress_data();void expand_data();void print_char();#endif /* __STDC__ *//** These two strings are used by MAIN-C.C and MAIN-E.C to print* messages of importance to the use of the program.*/char *CompressionName = "static order 0 model with Huffman coding";char *Usage ="infile outfile [-d]\n\n\ Specifying -d will dump the modeling\ data\n";/** CompressFile is the compression routine called by MAIN-C.C. It* looks for a single additional argument to be passed to it from* the command line: "-d". If a "-d" is present, it means the* user wants to see the model data dumped out for debugging* purposes.** This routine works in a fairly straightforward manner. First,* it has to allocate storage for three different arrays of data.* Next, it counts all the bytes in the input file. The counts* are all stored in long int, so the next step is to scale them down* to single byte counts in the NODE array. After the counts are* scaled, the Huffman decoding tree is built on top of the NODE* array. Another routine walks through the tree to build a table* of codes, one per symbol. Finally, when the codes are all ready,* compressing the file is a simple matter. After the file is* compressed, the storage is freed up, and the routine returns.**/void CompressFile( input, output, argc, argv )FILE *input;BIT_FILE *output;int argc;char *argv[];{ unsigned long *counts; NODE *nodes; CODE *codes; int root_node; counts = ( unsigned long *) calloc( 256, sizeof( unsigned long ) ); if ( counts == NULL ) fatal_error( "Error allocating counts array\n" ); if ( ( nodes = (NODE *) calloc( 514, sizeof( NODE ) ) ) == NULL ) fatal_error( "Error allocating nodes array\n" ); if ( ( codes = (CODE *) calloc( 257, sizeof( CODE ) ) ) == NULL ) fatal_error( "Error allocating codes array\n" ); count_bytes( input, counts ); scale_counts( counts, nodes ); output_counts( output, nodes ); root_node = build_tree( nodes ); convert_tree_to_code( nodes, codes, 0, 0, root_node ); if ( argc > 0 && strcmp( argv[ 0 ], "-d" ) == 0 ) print_model( nodes, codes ); compress_data( input, output, codes ); free( (char *) counts ); free( (char *) nodes ); free( (char *) codes );}/** ExpandFile is the routine called by MAIN-E.C to expand a file that* has been compressed with order 0 Huffman coding. This routine has* a simpler job than that of the Compression routine. All it has to* do is read in the counts that have been stored in the compressed* file, then build the Huffman tree. The data can then be expanded* by reading in a bit at a time from the compressed file. Finally,* the node array is freed and the routine returns.**/void ExpandFile( input, output, argc, argv )BIT_FILE *input;FILE *output;int argc;char *argv[];{ NODE *nodes; int root_node; if ( ( nodes = (NODE *) calloc( 514, sizeof( NODE ) ) ) == NULL ) fatal_error( "Error allocating nodes array\n" ); input_counts( input, nodes ); root_node = build_tree( nodes ); if ( argc > 0 && strcmp( argv[ 0 ], "-d" ) == 0 ) print_model( nodes, 0 ); expand_data( input, output, nodes, root_node ); free( (char *) nodes );}/** In order for the compressor to build the same model, I have to* store the symbol counts in the compressed file so the expander can* read them in. In order to save space, I don't save all 256 symbols* unconditionally. The format used to store counts looks like this:** start, stop, counts, start, stop, counts, ... 0** This means that I store runs of counts, until all the non-zero* counts have been stored. At this time the list is terminated by* storing a start value of 0. Note that at least 1 run of counts has* to be stored, so even if the first start value is 0, I read it in.* It also means that even in an empty file that has no counts, I have* to pass at least one count, which will have a value of 0.** In order to efficiently use this format, I have to identify runs of* non-zero counts. Because of the format used, I don't want to stop a* run because of just one or two zeros in the count stream. So I have* to sit in a loop looking for strings of three or more zero values* in a row.** This is simple in concept, but it ends up being one of the most* complicated routines in the whole program. A routine that just* writes out 256 values without attempting to optimize would be much* simpler, but would hurt compression quite a bit on small files.**/void output_counts ( output, nodes )BIT_FILE *output;NODE *nodes;{ int first; int last; int next; int i; first = 0; while ( first < 255 && nodes[ first ].count == 0 ) first++;/** Each time I hit the start of the loop, I assume that first is the* start of a run of non-zero values. The rest of the loop is* concerned with finding the value for last, which is the end of the* run, and the value of next, which is the start of the next run.* At the end of the loop, I assign next to first, so it starts in on* the next run.*/ for ( ; first < 256 ; first = next) { last = first + 1; for ( ; ; ) { for ( ; last < 256 ; last ++ ) if ( nodes[ last ].count == 0 ) break; last--; for ( next = last + 1; next < 256 ; next++ ) if ( nodes[ next ]. count ! = 0 ) break; if ( next > 255 ) break; if ( ( next - last ) > 3 ) break; last = next; };/** Here is where I output first, last, and all the counts in between.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -