📄 244-254.html
字号:
<!DOCTYPE HTML PUBLIC "html.dtd"><HTML><HEAD><TITLE>The Data Compression Book-:Sliding Window Compression</TITLE><META NAME="ROBOTS" CONTENT="NOINDEX, NOFOLLOW"><SCRIPT><!--function displayWindow(url, width, height) { var Win = window.open(url,"displayWindow",'width=' + width +',height=' + height + ',resizable=1,scrollbars=yes');}//--></SCRIPT></HEAD><BODY BGCOLOR="#FFFFFF" VLINK="#DD0000" TEXT="#000000" LINK="#DD0000" ALINK="#FF0000"><TD WIDTH="540" VALIGN="TOP"><!-- <CENTER><TABLE><TR><TD><FORM METHOD="GET" ACTION="http://search.itknowledge.com/excite/cgi-bin/AT-foldocsearch.cgi"><INPUT NAME="search" SIZE="20" VALUE=""><BR><CENTER><INPUT NAME="searchButton" TYPE="submit" VALUE="Glossary Search"></CENTER><INPUT NAME="source" TYPE="hidden" VALUE="local" CHECKED> <INPUT NAME="bltext" TYPE="hidden" VALUE="Back to Search"><INPUT NAME="sp" TYPE="hidden" VALUE="sp"></FORM></TD><TD><IMG SRC="http://www.itknowledge.com/images/dotclear.gif" WIDTH="15" HEIGHT="1"></TD><TD><FORM METHOD="POST" ACTION="http://search.itknowledge.com/excite/cgi-bin/AT-subscriptionsearch.cgi"><INPUT NAME="search" SIZE="20" VALUE=""><BR><CENTER><INPUT NAME="searchButton" TYPE="submit" VALUE=" Book Search "></CENTER><INPUT NAME="source" TYPE="hidden" VALUE="local" CHECKED> <INPUT NAME="backlink" TYPE="hidden" VALUE="http://search.itknowledge.com:80/excite/AT-subscriptionquery.html"><INPUT NAME="bltext" TYPE="hidden" VALUE="Back to Search"><INPUT NAME="sp" TYPE="hidden" VALUE="sp"></FORM></TD></TR></TABLE></CENTER> --><!-- ISBN=1558514341//--><!-- TITLE=The Data Compression Book-//--><!-- AUTHOR=Mark Nelson//--><!-- PUBLISHER=IDG Books Worldwide, Inc.//--><!-- IMPRINT=M & T Books//--><!-- CHAPTER=8//--><!-- PAGES=244-254//--><!-- UNASSIGNED1//--><!-- UNASSIGNED2//--><CENTER><TABLE BORDER><TR><TD><A HREF="242-244.html">Previous</A></TD><TD><A HREF="../ewtoc.html">Table of Contents</A></TD><TD><A HREF="../ch09/255-257.html">Next</A></TD></TR></TABLE></CENTER><P><BR></P><H3><A NAME="Heading21"></A><FONT COLOR="#000077">The Code</FONT></H3><!-- CODE //--><PRE>/*********************** Start of LZSS.C ************************* This is the LZSS module, which implements an LZ77 style compression* algorithm. As implemented here it uses a 12 bit index into the sliding* window, and a 4 bit length, which is adjusted to reflect phrase* lengths of between 2 and 17 bytes.*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "bitio.h"/** Various constants used to define the compression parameters. The* INDEX_BIT_COUNT tells how many bits we allocate to indices into the* text window. This directly determines the WINDOW_SIZE. The* LENGTH_BIT_COUNT tells how many bits we allocate for the length of* an encode phrase. This determines the size of the look-ahead buffer.* The TREE_ROOT is a special node in the tree that always points to* the root node of the binary phrase tree. END_OF_STREAM is a special* index used to flag the fact that the file has been completely* encoded, and there is no more data. UNUSED is the null index for* the tree. MOD_WINDOW() is a macro used to perform arithmetic on tree* indices.**/#define INDEX_BIT_COUNT 12#define LENGTH_BIT_COUNT 4#define WINDOW_SIZE ( 1 << INDEX_BIT_COUNT )#define RAW_LOOK_AHEAD_SIZE ( 1 << LENGTH_BIT_COUNT )#define BREAK_EVEN ( ( 1 + INDEX_BIT_COUNT + LENGTH_BIT_COUNT ) / 9 )#define LOOK_AHEAD_SIZE (RAW_LOOK_AHEAD_SIZE + BREAK_EVEN)#define TREE_ROOT WINDOW_SIZE#define END_OF_STREAM 0#define UNUSED 0#define MOD_WINDOW( a ) ( ( a ) & ( WINDOW_SIZE - 1 ) )char *CompressionName = "LZSS Encoder";char *Usage = "in-file out-file\n\n";/** These are the two global data structures used in this program.* The window[] array is exactly that, the window of previously seen* text, as well as the current look-ahead text. The tree[] structure* contains the binary tree of all of the strings in the window sorted* in order.*/unsigned char window WINDOW_SIZE ];struct { int parent; int smaller_child; int larger_child;} tree WINDOW_SIZE + 1 ];/** Function prototypes for both ANSI C compilers and their K&R brethren.*/#ifdef __STDC__void InitTree( int r );void ContractNode( int old_node, int new_node );void ReplaceNode( int old_node, int new_node );int FindNextNode( int node );void DeleteString( int p );int AddString( int new_node, int *match_position );void CompressFile( FILE *input, BIT_FILE *output, int argc, char *argv[] );void ExpandFile( BIT_FILE *input, FILE *output, int argc, char *argv[] );#elsevoid InitTree();void ContractNode();void ReplaceNode();int FindNextNode();void DeleteString();int AddString();void CompressFile();void ExpandFile();#endif/** Since the tree is static data, it comes up with every node* initialized to 0, which is good, since 0 is the UNUSED code.* However, to make the tree really usable, a single phrase has to be* added to the tree so it has a root node. That is done right here.*/void InitTree( r )int r;{ tree[ TREE_ROOT ].larger_child = r; tree[ r ].parent = TREE_ROOT; tree[ r ].larger_child = UNUSED; tree[ r ].smaller_child = UNUSED;}/** This routine is used when a node is being deleted. The link to* its descendant is broken by pulling the descendant in to overlay* the existing link.*/void ContractNode( old_node, new_node )int old_node;int new_node;{ tree[ new_node ].parent = tree[ old_node ]. parent; if ( tree[ tree[ old_node ].parent ].larger_child == old_node ) tree[ tree[ old_node ].parent ].larger_child = new_node; else tree[ tree[ old_node ].parent ].smaller_child = new_node; tree[ old_node ].parent = UNUSED;}/** This routine is also used when a node is being deleted. However,* in this case, it is being replaced by a node that was not previously* in the tree.*/void ReplaceNode( old_node, new_node )int old_node;int new_node;{ int parent; parent = tree[ old_node ].parent; if ( tree[ parent ].smaller_child == old_node ) tree[ parent ].smaller_child = new_node; else tree[ parent ].larger_child = new_node; tree[ new_node ] = tree[ old_node ]; tree[ tree[ new_node ].smaller_child ].parent = new_node; tree[ tree[ new_node ].larger_child ].parent = new_node; tree[ old_node ].parent = UNUSED;}/** This routine is used to find the next smallest node after the node* argument. It assumes that the node has a smaller child. We find* the next smallest child by going to the smaller_child node, then* going to the end of the larger_child descendant chain.*/int FindNextNode( node )int node;{ int next; next = tree[ node ].smaller_child; while ( tree[ next ].larger_child != UNUSED ) next = tree[ next ].larger_child; return( next );}/** This routine performs the classic binary tree deletion algorithm.* If the node to be deleted has a null link in either direction, we* just pull the non-null link up one to replace the existing link.* If both links exist, we instead delete the next link in order, which* is guaranteed to have a null link, then replace the node to be deleted* with the next link.*/void DeleteString( p )int p;{ int replacement; if ( tree[ p ].parent == UNUSED ) return; if ( tree[ p ].larger_child == UNUSED ) ContractNode( p, tree[ p ].smaller_child ); else if ( tree[ p ].smaller_child == UNUSED ) ContractNode( p, tree[ p ].larger_child ); else { replacement = FindNextNode( p ); DeleteString( replacement ); ReplaceNode( p, replacement ); }}/** This where most of the work done by the encoder takes place. This* routine is responsible for adding the new node to the binary tree.* It also has to find the best match among all the existing nodes in* the tree, and return that to the calling routine. To make matters* even more complicated, if the new_node has a duplicate in the tree,* the old_node is deleted, for reasons of efficiency.*/int AddString( new_node, match_position )int mew_node;int *match_position;{ int i; int test_node; int delta; int match_length; int *child; if ( new_node == END_OF_STREAM ) return( 0 ); test_node = tree[ TREE_ROOT ].larger_child; match_length = 0; for ( ; ; ) { for ( i = 0 ; i < LOOK_AHEAD_SIZE ; i++ ) { delta = window[ MOD_WINDOW( new_node + i ) ] - window[ MOD_WINDOW( test_node + i ) ]; if ( delta != 0 ) break; } if ( i >= match_length ) { match_length = i; *match_position = test_node; if ( match_length >= LOOK_AHEAD_SIZE ) { ReplaceNode( test_node, new_node ); return( match_length ); } } if ( delta >= 0 ) child = &tree[ test_node ]. larger_child; else child = &tree[ test_node ].smaller-child; if ( *child == UNUSED ) { *child = new_node; tree[ new_node ].parent = test_node; tree[ new_node ].larger_child = UNUSED; tree[ new_node ].smaller_child = UNUSED; return( match_length ); } test_node = *child; }}/** This is the compression routine. It has to first load up the look* ahead buffer, then go into the main compression loop. The main loop* decides whether to output a single character or an index/length* token that defines a phrase. Once the character or phrase has been* sent out, another loop has to run. The second loop reads in new* characters, deletes the strings that are overwritten by the new* character, then adds the strings that are created by the new* character.*/void CompressFile( input, output, argc, argv )FILE *input;BIT_FILE *output;int argc;char *argv[];{ int i; int c; int look_ahead_bytes; int current_position; int replace_count; int match_length; int match_position; current_position = 1; for ( i = 0 ; i < LOOK_AHEAD_SIZE ; i++ ) { if ( ( c = getc( input ) ) == EOF ) break; window[ current_position + i ] = (unsigned char) c; } look_ahead_bytes = 1; InitTree( current_position ); match_length = 0; match_position = 0; while ( look_ahead_bytes > 0 ) { if ( match_length > look_ahead_bytes ) match_length = look_ahead_bytes; if ( match_length <= BREAK_EVEN ) { replace_count = 1; OutputBit( output, 1 ); OutputBits( output, (unsigned long)window[ current_position ], 8 ); } else { OutputBit( output, 0 ); OutputBits( output, (unsigned long) match_position, INDEX_BIT_COUNT ); OutputBits( output, (unsigned long) ( match_length - ( BREAK_EVEN + 1 ) ), LENGTH_BIT_COUNT ); replace_count = match_length; } for ( i = 0 ; i < replace_count ; i++ ) { DeleteString( MOD_WINDOW( current_position + LOOK_AHEAD-SIZE ) ); if ( ( c = getc( input ) ) == EOF ) look_ahead_bytes--; else window[ MOD_WINDOW ( current_position + LOOK_AHEAD_SIZE ) ] = (unsigned char) c; current position = MOD_WINDOW( current_position + 1 ); if ( look ahead_bytes ) match_length = AddString( current_position, &match_position ); } } OutputBit( output, 0 ); OutputBits( output, (unsigned long) END_OF_STREAM, INDEX_BIT_COUNT); while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ );}/** This is the expansion routine for the LZSS algorithm. All it has* to do is read in flag bits, decide whether to read in a character or* a index/length pair, and take the appropriate action.*/void ExpandFile( input, output, argc, argv )BIT_FILE *input;FILE *output;int argc;char *argv[];{ int i; int current_position; int c; int match_length; int match_ position; current_position = 1; for ( ; ; ) { if ( InputBit( input ) ) { c = (int) InputBits( input, 8 ); putc( c, output ); window[ current_position ] = (unsigned char) c; current_position = MOD_WINDOW( current_position + 1 ); } else { match_position = (int) InputBits( input, INDEX_BIT_COUNT ); if ( match_position == END_OF_STREAM ) break; match_length = (int) InputBits( input, LENGTH_BIT_COUNT ); match_length += BREAK_EVEN; for ( i = 0 ; i <= match_length ; i++ ) { c = window[ MOD_WINDOW( match_position + i ) ]; putc( c, output ); window[ current_position ] = (unsigned char) c; current_position = MOD_WINDOW( current_position + 1 ); } } } while ( argc-- > 0 ) printf( "Unknown argument: %s\n", *argv++ );}/************************* End of LZSS.C *************************/</PRE><!-- END CODE //--><P><BR></P><CENTER><TABLE BORDER><TR><TD><A HREF="242-244.html">Previous</A></TD><TD><A HREF="../ewtoc.html">Table of Contents</A></TD><TD><A HREF="../ch09/255-257.html">Next</A></TD></TR></TABLE></CENTER></TD></TR></TABLE></BODY></HTML>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -