📄 lzrw2.c
字号:
/******************************************************************************//* *//* LZRW2.C *//* *//******************************************************************************//* *//* Author : Ross Williams. *//* Date : 29-Jun-1991. *//* Release : 1. *//* *//******************************************************************************//* *//* This file contains an implementation of the LZRW2 data compression *//* algorithm in C. *//* *//* The algorithm is a general purpose compression algorithm that runs fast *//* and gives reasonable compression. The algorithm is a member of the Lempel *//* Ziv family of algorithms and bases its compression on the presence in the *//* data of repeated substrings. *//* *//* This algorithm is unpatented and the code is public domain. As the *//* algorithm is based on the LZ77 class of algorithms, it is unlikely to be *//* the subject of a patent challenge. *//* *//* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW2 algorithm is *//* deterministic and is guaranteed to yield the same compressed *//* representation for a given file each time it is run. *//* *//* The LZRW2 algorithm was originally designed and implemented *//* by Ross Williams on 25-Nov-1990. *//* *//* Here are the results of applying this code compiled under THINK C 4.0 and *//* running on a Mac-SE (8MHz 68000) to the standard calgary corpus. *//* *//* +----------------------------------------------------------------+ *//* | DATA COMPRESSION TEST | *//* | ===================== | *//* | Time of run : Sat 29-Jun-1991 01:24PM | *//* | Timing accuracy : One part in 100 | *//* | Context length : 262144 bytes (= 256.0000K) | *//* | Test suite : Calgary Corpus Suite | *//* | Files in suite : 14 | *//* | Algorithm : LZRW2 | *//* | Note: All averages are calculated from the un-rounded values. | *//* +----------------------------------------------------------------+ *//* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | *//* | ---------- ------ --- ------ ----- ---- ------- ------- | *//* | rpus:Bib.D 111261 1 58726 52.8 4.22 16.98 52.36 | *//* | us:Book1.D 768771 3 491413 63.9 5.11 14.82 47.04 | *//* | us:Book2.D 610856 3 331932 54.3 4.35 17.10 51.28 | *//* | rpus:Geo.D 102400 1 84118 82.1 6.57 10.81 41.67 | *//* | pus:News.D 377109 2 215652 57.2 4.57 15.20 50.68 | *//* | pus:Obj1.D 21504 1 13032 60.6 4.85 13.13 50.15 | *//* | pus:Obj2.D 246814 1 119078 48.2 3.86 17.81 55.01 | *//* | s:Paper1.D 53161 1 28269 53.2 4.25 17.16 51.92 | *//* | s:Paper2.D 82199 1 46789 56.9 4.55 16.58 49.96 | *//* | rpus:Pic.D 513216 2 123311 24.0 1.92 33.17 71.63 | *//* | us:Progc.D 39611 1 19959 50.4 4.03 17.65 53.36 | *//* | us:Progl.D 71646 1 28571 39.9 3.19 22.63 59.13 | *//* | us:Progp.D 49379 1 19544 39.6 3.17 22.52 59.45 | *//* | us:Trans.D 93695 1 35364 37.7 3.02 22.87 60.89 | *//* +----------------------------------------------------------------+ *//* | Average 224401 1 115411 51.5 4.12 18.46 53.89 | *//* +----------------------------------------------------------------+ *//* *//******************************************************************************/ /* INCLUDE FILES */ /* ============= *///#include "port.h" /* Defines symbols for the non portable stuff. *///#include "compress.h" /* Defines single exported function "compress". *///#include "fast_copy.h" /* Fast memory copy routine. *//******************************************************************************//* The following structure is returned by the "compress" function below when *//* the user asks the function to return identifying information. *//* The most important field in the record is the working memory field which *//* tells the calling program how much working memory should be passed to *//* "compress" when it is called to perform a compression or decompression. *//* For the LZRW2 algorithm, the decompressor requires less memory than the *//* decompressor (so I have defined two constants) but the interface standard *//* I am using only allows a single memory size for both compression and *//* decompression so I put in the larger: CMP_MEM_REQ. *//* Note: CMP_MEM_REQ~=24K, DEC_MEM_REQ~=16K. *//* For more information on this structure see "compress.h". */ #define U(X) ((ULONG) X)#define SIZE_P_BYTE (U(sizeof(UBYTE *)))#define SIZE_WORD (U(sizeof(UWORD )))#define ALIGNMENT_FUDGE (U(100))#define CMP_MEM_REQ ( U(4096)*(SIZE_P_BYTE+SIZE_WORD) + ALIGNMENT_FUDGE )#define DEC_MEM_REQ ( U(4096)*(SIZE_P_BYTE ) + ALIGNMENT_FUDGE )static struct compress_identity identity ={ U(0x3D8F733A), /* Algorithm identification number. */ CMP_MEM_REQ, /* Working memory (bytes) required. */ "LZRW2", /* Name of algorithm. */ "1.0", /* Version number of algorithm. */ "25-Nov-1990", /* Date of algorithm. */ "Public Domain", /* Copyright notice. */ "Ross N. Williams", /* Author of algorithm. */ "Renaissance Software", /* Affiliation of author. */ "Public Domain" /* Vendor of algorithm. */}; void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);void compress_decompress(UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);/******************************************************************************//* This function is the only function exported by this module. *//* Depending on its first parameter, the function can be requested to *//* compress a block of memory, decompress a block of memory, or to identify *//* itself. For more information, see the specification file "compress.h". */EXPORT void compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)UWORD action; /* Action to be performed. */UBYTE *wrk_mem; /* Address of working memory we can use. */UBYTE *src_adr; /* Address of input data. */ULONG src_len; /* Length of input data. */UBYTE *dst_adr; /* Address to put output data. */ULONG *p_dst_len; /* Address of longword for length of output data. */{ switch (action) { case COMPRESS_ACTION_IDENTITY: *p_dst_len=(ULONG) &identity; break; case COMPRESS_ACTION_COMPRESS: compress_compress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; case COMPRESS_ACTION_DECOMPRESS: compress_decompress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; }}/******************************************************************************//* *//* BRIEF DESCRIPTION OF THE LZRW2 ALGORITHM *//* ======================================== *//* The LZRW2 algorithm is identical to the LZRW1-A algorithm except that it *//* employs a PHRASE TABLE. The phrase table contains pointers to the first *//* byte of the most recent 4096 phrases processed. A phrase is defined to be *//* a sequence of one or more bytes that are coded as a single literal or copy *//* item. Instead of coding a copy item as a length and an offset as LZRW1 *//* does, LZRW2 codes a copy item as a length and a phrase table index. The *//* result is that LZRW2 can point far further back than LZRW1 but without *//* increasing the number of bits allocated to the 'offset' in the coding. The *//* phrase table is incapable of pointing within phrases, but LZRW1 was *//* incapabale of doing that anyway because it only ever updated its hash *//* table on phrase boundaries. *//* *//* Updating the phrase table involves just writing a pointer to the next *//* position (which circulates) in the phrase table after each literal or *//* copy item is coded. The decompressor needs to maintain a phrase table but *//* not a hash table. *//* *//* Use of the phrase table yields a 3% absolute to 5% absolute improvement *//* over LZRW1-A in compression, does not affect compression speed, but *//* reduces decompression speed by about 30%. Thus LZRW2 does not dominate *//* LZRW1-A, as LZRW1-A does LZRW1. *//* *//* An extra 3% absolute compression can be obtained by using a hash table of *//* depth two. However, increasing the depth above one incurs a 40% decrease *//* in compression speed which was not considered worthwhile. Another reason *//* for keeping the depth down to one was to allow easy comparison with the *//* LZRW1-A algorithm so as to demonstrate the exact effect of the phrase *//* table. *//* *//* +---+ +---+ *//* |___|4095 |___|4095 *//* |___| |___| *//* Next-->|___| +-------*_|<---+ /----+---\ *//* (circles |___| | |___| +---|Hash | *//* through +---*_|<------+ |___| |Function| *//* phrase | |___| |___| \--------/ *//* table) | |___|0 |___|0 ^ *//* | +---+ +---+ | *//* | Phrase Hash +-----+ *//* | Table Table | *//* | --- *//* v ^^^ *//* +-------------------------------------|----------------+ *//* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| *//* +-------------------------------------|----------------+ *//* | |1......18| | *//* |<------- Lempel=History ------------>|<--Ziv-->| | *//* | (=bytes already processed) |<-Still to go-->| *//* |<-------------------- INPUT BLOCK ------------------->| *//* *//* LZRW2 Algorithm Execution Summary *//* --------------------------------- *//* 1. Hash the first three bytes of the Ziv to yield a hash table index h. *//* 2. Look up the hash table yielding phrase table index i. *//* 3. Look up phrase i yielding a pointer p to the Lempel. *//* 4. Overwrite the 'next' cyclic entry in the phrase table with a pointer *//* to the Ziv. Increment the 'next' index (mod 4096). *//* 5. Overwrite hash table entry h with the index of the overwritten *//* position of step 4. *//* 6. Match where p points with the Ziv. If there is a match of three or *//* more bytes, code those bytes (in the Ziv) as a copy item, otherwise *//* code the next byte in the Ziv as a literal item. *//* *//******************************************************************************//* *//* DEFINITION OF COMPRESSED FILE FORMAT *//* ==================================== *//* * A compressed file consists of a COPY FLAG followed by a REMAINDER. *//* * The copy flag CF uses up four bytes with the first byte being the *//* least significant. *//* * If CF=1, then the compressed file represents the remainder of the file *//* exactly. Otherwise CF=0 and the remainder of the file consists of zero *//* or more GROUPS, each of which represents one or more bytes. *//* * Each group consists of two bytes of CONTROL information followed by *//* sixteen ITEMs except for the last group which can contain from one *//* to sixteen items. *//* * An item can be either a LITERAL item or a COPY item. *//* * Each item corresponds to a bit in the control bytes. *//* * The first control byte corresponds to the first 8 items in the group *//* with bit 0 corresponding to the first item in the group and bit 7 to *//* the eighth item in the group. *//* * The second control byte corresponds to the second 8 items in the group *//* with bit 0 corresponding to the ninth item in the group and bit 7 to */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -