📄 lzrw3.c
字号:
/******************************************************************************//* *//* LZRW3.C *//* *//******************************************************************************//* *//* Author : Ross Williams. *//* Date : 30-Jun-1991. *//* Release : 1. *//* *//******************************************************************************//* *//* This file contains an implementation of the LZRW3 data compression *//* algorithm in C. *//* *//* The algorithm is a general purpose compression algorithm that runs fast *//* and gives reasonable compression. The algorithm is a member of the Lempel *//* Ziv family of algorithms and bases its compression on the presence in the *//* data of repeated substrings. *//* *//* This algorithm is unpatented and the code is public domain. As the *//* algorithm is based on the LZ77 class of algorithms, it is unlikely to be *//* the subject of a patent challenge. *//* *//* Unlike the LZRW1 and LZRW1-A algorithms, the LZRW3 algorithm is *//* deterministic and is guaranteed to yield the same compressed *//* representation for a given file each time it is run. *//* *//* The LZRW3 algorithm was originally designed and implemented *//* by Ross Williams on 31-Dec-1990. *//* *//* Here are the results of applying this code, compiled under THINK C 4.0 *//* and running on a Mac-SE (8MHz 68000), to the standard calgary corpus. *//* *//* +----------------------------------------------------------------+ *//* | DATA COMPRESSION TEST | *//* | ===================== | *//* | Time of run : Sun 30-Jun-1991 09:31PM | *//* | Timing accuracy : One part in 100 | *//* | Context length : 262144 bytes (= 256.0000K) | *//* | Test suite : Calgary Corpus Suite | *//* | Files in suite : 14 | *//* | Algorithm : LZRW3 | *//* | Note: All averages are calculated from the un-rounded values. | *//* +----------------------------------------------------------------+ *//* | File Name Length CxB ComLen %Remn Bits Com K/s Dec K/s | *//* | ---------- ------ --- ------ ----- ---- ------- ------- | *//* | rpus:Bib.D 111261 1 55033 49.5 3.96 19.46 32.27 | *//* | us:Book1.D 768771 3 467962 60.9 4.87 17.03 31.07 | *//* | us:Book2.D 610856 3 317102 51.9 4.15 19.39 34.15 | *//* | rpus:Geo.D 102400 1 82424 80.5 6.44 11.65 18.18 | *//* | pus:News.D 377109 2 205670 54.5 4.36 17.14 27.47 | *//* | pus:Obj1.D 21504 1 13027 60.6 4.85 13.40 18.95 | *//* | pus:Obj2.D 246814 1 116286 47.1 3.77 19.31 30.10 | *//* | s:Paper1.D 53161 1 27522 51.8 4.14 18.60 31.15 | *//* | s:Paper2.D 82199 1 45160 54.9 4.40 18.45 32.84 | *//* | rpus:Pic.D 513216 2 122388 23.8 1.91 35.29 51.05 | *//* | us:Progc.D 39611 1 19669 49.7 3.97 18.87 30.64 | *//* | us:Progl.D 71646 1 28247 39.4 3.15 24.34 40.66 | *//* | us:Progp.D 49379 1 19377 39.2 3.14 23.91 39.23 | *//* | us:Trans.D 93695 1 33481 35.7 2.86 25.48 40.37 | *//* +----------------------------------------------------------------+ *//* | Average 224401 1 110953 50.0 4.00 20.17 32.72 | *//* +----------------------------------------------------------------+ *//* *//******************************************************************************/ /* INCLUDE FILES */ /* ============= *///#include "port.h" /* Defines symbols for the non portable stuff. *///#include "compress.h" /* Defines single exported function "compress". *///#include "fast_copy.h" /* Fast memory copy routine. *//******************************************************************************//* The following structure is returned by the "compress" function below when *//* the user asks the function to return identifying information. *//* The most important field in the record is the working memory field which *//* tells the calling program how much working memory should be passed to *//* "compress" when it is called to perform a compression or decompression. *//* LZRW3 uses the same amount of memory during compression and decompression. *//* For more information on this structure see "compress.h". */ #define U(X) ((ULONG) X)#define SIZE_P_BYTE (U(sizeof(UBYTE *)))#define SIZE_WORD (U(sizeof(UWORD )))#define ALIGNMENT_FUDGE (U(16))#define MEM_REQ ( U(4096)*(SIZE_P_BYTE) + ALIGNMENT_FUDGE )static struct compress_identity identity ={ U(0x032DDEA8), /* Algorithm identification number. */ MEM_REQ, /* Working memory (bytes) required. */ "LZRW3", /* Name of algorithm. */ "1.0", /* Version number of algorithm. */ "31-Dec-1990", /* Date of algorithm. */ "Public Domain", /* Copyright notice. */ "Ross N. Williams", /* Author of algorithm. */ "Renaissance Software", /* Affiliation of author. */ "Public Domain" /* Vendor of algorithm. */}; void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);void compress_decompress(UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);/******************************************************************************//* This function is the only function exported by this module. *//* Depending on its first parameter, the function can be requested to *//* compress a block of memory, decompress a block of memory, or to identify *//* itself. For more information, see the specification file "compress.h". */EXPORT void compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)UWORD action; /* Action to be performed. */UBYTE *wrk_mem; /* Address of working memory we can use. */UBYTE *src_adr; /* Address of input data. */ULONG src_len; /* Length of input data. */UBYTE *dst_adr; /* Address to put output data. */ULONG *p_dst_len; /* Address of longword for length of output data. */{ switch (action) { case COMPRESS_ACTION_IDENTITY: *p_dst_len=(ULONG) &identity; break; case COMPRESS_ACTION_COMPRESS: compress_compress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; case COMPRESS_ACTION_DECOMPRESS: compress_decompress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; }}/******************************************************************************//* *//* BRIEF DESCRIPTION OF THE LZRW3 ALGORITHM *//* ======================================== *//* The LZRW3 algorithm is identical to the LZRW1-A algorithm except that *//* instead of transmitting history offsets, it transmits hash table indexes. *//* In order to decode the indexes, the decompressor must maintain an *//* identical hash table. Copy items are straightforward:when the decompressor *//* receives a copy item, it simply looks up the hash table to translate the *//* index into a pointer into the data already decompressed. To update the *//* hash table, it replaces the same table entry with a pointer to the start *//* of the newly decoded phrase. The tricky part is with literal items, for at *//* the time that the decompressor receives a literal item the decompressor *//* does not have the three bytes in the Ziv (that the compressor has) to *//* perform the three-byte hash. To solve this problem, in LZRW3, both the *//* compressor and decompressor are wired up so that they "buffer" these *//* literals and update their hash tables only when three bytes are available. *//* This makes the maximum buffering 2 bytes. *//* *//* Replacement of offsets by hash table indexes yields a few percent extra *//* compression at the cost of some speed. LZRW3 is slower than LZRW1, LZRW1-A *//* and LZRW2, but yields better compression. *//* *//* Extra compression could be obtained by using a hash table of depth two. *//* However, increasing the depth above one incurs a significant decrease in *//* compression speed which was not considered worthwhile. Another reason for *//* keeping the depth down to one was to allow easy comparison with the *//* LZRW1-A and LZRW2 algorithms so as to demonstrate the exact effect of the *//* use of direct hash indexes. *//* *//* +---+ *//* |___|4095 *//* |___| *//* +---------------------*_|<---+ /----+---\ *//* | |___| +---|Hash | *//* | |___| |Function| *//* | |___| \--------/ *//* | |___|0 ^ *//* | +---+ | *//* | Hash +-----+ *//* | Table | *//* | --- *//* v ^^^ *//* +-------------------------------------|----------------+ *//* |||||||||||||||||||||||||||||||||||||||||||||||||||||||| *//* +-------------------------------------|----------------+ *//* | |1......18| | *//* |<------- Lempel=History ------------>|<--Ziv-->| | *//* | (=bytes already processed) |<-Still to go-->| *//* |<-------------------- INPUT BLOCK ------------------->| *//* *//* The diagram above for LZRW3 looks almost identical to the diagram for *//* LZRW1. The difference is that in LZRW3, the compressor transmits hash *//* table indices instead of Lempel offsets. For this to work, the *//* decompressor must maintain a hash table as well as the compressor and both *//* compressor and decompressor must "buffer" literals, as the decompressor *//* cannot hash phrases commencing with a literal until another two bytes have *//* arrived. *//* *//* LZRW3 Algorithm Execution Summary *//* --------------------------------- *//* 1. Hash the first three bytes of the Ziv to yield a hash table index h. *//* 2. Look up the hash table yielding history pointer p. *//* 3. Match where p points with the Ziv. If there is a match of three or *//* more bytes, code those bytes (in the Ziv) as a copy item, otherwise *//* code the next byte in the Ziv as a literal item. *//* 4. Update the hash table as possible subject to the constraint that only *//* phrases commencing three bytes back from the Ziv can be hashed and *//* entered into the hash table. (This enables the decompressor to keep *//* pace). See the description and code for more details. *//* *//******************************************************************************//* *//* DEFINITION OF COMPRESSED FILE FORMAT *//* ==================================== *//* * A compressed file consists of a COPY FLAG followed by a REMAINDER. *//* * The copy flag CF uses up four bytes with the first byte being the *//* least significant. *//* * If CF=1, then the compressed file represents the remainder of the file *//* exactly. Otherwise CF=0 and the remainder of the file consists of zero *//* or more GROUPS, each of which represents one or more bytes. *//* * Each group consists of two bytes of CONTROL information followed by *//* sixteen ITEMs except for the last group which can contain from one *//* to sixteen items. *//* * An item can be either a LITERAL item or a COPY item. *//* * Each item corresponds to a bit in the control bytes. *//* * The first control byte corresponds to the first 8 items in the group *//* with bit 0 corresponding to the first item in the group and bit 7 to *//* the eighth item in the group. *//* * The second control byte corresponds to the second 8 items in the group *//* with bit 0 corresponding to the ninth item in the group and bit 7 to *//* the sixteenth item in the group. *//* * A zero bit in a control word means that the corresponding item is a *//* literal item. A one bit corresponds to a copy item. *//* * A literal item consists of a single byte which represents itself. *//* * A copy item consists of two bytes that represent from 3 to 18 bytes. *//* * The first byte in a copy item will be denoted C1. *//* * The second byte in a copy item will be denoted C2. *//* * Bits will be selected using square brackets. *//* For example: C1[0..3] is the low nibble of the first control byte. *//* of copy item C1. *//* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number *//* in the range [3,18]. *//* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -