📄 lzrw5.txt
字号:
You can be sure of not reaching this limit by only feeding in inputfiles of at most length 65536. The program does not contain anassertion for this condition.NOTE: This code is supplied more for exposition than execution./******************************************************************************//* *//* LZRW5.C *//* *//******************************************************************************//* *//* Author : Ross Williams. *//* Date : 17-Jul-1991. *//* Release : 1. *//* *//******************************************************************************/ /* INCLUDE FILES */ /* ============= */#include "port.h" /* Defines symbols for the non portable stuff. */#include "compress.h" /* Defines single exported function "compress". */#include "fast_copy.h" /* Fast memory copy routine. */#include <stdio.h>#include <stdlib.h>#include "assert.h"/******************************************************************************//* When I first started to get concerned about the portability of my C code, *//* I switched over to using only macro defined types UBYTE, UWORD, ULONG and *//* one or two others. While, these are useful for most purposes, they impair *//* efficiency as, if I have a variable whose range will be [0,1000], I will *//* declare it as a UWORD. This will translate into (say) "short int" and *//* hence may be less efficient than just an "int" which represents the *//* natural size of the machine. Before releasing LZRW3-A, I realized this *//* mistake. Unfortunately, I can't access the ftp archive with my portability *//* header in it in time for this algorithm's release and so I am including an *//* extra definition. The definition UCARD stands for an unsigned (cardinal) *//* type that can hold values in the range [0,32767]. This is within the ANSI *//* range of a standard int or unsigned. No assumption about overflow of this *//* type is made in the code (i.e. all usages are within range and I do not *//* use the value -1 to detect the end of loops.). *//* You can use either "unsigned" or just "int" here depending on which is *//* more efficient in your environment (both the same probably). */#define UCARD unsignedstruct node_t { UBYTE *p_phrase; /* 4 bytes. Pointer to phrase represented by the node. */ UBYTE len_phrase; /* 1 byte. Length of the phrase. */ /* UBYTE same; 1 byte. Length of prefix shared by parent node. */ /* UWORD parent; 2 bytes. Phrase number of parent phrase. */ UWORD left; /* 2 bytes. Phrase number of left child phrase. */ UWORD right; /* 2 bytes. Phrase number of right child phrase. */ /* -------- */ /* 12 bytes is the ideal (no padding) structure length. */ };#define MAX_NODES (1L<<16)#define MAX_PHRASE_LENGTH (255L)#define ALPHABET_SIZE (256L)#define TEST_ARRAY_ITEMS 4typedef struct node_t test_node_array[TEST_ARRAY_ITEMS];#define U(X) ((ULONG) X)#define SIZE_P_BYTE (U(sizeof(UBYTE *)))#define ALIGNMENT_FUDGE (U(64L))/* Calculate the memory required by the compressor. */#define MEM_NODE_TABLE (MAX_NODES*(sizeof(test_node_array)/TEST_ARRAY_ITEMS))#define MEM_ALPHA_TABLE (256L)#define MEM_COMPRESS (MEM_NODE_TABLE + \ MEM_ALPHA_TABLE + \ ALIGNMENT_FUDGE)/* Calculate the memory required by the decompressor. */#define MEM_PHRASE_TABLE (MAX_NODES*SIZE_P_BYTE)#define MEM_LENGTH_TABLE (MAX_NODES*1L)#define MEM_DECOMPRESS (MEM_PHRASE_TABLE + \ MEM_LENGTH_TABLE + \ MEM_ALPHA_TABLE + \ ALIGNMENT_FUDGE)#define MEM_REQ (MEM_COMPRESS>MEM_DECOMPRESS?MEM_COMPRESS:MEM_DECOMPRESS)static struct compress_identity identity ={ U(0x4A619944), /* Algorithm identification number. */ MEM_REQ, /* Working memory (bytes) required. */ "LZRW5", /* Name of algorithm. */ "1.0", /* Version number of algorithm. */ "17-Jul-1990", /* Date of algorithm. */ "Public Domain", /* Copyright notice. */ "Ross N. Williams", /* Author of algorithm. */ "Renaissance Software", /* Affiliation of author. */ "Public Domain" /* Vendor of algorithm. */};LOCAL int compare_strings(UBYTE *,UCARD,UBYTE *,UCARD);LOCAL void compress_compress (UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);LOCAL void compress_decompress(UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);/******************************************************************************//* This function is the only function exported by this module. *//* Depending on its first parameter, the function can be requested to *//* compress a block of memory, decompress a block of memory, or to identify *//* itself. For more information, see the specification file "compress.h". */EXPORT void compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)UWORD action; /* Action to be performed. */UBYTE *wrk_mem; /* Address of working memory we can use. */UBYTE *src_adr; /* Address of input data. */ULONG src_len; /* Length of input data. */UBYTE *dst_adr; /* Address to put output data. */ULONG *p_dst_len; /* Address of longword for length of output data. */{ switch (action) { case COMPRESS_ACTION_IDENTITY: *p_dst_len=(ULONG) &identity; break; case COMPRESS_ACTION_COMPRESS: compress_compress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; case COMPRESS_ACTION_DECOMPRESS: compress_decompress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len); break; }}/******************************************************************************//* The following #define defines the length of the copy flag that appears at *//* the start of the compressed file. The value of four bytes was chosen *//* because the fast_copy routine on my Macintosh runs faster if the source *//* and destination blocks are relatively longword aligned. *//* The actual flag data appears in the first byte. The rest are zeroed so as *//* to normalize the compressed representation (i.e. not non-deterministic). */#define FLAG_BYTES 4/* The following #defines define the meaning of the values of the copy *//* flag at the start of the compressed file. */#define FLAG_COMPRESS 0 /* Signals that output was result of compression. */#define FLAG_COPY 1 /* Signals that output was simply copied over. *//* The 68000 microprocessor (on which this algorithm was originally developed *//* is fussy about non-aligned arrays of words. To avoid these problems the *//* following macro can be used to "waste" from 0 to 3 bytes so as to align *//* the argument pointer. */#define ULONG_ALIGN_UP(X) ((((ULONG)X)+3)&~3)/******************************************************************************/LOCAL int compare_strings(p_s1,len1,p_s2,len2)/* Compares the two strings lexicographically and returns: *//* -1 if s1<s2 *//* 0 if s1==s2 *//* 1 if s1>s2 */register UBYTE *p_s1,*p_s2; /* Pointers to first byte of each string. */ UCARD len1,len2; /* Number of bytes in each string. */{ register UCARD minlen = len1<len2 ? len1 : len2; register UCARD i; for (i=0;i<minlen;i++) if (*p_s1++!=*p_s2++) if (*--p_s1<*--p_s2) return -1; else return 1; if (len1<len2) return -1; if (len1>len2) return 1; return 0;}LOCAL void compress_compress (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)/* Input : Hand over the required amount of working memory in p_wrk_mem. *//* Input : Specify input block using p_src_first and src_len. *//* Input : Point p_dst_first to the start of the output zone (OZ). *//* Input : Point p_dst_len to a ULONG to receive the output length. *//* Input : Input block and output zone must not overlap. *//* Output : Length of output block written to *p_dst_len. *//* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May *//* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*//* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */UBYTE *p_wrk_mem;UBYTE *p_src_first;ULONG src_len;UBYTE *p_dst_first;ULONG *p_dst_len;{ /* p_src and p_dst step through the source and destination blocks. */ UBYTE *p_src = p_src_first; UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the */ /* calculations that determine when the main loop terminates. */ UBYTE *p_src_post = p_src_first+src_len; UBYTE *p_dst_post = p_dst_first+src_len; UBYTE *p_src_max_256 = p_src_first+src_len-256; struct node_t *node; UBYTE **pointer; UBYTE *length; UBYTE *alphabet; ULONG next; UCARD i; node = (struct node_t *) ULONG_ALIGN_UP(p_wrk_mem); alphabet = (UBYTE *) &node[MAX_NODES]; /* Load the node table with nodes for the alphabet. */ for (i=0;i<ALPHABET_SIZE;i++) { alphabet[i]=i; node[i].p_phrase=&alphabet[i]; node[i].len_phrase=1; node[i].left=0; node[i].right=0; } next=256; /* To start, we write the flag bytes. Being optimistic, we set the flag to */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ /* algorithm deterministic. */ *p_dst++=FLAG_COMPRESS; {UCARD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} while (p_src<p_src_max_256) {/* Begin main processing loop. */ UWORD current; UWORD bestnode; UCARD bestlen; int cmp; UBYTE *p_ziv; if (p_dst>p_dst_post) goto overrun; p_ziv=p_src; /* Run down the tree matching against the Ziv. */ current=*p_ziv; bestnode=current; bestlen=1; while (TRUE) { UWORD t; struct node_t *pn=&node[current]; cmp=compare_strings(p_ziv,bestlen+1,pn->p_phrase,pn->len_phrase); if (cmp==-1) {if ((t=pn->left)==0) break; else current=t;} else if (cmp==1) {if ((t=pn->right)==0) break; else current=t;} else {bestnode=current; if (++bestlen==MAX_PHRASE_LENGTH) break;} } /* Assert: bestnode is the longest match with the Ziv. */ /* Assert: current is "append" leaf for the Ziv. Direction in cmp. */ /* Check that node contains a string that matches Ziv *//* DEBUG for (i=0;i<node[bestnode].len_phrase;i++) if (p_ziv[i]!=node[bestnode].p_phrase[i]) error("Best phrase does not match Ziv!!!!");*/ /* Transmit the node number. */ p_src+=node[bestnode].len_phrase; *p_dst++=bestnode>>8; /* Big endian order for 68000. */ *p_dst++=bestnode&0xFF; /* Delete the oldest phrase from the tree. */ /* <<<Skipped in first version>>> */ /* Note: must ignore nodes with a length of zero. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -