📄 lzrw5.txt

📁 一个基于lZW压缩算法的高效实现
💻 TXT
📖 第 1 页 / 共 3 页
字号:
You can be sure of not reaching this limit by only feeding in inputfiles of at most length 65536. The program does not contain anassertion for this condition.NOTE: This code is supplied more for exposition than execution./******************************************************************************//*                                                                            *//*                                    LZRW5.C                                 *//*                                                                            *//******************************************************************************//*                                                                            *//* Author  : Ross Williams.                                                   *//* Date    : 17-Jul-1991.                                                     *//* Release : 1.                                                               *//*                                                                            *//******************************************************************************/                            /* INCLUDE FILES                                  */                            /* =============                                  */#include "port.h"           /* Defines symbols for the non portable stuff.    */#include "compress.h"       /* Defines single exported function "compress".   */#include "fast_copy.h"      /* Fast memory copy routine.                      */#include <stdio.h>#include <stdlib.h>#include "assert.h"/******************************************************************************//* When I first started to get concerned about the portability of my C code,  *//* I switched over to using only macro defined types UBYTE, UWORD, ULONG and  *//* one or two others. While, these are useful for most purposes, they impair  *//* efficiency as, if I have a variable whose range will be [0,1000], I will   *//* declare it as a UWORD. This will translate into (say) "short int" and      *//* hence may be less efficient than just an "int" which represents the        *//* natural size of the machine. Before releasing LZRW3-A, I realized this     *//* mistake. Unfortunately, I can't access the ftp archive with my portability *//* header in it in time for this algorithm's release and so I am including an *//* extra definition. The definition UCARD stands for an unsigned (cardinal)   *//* type that can hold values in the range [0,32767]. This is within the ANSI  *//* range of a standard int or unsigned. No assumption about overflow of this  *//* type is made in the code (i.e. all usages are within range and I do not    *//* use the value -1 to detect the end of loops.).                             *//* You can use either "unsigned" or just "int" here depending on which is     *//* more efficient in your environment (both the same probably).               */#define UCARD unsignedstruct node_t  {   UBYTE *p_phrase;   /*  4 bytes. Pointer to phrase represented by the node. */   UBYTE len_phrase;  /*  1 byte.  Length of the phrase.                      */   /* UBYTE same;          1 byte.  Length of prefix shared by parent node.    */   /* UWORD parent;        2 bytes. Phrase number of parent phrase.            */   UWORD left;        /*  2 bytes. Phrase number of left child phrase.        */   UWORD right;       /*  2 bytes. Phrase number of right child phrase.       */                      /* --------                                             */                      /* 12 bytes is the ideal (no padding) structure length. */  };#define MAX_NODES         (1L<<16)#define MAX_PHRASE_LENGTH (255L)#define ALPHABET_SIZE     (256L)#define TEST_ARRAY_ITEMS 4typedef struct node_t test_node_array[TEST_ARRAY_ITEMS];#define U(X)            ((ULONG) X)#define SIZE_P_BYTE     (U(sizeof(UBYTE *)))#define ALIGNMENT_FUDGE (U(64L))/* Calculate the memory required by the compressor. */#define MEM_NODE_TABLE (MAX_NODES*(sizeof(test_node_array)/TEST_ARRAY_ITEMS))#define MEM_ALPHA_TABLE  (256L)#define MEM_COMPRESS     (MEM_NODE_TABLE   + \                          MEM_ALPHA_TABLE  + \                          ALIGNMENT_FUDGE)/* Calculate the memory required by the decompressor. */#define MEM_PHRASE_TABLE (MAX_NODES*SIZE_P_BYTE)#define MEM_LENGTH_TABLE (MAX_NODES*1L)#define MEM_DECOMPRESS   (MEM_PHRASE_TABLE + \                          MEM_LENGTH_TABLE + \                          MEM_ALPHA_TABLE  + \                          ALIGNMENT_FUDGE)#define MEM_REQ (MEM_COMPRESS>MEM_DECOMPRESS?MEM_COMPRESS:MEM_DECOMPRESS)static struct compress_identity identity ={ U(0x4A619944),                           /* Algorithm identification number. */ MEM_REQ,                                 /* Working memory (bytes) required. */ "LZRW5",                                 /* Name of algorithm.               */ "1.0",                                   /* Version number of algorithm.     */ "17-Jul-1990",                           /* Date of algorithm.               */ "Public Domain",                         /* Copyright notice.                */ "Ross N. Williams",                      /* Author of algorithm.             */ "Renaissance Software",                  /* Affiliation of author.           */ "Public Domain"                          /* Vendor of algorithm.             */};LOCAL int compare_strings(UBYTE *,UCARD,UBYTE *,UCARD);LOCAL void compress_compress  (UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);LOCAL void compress_decompress(UBYTE *,UBYTE *,ULONG,UBYTE *,ULONG *);/******************************************************************************//* This function is the only function exported by this module.                *//* Depending on its first parameter, the function can be requested to         *//* compress a block of memory, decompress a block of memory, or to identify   *//* itself. For more information, see the specification file "compress.h".     */EXPORT void compress(action,wrk_mem,src_adr,src_len,dst_adr,p_dst_len)UWORD     action;      /* Action to be performed.                             */UBYTE   *wrk_mem;      /* Address of working memory we can use.               */UBYTE   *src_adr;      /* Address of input data.                              */ULONG    src_len;      /* Length  of input data.                              */UBYTE   *dst_adr;      /* Address to put output data.                         */ULONG *p_dst_len;      /* Address of longword for length of output data.      */{ switch (action)   {    case COMPRESS_ACTION_IDENTITY:       *p_dst_len=(ULONG) &identity;       break;    case COMPRESS_ACTION_COMPRESS:       compress_compress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len);       break;    case COMPRESS_ACTION_DECOMPRESS:       compress_decompress(wrk_mem,src_adr,src_len,dst_adr,p_dst_len);       break;   }}/******************************************************************************//* The following #define defines the length of the copy flag that appears at  *//* the start of the compressed file. The value of four bytes was chosen       *//* because the fast_copy routine on my Macintosh runs faster if the source    *//* and destination blocks are relatively longword aligned.                    *//* The actual flag data appears in the first byte. The rest are zeroed so as  *//* to normalize the compressed representation (i.e. not non-deterministic).   */#define FLAG_BYTES 4/* The following #defines define the meaning of the values of the copy        *//* flag at the start of the compressed file.                                  */#define FLAG_COMPRESS 0     /* Signals that output was result of compression. */#define FLAG_COPY     1     /* Signals that output was simply copied over.    *//* The 68000 microprocessor (on which this algorithm was originally developed *//* is fussy about non-aligned arrays of words. To avoid these problems the    *//* following macro can be used to "waste" from 0 to 3 bytes so as to align    *//* the argument pointer.                                                      */#define ULONG_ALIGN_UP(X) ((((ULONG)X)+3)&~3)/******************************************************************************/LOCAL int compare_strings(p_s1,len1,p_s2,len2)/* Compares the two strings lexicographically and returns:                    *//*    -1 if s1<s2                                                             *//*     0 if s1==s2                                                            *//*     1 if s1>s2                                                             */register UBYTE *p_s1,*p_s2;  /* Pointers to first byte of each string. */         UCARD len1,len2;    /* Number of bytes in each string.        */{ register UCARD minlen = len1<len2 ? len1 : len2; register UCARD i; for (i=0;i<minlen;i++)    if (*p_s1++!=*p_s2++)       if (*--p_s1<*--p_s2)          return -1;       else          return 1; if (len1<len2) return -1; if (len1>len2) return  1; return 0;}LOCAL void compress_compress           (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)/* Input  : Hand over the required amount of working memory in p_wrk_mem.     *//* Input  : Specify input block using p_src_first and src_len.                *//* Input  : Point p_dst_first to the start of the output zone (OZ).           *//* Input  : Point p_dst_len to a ULONG to receive the output length.          *//* Input  : Input block and output zone must not overlap.                     *//* Output : Length of output block written to *p_dst_len.                     *//* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May   *//* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*//* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES.        */UBYTE *p_wrk_mem;UBYTE *p_src_first;ULONG  src_len;UBYTE *p_dst_first;ULONG *p_dst_len;{ /* p_src and p_dst step through the source and destination blocks.           */ UBYTE *p_src = p_src_first; UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the            */ /* calculations that determine when the main loop terminates.                */ UBYTE *p_src_post  = p_src_first+src_len; UBYTE *p_dst_post  = p_dst_first+src_len; UBYTE *p_src_max_256 = p_src_first+src_len-256; struct node_t *node; UBYTE **pointer; UBYTE *length; UBYTE *alphabet; ULONG next; UCARD i; node     = (struct node_t *) ULONG_ALIGN_UP(p_wrk_mem); alphabet = (UBYTE *)  &node[MAX_NODES]; /* Load the node table with nodes for the alphabet. */ for (i=0;i<ALPHABET_SIZE;i++)   {    alphabet[i]=i;    node[i].p_phrase=&alphabet[i];    node[i].len_phrase=1;    node[i].left=0;    node[i].right=0;   } next=256; /* To start, we write the flag bytes. Being optimistic, we set the flag to   */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the      */ /* algorithm deterministic.                                                  */ *p_dst++=FLAG_COMPRESS; {UCARD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} while (p_src<p_src_max_256)   {/* Begin main processing loop. */    UWORD current;    UWORD bestnode;    UCARD bestlen;    int cmp;    UBYTE *p_ziv;    if (p_dst>p_dst_post) goto overrun;    p_ziv=p_src;    /* Run down the tree matching against the Ziv. */    current=*p_ziv;    bestnode=current;    bestlen=1;    while (TRUE)      {       UWORD t;       struct node_t *pn=&node[current];       cmp=compare_strings(p_ziv,bestlen+1,pn->p_phrase,pn->len_phrase);       if (cmp==-1)          {if ((t=pn->left)==0) break; else current=t;}       else          if (cmp==1)             {if ((t=pn->right)==0) break; else current=t;}          else             {bestnode=current;              if (++bestlen==MAX_PHRASE_LENGTH) break;}      }    /* Assert: bestnode is the longest match with the Ziv. */    /* Assert: current is "append" leaf for the Ziv. Direction in cmp. */    /* Check that node contains a string that matches Ziv *//* DEBUG    for (i=0;i<node[bestnode].len_phrase;i++)       if (p_ziv[i]!=node[bestnode].p_phrase[i])          error("Best phrase does not match Ziv!!!!");*/    /* Transmit the node number. */    p_src+=node[bestnode].len_phrase;    *p_dst++=bestnode>>8;               /* Big endian order for 68000. */    *p_dst++=bestnode&0xFF;    /* Delete the oldest phrase from the tree. */    /* <<<Skipped in first version>>> */    /* Note: must ignore nodes with a length of zero. */
💿 文件大小 105 K
👤 上传用户 zhangyuntong
📂 所属分类压缩解压
🏷️ 相关标签

#lZW #压缩算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -