⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lzrw3-a.c

📁 一个基于lZW压缩算法的高效实现
💻 C
📖 第 1 页 / 共 4 页
字号:
 /* p_src and p_dst step through the source and destination blocks.           */ UBYTE *p_src = p_src_first; UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the            */ /* calculations that determine when the main loop terminates.                */ UBYTE *p_src_post  = p_src_first+src_len; UBYTE *p_dst_post  = p_dst_first+src_len; UBYTE *p_src_max1  = p_src_first+src_len-MAX_RAW_ITEM; UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; /* The variables 'p_control' and 'control' are used to buffer control bits.  */ /* Before each group is processed, the next two bytes of the output block    */ /* are set aside for the control word for the group about to be processed.   */ /* 'p_control' is set to point to the first byte of that word. Meanwhile,    */ /* 'control' buffers the control bits being generated during the processing  */ /* of the group. Instead of having a counter to keep track of how many items */ /* have been processed (=the number of bits in the control word), at the     */ /* start of each group, the top word of 'control' is filled with 1 bits.     */ /* As 'control' is shifted for each item, the 1 bits in the top word are     */ /* absorbed or destroyed. When they all run out (i.e. when the top word is   */ /* all zero bits, we know that we are at the end of a group.                 */ #define TOPWORD 0xFFFF0000 UBYTE *p_control; ULONG control=TOPWORD; /* The variable 'hash' always points to the first element of the hash table. */ UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); /* The following two variables represent the literal buffer. p_h1 points to  */ /* the partition (i.e. the zero'th (first) element of the partition)         */ /* corresponding to the youngest literal. p_h2 points to the partition       */ /* corresponding to the second youngest literal.                             */ /* The value zero denotes an "empty" buffer value with p_h1=0 => p_h2=0.     */ UBYTE **p_h1=0; UBYTE **p_h2=0; /* The following variable holds the current 'cycle' value. This value cycles */ /* through the range [0,HASH_TABLE_DEPTH-1], being incremented every time    */ /* the hash table is updated. The value gives the within-partition number of */ /* the next pointer to be overwritten. The decompressor maintains a cycle    */ /* value in synchrony.                                                       */ UCARD cycle=0; /* To start, we write the flag bytes. Being optimistic, we set the flag to   */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the      */ /* algorithm deterministic.                                                  */ *p_dst++=FLAG_COMPRESS; {UCARD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} /* Reserve the first word of output as the control word for the first group. */ /* Note: This is undone at the end if the input block is empty.              */ p_control=p_dst; p_dst+=2; /* Initialize all elements of the hash table to point to a constant string.  */ /* Use of an unrolled loop speeds this up considerably.                      */ /* These variables should really be declared "register", but I am worried    */ /* about the possibility that extra register declarations will tempt stupid  */ /* compilers to allocate all registers before they get to the innermostloop. */ {UCARD i; UBYTE **p_h=hash;  #define ZH *p_h++=START_STRING_18  for (i=0;i<256;i++)     /* 256=HASH_TABLE_LENGTH/16. */    {ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;} } /* The main loop processes either 1 or 16 items per iteration. As its        */ /* termination logic is complicated, I have opted for an infinite loop       */ /* structure containing 'break' and 'goto' statements.                       */ while (TRUE)   {/* Begin main processing loop. */    /* Note: All the variables here except unroll should be defined within    */    /*       the inner loop. Unfortunately the loop hasn't got a block.       */     UBYTE *p_ziv;              /* Points to first byte of current Ziv.       */     UCARD unroll;              /* Loop counter for unrolled inner loop.      */     UCARD index;               /* Index of current partition.                */     UBYTE **p_h0;              /* Pointer to current partition.              */     register UCARD d;          /* Depth looping variable.                    */     register UCARD bestlen;    /* Holds the best length seen so far.         */     register UCARD bestpos;    /* Holds number of best pointer seen so far.  */    /* Test for overrun and jump to overrun code if necessary.                */    if (p_dst>p_dst_post)       goto overrun;    /* The following cascade of if statements efficiently catches and deals   */    /* with varying degrees of closeness to the end of the input block.       */    /* When we get very close to the end, we stop updating the table and      */    /* code the remaining bytes as literals. This makes the code simpler.     */    unroll=16;    if (p_src>p_src_max16)      {       unroll=1;       if (p_src>p_src_max1)         {          if (p_src==p_src_post)             break;          else             {p_h0=&hash[ANY_HASH_INDEX]; /* Avoid undefined pointer. */              goto literal;}         }      }    /* This inner unrolled loop processes 'unroll' (whose value is either 1   */    /* or 16) items. I have chosen to implement this loop with labels and     */    /* gotos to heighten the ease with which the loop may be implemented with */    /* a single decrement and branch instruction in assembly language and     */    /* also because the labels act as highly readable place markers.          */    /* (Also because we jump into the loop for endgame literals (see above)). */    begin_unrolled_loop:       p_ziv=p_src;       /* To process the next phrase, we hash the next three bytes to obtain  */       /* an index to the zeroth (first) pointer in a target partition. We    */       /* get the pointer.                                                    */       index=HASH(p_src);       p_h0=&hash[index];       /* This next part runs through the pointers in the partition matching  */       /* the bytes they point to in the Lempel with the bytes in the Ziv.    */       /* The length (bestlen) and within-partition pointer number (bestpos)  */       /* of the longest match so far is maintained and is the output of this */       /* segment of code. The s[bestlen]==... is an optimization only.       */       bestlen=0;       bestpos=0;       for (d=0;d<HASH_TABLE_DEPTH;d++)         {          register UBYTE *s=p_src;          register UBYTE *p=p_h0[d];          register UCARD len;          if (s[bestlen] == p[bestlen])            {             #define PS *p++!=*s++             PS || PS || PS || PS || PS || PS || PS || PS || PS ||             PS || PS || PS || PS || PS || PS || PS || PS || PS || s++;             len=s-p_src-1;             if (len>bestlen)               {                bestpos=d;                bestlen=len;               }            }         }       /* The length of the longest match determines whether we code a */       /* literal item or a copy item.                                 */       if (bestlen<3)         {          /* Literal. */          /* Code the literal byte as itself and a zero control bit.          */          literal: *p_dst++=*p_src++; control&=0xFFFEFFFF;          /* We have just coded a literal. If we had two pending ones, that   */          /* makes three and we can update the hash table.                    */          if (p_h2!=0)             {UPDATE_P(p_h2,p_ziv-2);}          /* In any case, rotate the hash table pointers for next time. */          p_h2=p_h1; p_h1=p_h0;         }       else         {          /* Copy */          /* To code a copy item, we construct a hash table index of the      */          /* winning pointer (index+=bestpos) and code it and the best length */          /* into a 2 byte code word. Bump up p_src.                          */          index+=bestpos;          *p_dst++=((index&0xF00)>>4)|(bestlen-3);          *p_dst++=index&0xFF;          p_src+=bestlen;          /* As we have just coded three bytes, we are now in a position to   */          /* update the hash table with the literal bytes that were pending   */          /* upon the arrival of extra context bytes.                         */          if (p_h1!=0)            {             if (p_h2!=0)               {UPDATE_P(p_h2,p_ziv-2); p_h2=0;}             UPDATE_P(p_h1,p_ziv-1); p_h1=0;            }          /* In any case, we can update the hash table based on the current   */          /* position as we just coded at least three bytes in a copy items.  */          UPDATE_P(p_h0,p_ziv);         }       control>>=1;       /* This loop is all set up for a decrement and jump instruction! */    end_unrolled_loop: if (--unroll) goto begin_unrolled_loop;    /* At this point it will nearly always be the end of a group in which     */    /* case, we have to do some control-word processing. However, near the    */    /* end of the input block, the inner unrolled loop is only executed once. */    /* This necessitates the 'if' test.                                       */    if ((control&TOPWORD)==0)      {       /* Write the control word to the place we saved for it in the output. */       *p_control++=  control     &0xFF;       *p_control  = (control>>8) &0xFF;       /* Reserve the next word in the output block for the control word */       /* for the group about to be processed.                           */       p_control=p_dst; p_dst+=2;       /* Reset the control bits buffer. */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -