📄 lzrw3.c

📁 linux-2.6.15.6
💻 C
📖 第 1 页 / 共 3 页
字号:
/*          POS    is the index in the text of the character following the    *//*                   string represented by all the items preceeding the item  *//*                   being defined.                                           *//*          OFFSET is obtained from INDEX by looking up the hash table.       *//*                                                                            *//******************************************************************************//* The following #define defines the length of the copy flag that appears at  *//* the start of the compressed file. The value of four bytes was chosen       *//* because the fast_copy routine on my Macintosh runs faster if the source    *//* and destination blocks are relatively longword aligned.                    *//* The actual flag data appears in the first byte. The rest are zeroed so as  *//* to normalize the compressed representation (i.e. not non-deterministic).   */#define FLAG_BYTES 4/* The following #defines define the meaning of the values of the copy        *//* flag at the start of the compressed file.                                  */#define FLAG_COMPRESS 0     /* Signals that output was result of compression. */#define FLAG_COPY     1     /* Signals that output was simply copied over.    *//* The 68000 microprocessor (on which this algorithm was originally developed *//* is fussy about non-aligned arrays of words. To avoid these problems the    *//* following macro can be used to "waste" from 0 to 3 bytes so as to align    *//* the argument pointer.                                                      */#define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1))/* The following constant defines the maximum length of an uncompressed item. *//* This definition must not be changed; its value is hardwired into the code. *//* The longest number of bytes that can be spanned by a single item is 18     *//* for the longest copy item.                                                 */#define MAX_RAW_ITEM (18)/* The following constant defines the maximum length of an uncompressed group.*//* This definition must not be changed; its value is hardwired into the code. *//* A group contains at most 16 items which explains this definition.          */#define MAX_RAW_GROUP (16*MAX_RAW_ITEM)/* The following constant defines the maximum length of a compressed group.   *//* This definition must not be changed; its value is hardwired into the code. *//* A compressed group consists of two control bytes followed by up to 16      *//* compressed items each of which can have a maximum length of two bytes.     */#define MAX_CMP_GROUP (2+16*2)/* The following constant defines the number of entries in the hash table.    *//* This definition must not be changed; its value is hardwired into the code. */#define HASH_TABLE_LENGTH (4096)/* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable    *//* the compressor and decompressor to stay in step maintaining identical hash *//* tables. In an early version of the algorithm, the tables were simply       *//* initialized to zero and a check for zero was included just before the      *//* matching code. However, this test costs time. A better solution is to      *//* initialize all the entries in the hash table to point to a constant        *//* string. The decompressor does the same. This solution requires no extra    *//* test. The contents of the string do not matter so long as the string is    *//* the same for the compressor and decompressor and contains at least         *//* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not *//* have white space problems (e.g. there is no chance that the compiler will  *//* replace more than one space by a TAB) and because they make the length of  *//* the string obvious by inspection.                                          */#define START_STRING_18 ((UBYTE *) "123456789012345678")/* In this algorithm, hash values have to be calculated at more than one      *//* point. The following macro neatens the code up for this.                   */#define HASH(PTR) \   (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF)/******************************************************************************//* Input  : Hand over the required amount of working memory in p_wrk_mem.     *//* Input  : Specify input block using p_src_first and src_len.                *//* Input  : Point p_dst_first to the start of the output zone (OZ).           *//* Input  : Point p_dst_len to a ULONG to receive the output length.          *//* Input  : Input block and output zone must not overlap.                     *//* Output : Length of output block written to *p_dst_len.                     *//* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May   *//* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*//* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES.        */LOCAL void compress_compress(UBYTE *p_wrk_mem,			     UBYTE *p_src_first, ULONG  src_len,			     UBYTE *p_dst_first, LONG  *p_dst_len){ /* p_src and p_dst step through the source and destination blocks.           */ register UBYTE *p_src = p_src_first; register UBYTE *p_dst = p_dst_first;  /* The following variables are never modified and are used in the            */ /* calculations that determine when the main loop terminates.                */ UBYTE *p_src_post  = p_src_first+src_len; UBYTE *p_dst_post  = p_dst_first+src_len; UBYTE *p_src_max1  = p_src_first+src_len-MAX_RAW_ITEM; UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16;  /* The variables 'p_control' and 'control' are used to buffer control bits.  */ /* Before each group is processed, the next two bytes of the output block    */ /* are set aside for the control word for the group about to be processed.   */ /* 'p_control' is set to point to the first byte of that word. Meanwhile,    */ /* 'control' buffers the control bits being generated during the processing  */ /* of the group. Instead of having a counter to keep track of how many items */ /* have been processed (=the number of bits in the control word), at the     */ /* start of each group, the top word of 'control' is filled with 1 bits.     */ /* As 'control' is shifted for each item, the 1 bits in the top word are     */ /* absorbed or destroyed. When they all run out (i.e. when the top word is   */ /* all zero bits, we know that we are at the end of a group.                 */# define TOPWORD 0xFFFF0000 UBYTE *p_control; register ULONG control=TOPWORD;  /* THe variable 'hash' always points to the first element of the hash table. */ UBYTE **hash= (UBYTE **)  ULONG_ALIGN_UP(p_wrk_mem);  /* The following two variables represent the literal buffer. p_h1 points to  */ /* the hash table entry corresponding to the youngest literal. p_h2 points   */ /* to the hash table entry corresponding to the second youngest literal.     */ /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending      */ /* literal. The variables are initialized to zero meaning an empty "buffer". */ UBYTE **p_h1=NULL; UBYTE **p_h2=NULL;   /* To start, we write the flag bytes. Being optimistic, we set the flag to   */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the      */ /* algorithm deterministic.                                                  */ *p_dst++=FLAG_COMPRESS; {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} /* Reserve the first word of output as the control word for the first group. */ /* Note: This is undone at the end if the input block is empty.              */ p_control=p_dst; p_dst+=2;  /* Initialize all elements of the hash table to point to a constant string.  */ /* Use of an unrolled loop speeds this up considerably.                      */ {UWORD i; UBYTE **p_h=hash;#  define ZH *p_h++=START_STRING_18  for (i=0;i<256;i++)     /* 256=HASH_TABLE_LENGTH/16. */    {ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;     ZH;ZH;ZH;ZH;} } /* The main loop processes either 1 or 16 items per iteration. As its        */ /* termination logic is complicated, I have opted for an infinite loop       */ /* structure containing 'break' and 'goto' statements.                       */ while (TRUE)   {/* Begin main processing loop. */       /* Note: All the variables here except unroll should be defined within    */    /*       the inner loop. Unfortunately the loop hasn't got a block.       */     register UBYTE *p;         /* Scans through targ phrase during matching. */     register UBYTE *p_ziv= NULL ;     /* Points to first byte of current Ziv.       */     register UWORD unroll;     /* Loop counter for unrolled inner loop.      */     register UWORD index;      /* Index of current hash table entry.         */     register UBYTE **p_h0 = NULL ;     /* Pointer to current hash table entry.       */         /* Test for overrun and jump to overrun code if necessary.                */    if (p_dst>p_dst_post)       goto overrun;           /* The following cascade of if statements efficiently catches and deals   */    /* with varying degrees of closeness to the end of the input block.       */    /* When we get very close to the end, we stop updating the table and      */    /* code the remaining bytes as literals. This makes the code simpler.     */    unroll=16;    if (p_src>p_src_max16)      {       unroll=1;       if (p_src>p_src_max1)         {          if (p_src==p_src_post)             break;          else             goto literal;         }      }             /* This inner unrolled loop processes 'unroll' (whose value is either 1   */    /* or 16) items. I have chosen to implement this loop with labels and     */    /* gotos to heighten the ease with which the loop may be implemented with */    /* a single decrement and branch instruction in assembly language and     */    /* also because the labels act as highly readable place markers.          */    /* (Also because we jump into the loop for endgame literals (see above)). */        begin_unrolled_loop:           /* To process the next phrase, we hash the next three bytes and use    */       /* the resultant hash table index to look up the hash table. A pointer */       /* to the entry is stored in p_h0 so as to avoid an array lookup. The  */       /* hash table entry *p_h0 is looked up yielding a pointer p to a       */       /* potential match of the Ziv in the history.                          */       index=HASH(p_src);       p_h0=&hash[index];       p=*p_h0;              /* Having looked up the candidate position, we are in a position to    */       /* attempt a match. The match loop has been unrolled using the PS      */       /* macro so that failure within the first three bytes automatically    */       /* results in the literal branch being taken. The coding is simple.    */       /* p_ziv saves p_src so we can let p_src wander.                       */#       define PS *p++!=*p_src++       p_ziv=p_src;       if (PS || PS || PS)         {          /* Literal. */                    /* Code the literal byte as itself and a zero control bit.          */          p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF;                    /* We have just coded a literal. If we had two pending ones, that   */          /* makes three and we can update the hash table.                    */          if (p_h2!=0)             {*p_h2=p_ziv-2;}                       /* In any case, rotate the hash table pointers for next time. */          p_h2=p_h1; p_h1=p_h0;                   }       else         {          /* Copy */                    /* Match up to 15 remaining bytes using an unrolled loop and code. */#if 0          PS || PS || PS || PS || PS || PS || PS || PS ||          PS || PS || PS || PS || PS || PS || PS || p_src++;#else               if (               !( PS || PS || PS || PS || PS || PS || PS || PS ||                  PS || PS || PS || PS || PS || PS || PS )              ) p_src++;#endif          *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3);          *p_dst++=index&0xFF;                    /* As we have just coded three bytes, we are now in a position to   */          /* update the hash table with the literal bytes that were pending   */          /* upon the arrival of extra context bytes.                         */          if (p_h1!=0)            {             if (p_h2)               {*p_h2=p_ziv-2; p_h2=NULL;}             *p_h1=p_ziv-1; p_h1=NULL;            }                      /* In any case, we can update the hash table based on the current   */          /* position as we just coded at least three bytes in a copy items.  */          *p_h0=p_ziv;
💿 文件大小 48743 K
👤 上传用户 ATT320
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #15
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -