📄 lzrw3-a.c
字号:
/* p_src and p_dst step through the source and destination blocks. */ UBYTE *p_src = p_src_first; UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the */ /* calculations that determine when the main loop terminates. */ UBYTE *p_src_post = p_src_first+src_len; UBYTE *p_dst_post = p_dst_first+src_len; UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; /* The variables 'p_control' and 'control' are used to buffer control bits. */ /* Before each group is processed, the next two bytes of the output block */ /* are set aside for the control word for the group about to be processed. */ /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ /* 'control' buffers the control bits being generated during the processing */ /* of the group. Instead of having a counter to keep track of how many items */ /* have been processed (=the number of bits in the control word), at the */ /* start of each group, the top word of 'control' is filled with 1 bits. */ /* As 'control' is shifted for each item, the 1 bits in the top word are */ /* absorbed or destroyed. When they all run out (i.e. when the top word is */ /* all zero bits, we know that we are at the end of a group. */ #define TOPWORD 0xFFFF0000 UBYTE *p_control; ULONG control=TOPWORD; /* The variable 'hash' always points to the first element of the hash table. */ UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); /* The following two variables represent the literal buffer. p_h1 points to */ /* the partition (i.e. the zero'th (first) element of the partition) */ /* corresponding to the youngest literal. p_h2 points to the partition */ /* corresponding to the second youngest literal. */ /* The value zero denotes an "empty" buffer value with p_h1=0 => p_h2=0. */ UBYTE **p_h1=0; UBYTE **p_h2=0; /* The following variable holds the current 'cycle' value. This value cycles */ /* through the range [0,HASH_TABLE_DEPTH-1], being incremented every time */ /* the hash table is updated. The value gives the within-partition number of */ /* the next pointer to be overwritten. The decompressor maintains a cycle */ /* value in synchrony. */ UCARD cycle=0; /* To start, we write the flag bytes. Being optimistic, we set the flag to */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ /* algorithm deterministic. */ *p_dst++=FLAG_COMPRESS; {UCARD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} /* Reserve the first word of output as the control word for the first group. */ /* Note: This is undone at the end if the input block is empty. */ p_control=p_dst; p_dst+=2; /* Initialize all elements of the hash table to point to a constant string. */ /* Use of an unrolled loop speeds this up considerably. */ /* These variables should really be declared "register", but I am worried */ /* about the possibility that extra register declarations will tempt stupid */ /* compilers to allocate all registers before they get to the innermostloop. */ {UCARD i; UBYTE **p_h=hash; #define ZH *p_h++=START_STRING_18 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ {ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH;} } /* The main loop processes either 1 or 16 items per iteration. As its */ /* termination logic is complicated, I have opted for an infinite loop */ /* structure containing 'break' and 'goto' statements. */ while (TRUE) {/* Begin main processing loop. */ /* Note: All the variables here except unroll should be defined within */ /* the inner loop. Unfortunately the loop hasn't got a block. */ UBYTE *p_ziv; /* Points to first byte of current Ziv. */ UCARD unroll; /* Loop counter for unrolled inner loop. */ UCARD index; /* Index of current partition. */ UBYTE **p_h0; /* Pointer to current partition. */ register UCARD d; /* Depth looping variable. */ register UCARD bestlen; /* Holds the best length seen so far. */ register UCARD bestpos; /* Holds number of best pointer seen so far. */ /* Test for overrun and jump to overrun code if necessary. */ if (p_dst>p_dst_post) goto overrun; /* The following cascade of if statements efficiently catches and deals */ /* with varying degrees of closeness to the end of the input block. */ /* When we get very close to the end, we stop updating the table and */ /* code the remaining bytes as literals. This makes the code simpler. */ unroll=16; if (p_src>p_src_max16) { unroll=1; if (p_src>p_src_max1) { if (p_src==p_src_post) break; else {p_h0=&hash[ANY_HASH_INDEX]; /* Avoid undefined pointer. */ goto literal;} } } /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ /* or 16) items. I have chosen to implement this loop with labels and */ /* gotos to heighten the ease with which the loop may be implemented with */ /* a single decrement and branch instruction in assembly language and */ /* also because the labels act as highly readable place markers. */ /* (Also because we jump into the loop for endgame literals (see above)). */ begin_unrolled_loop: p_ziv=p_src; /* To process the next phrase, we hash the next three bytes to obtain */ /* an index to the zeroth (first) pointer in a target partition. We */ /* get the pointer. */ index=HASH(p_src); p_h0=&hash[index]; /* This next part runs through the pointers in the partition matching */ /* the bytes they point to in the Lempel with the bytes in the Ziv. */ /* The length (bestlen) and within-partition pointer number (bestpos) */ /* of the longest match so far is maintained and is the output of this */ /* segment of code. The s[bestlen]==... is an optimization only. */ bestlen=0; bestpos=0; for (d=0;d<HASH_TABLE_DEPTH;d++) { register UBYTE *s=p_src; register UBYTE *p=p_h0[d]; register UCARD len; if (s[bestlen] == p[bestlen]) { #define PS *p++!=*s++ PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || s++; len=s-p_src-1; if (len>bestlen) { bestpos=d; bestlen=len; } } } /* The length of the longest match determines whether we code a */ /* literal item or a copy item. */ if (bestlen<3) { /* Literal. */ /* Code the literal byte as itself and a zero control bit. */ literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; /* We have just coded a literal. If we had two pending ones, that */ /* makes three and we can update the hash table. */ if (p_h2!=0) {UPDATE_P(p_h2,p_ziv-2);} /* In any case, rotate the hash table pointers for next time. */ p_h2=p_h1; p_h1=p_h0; } else { /* Copy */ /* To code a copy item, we construct a hash table index of the */ /* winning pointer (index+=bestpos) and code it and the best length */ /* into a 2 byte code word. Bump up p_src. */ index+=bestpos; *p_dst++=((index&0xF00)>>4)|(bestlen-3); *p_dst++=index&0xFF; p_src+=bestlen; /* As we have just coded three bytes, we are now in a position to */ /* update the hash table with the literal bytes that were pending */ /* upon the arrival of extra context bytes. */ if (p_h1!=0) { if (p_h2!=0) {UPDATE_P(p_h2,p_ziv-2); p_h2=0;} UPDATE_P(p_h1,p_ziv-1); p_h1=0; } /* In any case, we can update the hash table based on the current */ /* position as we just coded at least three bytes in a copy items. */ UPDATE_P(p_h0,p_ziv); } control>>=1; /* This loop is all set up for a decrement and jump instruction! */ end_unrolled_loop: if (--unroll) goto begin_unrolled_loop; /* At this point it will nearly always be the end of a group in which */ /* case, we have to do some control-word processing. However, near the */ /* end of the input block, the inner unrolled loop is only executed once. */ /* This necessitates the 'if' test. */ if ((control&TOPWORD)==0) { /* Write the control word to the place we saved for it in the output. */ *p_control++= control &0xFF; *p_control = (control>>8) &0xFF; /* Reserve the next word in the output block for the control word */ /* for the group about to be processed. */ p_control=p_dst; p_dst+=2; /* Reset the control bits buffer. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -