📄 lzrw2.c
字号:
/* the sixteenth item in the group. *//* * A zero bit in a control word means that the corresponding item is a *//* literal item. A one bit corresponds to a copy item. *//* * A literal item consists of a single byte which represents itself. *//* * A copy item consists of two bytes that represent from 3 to 18 bytes. *//* * The first byte in a copy item will be denoted C1. *//* * The second byte in a copy item will be denoted C2. *//* * Bits will be selected using square brackets. *//* For example: C1[0..3] is the low nibble of the first control byte. *//* of copy item C1. *//* * The LENGTH of a copy item is defined to be C1[0..3]+3 which is a number *//* in the range [3,18]. *//* * The INDEX of a copy item is defined to be C1[4..7]*256+C2[0..8] which *//* is a number in the range [0,4095]. *//* * A copy item represents the sequence of bytes *//* text[POS-OFFSET..POS-OFFSET+LENGTH-1] where *//* text is the entire text of the uncompressed string. *//* POS is the index in the text of the character following the *//* string represented by all the items preceeding the item *//* being defined. *//* OFFSET is obtained from INDEX by looking up the phrase table. *//* *//******************************************************************************//* Stylistic note: The LZRW1 algorithm was written in an extremely terse *//* style because it had to fit on a single page in a paper. This style *//* carried over to the LZRW1-A algorithm. However, LZRW2 has not been *//* published and so I have reverted to my normal programming style. *//* Stylistic note: This code could be considerably neated by the use of *//* dependent declarations such as {int a=3,b=a+1;}. However I can't locate a *//* clause in K&R that guarantees that declarations are evaluated in order. */ /* The following #define defines the length of the copy flag that appears at *//* the start of the compressed file. The value of four bytes was chosen *//* because the fast_copy routine on my Macintosh runs faster if the source *//* and destination blocks are relatively longword aligned. *//* The actual flag data appears in the first byte. The rest are zeroed so as *//* to normalize the compressed representation (i.e. not non-deterministic). */#define FLAG_BYTES 4/* The following #defines define the meaning of the values of the copy *//* flag at the start of the compressed file. */#define FLAG_COMPRESS 0 /* Signals that output was result of compression. */#define FLAG_COPY 1 /* Signals that output was simply copied over. *//* The 68000 microprocessor (on which this algorithm was originally developed *//* is fussy about non-aligned arrays of words. To avoid these problems the *//* following macro can be used to "waste" from 0 to 3 bytes so as to align *//* the argument pointer. */#define ULONG_ALIGN_UP(X) ((((ULONG)X)+3)&~3)/* The following constant defines the maximum length of an uncompressed item. *//* This definition must not be changed; its value is hardwired into the code. *//* The longest number of bytes that can be spanned by a single item is 18 *//* for the longest copy item. */#define MAX_RAW_ITEM (18)/* The following constant defines the maximum length of an uncompressed group.*//* This definition must not be changed; its value is hardwired into the code. *//* A group contains at most 16 items which explains this definition. */#define MAX_RAW_GROUP (16*MAX_RAW_ITEM)/* The following constant defines the maximum length of a compressed group. *//* This definition must not be changed; its value is hardwired into the code. *//* A compressed group consists of two control bytes followed by up to 16 *//* compressed items each of which can have a maximum length of two bytes. */#define MAX_CMP_GROUP (2+16*2)/* The following constant defines the number of entries in the phrase table. *//* This definition must not be changed; its value is hardwired into the code. */#define PHRASE_TABLE_LENGTH (4096)/* The following constant defines the number of entries in the hash table. *//* This definition must not be changed; its value is hardwired into the code. */#define HASH_TABLE_LENGTH (4096)/* At initialization, the hash table entries are all set to point to element *//* zero of the phrase table. In order for the algorithm to start up, *//* phrase[0] needs to point to a well defined string of at least 18 bytes. At *//* startup, there is no already-transmitted source text to point to and so *//* we have to invent some dummy text to point to. It doesn't matter what is *//* in this string so long as it is at least MAX_RAW_ITEM bytes long and is *//* the same in the compressor and decompressor. I chose consecutive decimal *//* digits because they do not have white space problems (e.g. there is no *//* chance that the compiler will replace more than one space by a TAB) and *//* because they make the length of the string obvious by inspection. */#define START_STRING_18 "123456789012345678"/******************************************************************************/ LOCAL void compress_compress (p_wrk_mem,p_src_first,src_len,p_dst_first,p_dst_len)/* Input : Hand over the required amount of working memory in p_wrk_mem. *//* Input : Specify input block using p_src_first and src_len. *//* Input : Point p_dst_first to the start of the output zone (OZ). *//* Input : Point p_dst_len to a ULONG to receive the output length. *//* Input : Input block and output zone must not overlap. *//* Output : Length of output block written to *p_dst_len. *//* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May *//* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*//* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */UBYTE *p_wrk_mem;UBYTE *p_src_first;ULONG src_len;UBYTE *p_dst_first;ULONG *p_dst_len;{ /* p_src and p_dst step through the source and destination blocks. */ register UBYTE *p_src = p_src_first; register UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the */ /* calculations that determine when the main loop terminates. */ UBYTE *p_src_post = p_src_first+src_len; UBYTE *p_dst_post = p_dst_first+src_len; UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; /* The variables 'p_control' and 'control' are used to buffer control bits. */ /* Before each group is processed, the next two bytes of the output block */ /* are set aside for the control word for the group about to be processed. */ /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ /* 'control' buffers the control bits being generated during the processing */ /* of the group. Instead of having a counter to keep track of how many items */ /* have been processed (=the number of bits in the control word), at the */ /* start of each group, the top word of 'control' is filled with 1 bits. */ /* As 'control' is shifted for each item, the 1 bits in the top word are */ /* absorbed or destroyed. When they all run out (i.e. when the top word is */ /* all zero bits, we know that we are at the end of a group. */ #define TOPWORD 0xFFFF0000 UBYTE *p_control; ULONG control=TOPWORD; UWORD *hash; /* Points to the first element of the hash table. */ UBYTE **phrase; /* Points to the first element of the phrase table. */ register UWORD next; /* Index of next phrase entry to be overwritten. */ /* Set up the hash table and the phrase table in the memory given to */ /* the algorithm. These tables are the only occupants of the memory. */ hash = (UWORD *) ULONG_ALIGN_UP(p_wrk_mem); phrase = (UBYTE **) ULONG_ALIGN_UP(&hash[HASH_TABLE_LENGTH]); /* The variable 'next' cycles through the phrase table indicating the next */ /* position in the table to write a new phrase pointer. */ next=0; /* To start, we write the flag bytes. Being optimistic, we set the flag to */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ /* algorithm deterministic. */ *p_dst++=FLAG_COMPRESS; {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} /* Reserve the first word of output as the control word for the first group. */ /* Note: This is undone at the end if the input block is empty. */ p_control=p_dst; p_dst+=2; /* Initialize the hash table and the phrase table. The hash table entries */ /* all point to element zero of the phrase table which in turn points to */ /* a constant string. The remainder of the phrase table does not need to */ /* be initialized as the algorithm is arranged so that no element of the */ /* phrase table is ever read before it has been written. */ /* In theory, either the hash table or the phrase table has to be */ /* initialized. I chose the hash table as this choice yields determinism. */ {register UWORD i,*t=hash; #define ZH *t++=0; *t++=0; *t++=0; *t++=0 for (i=0;i<256;i++) {ZH;ZH;ZH;ZH;} /* 256=HASH_TABLE_LENGTH/16 */ } phrase[0]=(UBYTE *) START_STRING_18; /* The main loop processes either 1 or 16 items per iteration. As its */ /* termination logic is complicated, I have opted for an infinite loop */ /* structure containing 'break' and 'goto' statements. */ while (TRUE) {/* Begin main processing loop. */ register UBYTE *p; /* Scans through targ phrase during matching. */ register UBYTE *p_ziv; /* Points to first byte of current Ziv. */ register UWORD phrase_num; /* Current phrase number (index in phr tab). */ register UWORD unroll; /* Loop counter for unrolled inner loop. */ register UWORD *p_hte; /* Pointer to the current hash table entry. */ /* Test for overrun and jump to overrun code if necessary. */ if (p_dst>p_dst_post) goto overrun; /* The following cascade of if statements efficiently catches and deals */ /* with varying degrees of closeness to the end of the input block. */ /* When we get very close to the end, we stop updating the tables and */ /* code the remaining bytes as literals. This makes the code simpler. */ unroll=16; if (p_src>p_src_max16) { unroll=1; if (p_src>p_src_max1) { if (p_src==p_src_post) break; else goto literal; } } /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ /* or 16) items. I have chosen to implement this loop with labels and */ /* gotos to heighten the ease with which the loop may be implemented with */ /* a single decrement and branch instruction in assembly language and */ /* also because the labels act as highly readable place markers. */ /* (Also because we jump into the loop for endgame literals (see above)). */ begin_unrolled_loop: /* To process the next phrase, we hash the next three bytes and use */ /* the resultant hash table index to look up the hash table. A pointer */ /* to the entry is stored in p_he. We store a pointer instead of an */ /* index so as to avoid array lookups. The hash table entry contains a */ /* phrase number 'phrase_num' which we use to look up the phrase table */ /* and get a pointer 'p' to a potential match in the history. */ p_hte=&hash[((40543*((p_src[0]<<8)^(p_src[1]<<4)^p_src[2]))>>4) & 0xFFF]; phrase_num=*p_hte; p=phrase[phrase_num]; /* Update the hash table and phrase table. The next element of the */ /* phrase table points to the first byte of the current Ziv and the */ /* hash table entry we looked up gets overwritten with the phrase */ /* table entry number. Wraparound of 'next' is done elsewhere. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -