📄 lzrw3.c
字号:
/* POS is the index in the text of the character following the *//* string represented by all the items preceeding the item *//* being defined. *//* OFFSET is obtained from INDEX by looking up the hash table. *//* *//******************************************************************************//* The following #define defines the length of the copy flag that appears at *//* the start of the compressed file. The value of four bytes was chosen *//* because the fast_copy routine on my Macintosh runs faster if the source *//* and destination blocks are relatively longword aligned. *//* The actual flag data appears in the first byte. The rest are zeroed so as *//* to normalize the compressed representation (i.e. not non-deterministic). */#define FLAG_BYTES 4/* The following #defines define the meaning of the values of the copy *//* flag at the start of the compressed file. */#define FLAG_COMPRESS 0 /* Signals that output was result of compression. */#define FLAG_COPY 1 /* Signals that output was simply copied over. *//* The 68000 microprocessor (on which this algorithm was originally developed *//* is fussy about non-aligned arrays of words. To avoid these problems the *//* following macro can be used to "waste" from 0 to 3 bytes so as to align *//* the argument pointer. */#define ULONG_ALIGN_UP(X) ((((ULONG)X)+sizeof(ULONG)-1)&~(sizeof(ULONG)-1))/* The following constant defines the maximum length of an uncompressed item. *//* This definition must not be changed; its value is hardwired into the code. *//* The longest number of bytes that can be spanned by a single item is 18 *//* for the longest copy item. */#define MAX_RAW_ITEM (18)/* The following constant defines the maximum length of an uncompressed group.*//* This definition must not be changed; its value is hardwired into the code. *//* A group contains at most 16 items which explains this definition. */#define MAX_RAW_GROUP (16*MAX_RAW_ITEM)/* The following constant defines the maximum length of a compressed group. *//* This definition must not be changed; its value is hardwired into the code. *//* A compressed group consists of two control bytes followed by up to 16 *//* compressed items each of which can have a maximum length of two bytes. */#define MAX_CMP_GROUP (2+16*2)/* The following constant defines the number of entries in the hash table. *//* This definition must not be changed; its value is hardwired into the code. */#define HASH_TABLE_LENGTH (4096)/* LZRW3, unlike LZRW1(-A), must initialize its hash table so as to enable *//* the compressor and decompressor to stay in step maintaining identical hash *//* tables. In an early version of the algorithm, the tables were simply *//* initialized to zero and a check for zero was included just before the *//* matching code. However, this test costs time. A better solution is to *//* initialize all the entries in the hash table to point to a constant *//* string. The decompressor does the same. This solution requires no extra *//* test. The contents of the string do not matter so long as the string is *//* the same for the compressor and decompressor and contains at least *//* MAX_RAW_ITEM bytes. I chose consecutive decimal digits because they do not *//* have white space problems (e.g. there is no chance that the compiler will *//* replace more than one space by a TAB) and because they make the length of *//* the string obvious by inspection. */#define START_STRING_18 ((UBYTE *) "123456789012345678")/* In this algorithm, hash values have to be calculated at more than one *//* point. The following macro neatens the code up for this. */#define HASH(PTR) \ (((40543*(((*(PTR))<<8)^((*((PTR)+1))<<4)^(*((PTR)+2))))>>4) & 0xFFF)/******************************************************************************//* Input : Hand over the required amount of working memory in p_wrk_mem. *//* Input : Specify input block using p_src_first and src_len. *//* Input : Point p_dst_first to the start of the output zone (OZ). *//* Input : Point p_dst_len to a ULONG to receive the output length. *//* Input : Input block and output zone must not overlap. *//* Output : Length of output block written to *p_dst_len. *//* Output : Output block in Mem[p_dst_first..p_dst_first+*p_dst_len-1]. May *//* Output : write in OZ=Mem[p_dst_first..p_dst_first+src_len+MAX_CMP_GROUP-1].*//* Output : Upon completion guaranteed *p_dst_len<=src_len+FLAG_BYTES. */LOCAL void compress_compress(UBYTE *p_wrk_mem, UBYTE *p_src_first, ULONG src_len, UBYTE *p_dst_first, LONG *p_dst_len){ /* p_src and p_dst step through the source and destination blocks. */ register UBYTE *p_src = p_src_first; register UBYTE *p_dst = p_dst_first; /* The following variables are never modified and are used in the */ /* calculations that determine when the main loop terminates. */ UBYTE *p_src_post = p_src_first+src_len; UBYTE *p_dst_post = p_dst_first+src_len; UBYTE *p_src_max1 = p_src_first+src_len-MAX_RAW_ITEM; UBYTE *p_src_max16 = p_src_first+src_len-MAX_RAW_ITEM*16; /* The variables 'p_control' and 'control' are used to buffer control bits. */ /* Before each group is processed, the next two bytes of the output block */ /* are set aside for the control word for the group about to be processed. */ /* 'p_control' is set to point to the first byte of that word. Meanwhile, */ /* 'control' buffers the control bits being generated during the processing */ /* of the group. Instead of having a counter to keep track of how many items */ /* have been processed (=the number of bits in the control word), at the */ /* start of each group, the top word of 'control' is filled with 1 bits. */ /* As 'control' is shifted for each item, the 1 bits in the top word are */ /* absorbed or destroyed. When they all run out (i.e. when the top word is */ /* all zero bits, we know that we are at the end of a group. */# define TOPWORD 0xFFFF0000 UBYTE *p_control; register ULONG control=TOPWORD; /* THe variable 'hash' always points to the first element of the hash table. */ UBYTE **hash= (UBYTE **) ULONG_ALIGN_UP(p_wrk_mem); /* The following two variables represent the literal buffer. p_h1 points to */ /* the hash table entry corresponding to the youngest literal. p_h2 points */ /* to the hash table entry corresponding to the second youngest literal. */ /* Note: p_h1=0=>p_h2=0 because zero values denote absence of a pending */ /* literal. The variables are initialized to zero meaning an empty "buffer". */ UBYTE **p_h1=NULL; UBYTE **p_h2=NULL; /* To start, we write the flag bytes. Being optimistic, we set the flag to */ /* FLAG_COMPRESS. The remaining flag bytes are zeroed so as to keep the */ /* algorithm deterministic. */ *p_dst++=FLAG_COMPRESS; {UWORD i; for (i=2;i<=FLAG_BYTES;i++) *p_dst++=0;} /* Reserve the first word of output as the control word for the first group. */ /* Note: This is undone at the end if the input block is empty. */ p_control=p_dst; p_dst+=2; /* Initialize all elements of the hash table to point to a constant string. */ /* Use of an unrolled loop speeds this up considerably. */ {UWORD i; UBYTE **p_h=hash;# define ZH *p_h++=START_STRING_18 for (i=0;i<256;i++) /* 256=HASH_TABLE_LENGTH/16. */ {ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH; ZH;ZH;ZH;ZH;} } /* The main loop processes either 1 or 16 items per iteration. As its */ /* termination logic is complicated, I have opted for an infinite loop */ /* structure containing 'break' and 'goto' statements. */ while (TRUE) {/* Begin main processing loop. */ /* Note: All the variables here except unroll should be defined within */ /* the inner loop. Unfortunately the loop hasn't got a block. */ register UBYTE *p; /* Scans through targ phrase during matching. */ register UBYTE *p_ziv= NULL ; /* Points to first byte of current Ziv. */ register UWORD unroll; /* Loop counter for unrolled inner loop. */ register UWORD index; /* Index of current hash table entry. */ register UBYTE **p_h0 = NULL ; /* Pointer to current hash table entry. */ /* Test for overrun and jump to overrun code if necessary. */ if (p_dst>p_dst_post) goto overrun; /* The following cascade of if statements efficiently catches and deals */ /* with varying degrees of closeness to the end of the input block. */ /* When we get very close to the end, we stop updating the table and */ /* code the remaining bytes as literals. This makes the code simpler. */ unroll=16; if (p_src>p_src_max16) { unroll=1; if (p_src>p_src_max1) { if (p_src==p_src_post) break; else goto literal; } } /* This inner unrolled loop processes 'unroll' (whose value is either 1 */ /* or 16) items. I have chosen to implement this loop with labels and */ /* gotos to heighten the ease with which the loop may be implemented with */ /* a single decrement and branch instruction in assembly language and */ /* also because the labels act as highly readable place markers. */ /* (Also because we jump into the loop for endgame literals (see above)). */ begin_unrolled_loop: /* To process the next phrase, we hash the next three bytes and use */ /* the resultant hash table index to look up the hash table. A pointer */ /* to the entry is stored in p_h0 so as to avoid an array lookup. The */ /* hash table entry *p_h0 is looked up yielding a pointer p to a */ /* potential match of the Ziv in the history. */ index=HASH(p_src); p_h0=&hash[index]; p=*p_h0; /* Having looked up the candidate position, we are in a position to */ /* attempt a match. The match loop has been unrolled using the PS */ /* macro so that failure within the first three bytes automatically */ /* results in the literal branch being taken. The coding is simple. */ /* p_ziv saves p_src so we can let p_src wander. */# define PS *p++!=*p_src++ p_ziv=p_src; if (PS || PS || PS) { /* Literal. */ /* Code the literal byte as itself and a zero control bit. */ p_src=p_ziv; literal: *p_dst++=*p_src++; control&=0xFFFEFFFF; /* We have just coded a literal. If we had two pending ones, that */ /* makes three and we can update the hash table. */ if (p_h2!=0) {*p_h2=p_ziv-2;} /* In any case, rotate the hash table pointers for next time. */ p_h2=p_h1; p_h1=p_h0; } else { /* Copy */ /* Match up to 15 remaining bytes using an unrolled loop and code. */#if 0 PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || p_src++;#else if ( !( PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS || PS ) ) p_src++;#endif *p_dst++=((index&0xF00)>>4)|(--p_src-p_ziv-3); *p_dst++=index&0xFF; /* As we have just coded three bytes, we are now in a position to */ /* update the hash table with the literal bytes that were pending */ /* upon the arrival of extra context bytes. */ if (p_h1!=0) { if (p_h2) {*p_h2=p_ziv-2; p_h2=NULL;} *p_h1=p_ziv-1; p_h1=NULL; } /* In any case, we can update the hash table based on the current */ /* position as we just coded at least three bytes in a copy items. */ *p_h0=p_ziv;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -