📄 simple-lz77.c
字号:
/** Maximal input size that can be encoded by dictionary, in bytes, not* depending on [current] dictionary state.*/static int maxDictionaryEncodingIn(void){ /* Note: we do not consider current dictionary state, e.g. adjust * for current dictionary length [possibly smaller than size], etc. */ assert(LOOKAHEADSIZE > 0); /* 15 bytes (2**4-1, where 4 is the number of bits in length encoding) */ return LOOKAHEADSIZE;}/** Maximal output size in encoding by-dictionary, in bytes, not* depending on [current] dictionary state.*/static int maxDictionaryEncodingOut(void){ assert(POINTERBYTES > 0); /* 12 bits dictionary offset encoding, 4 bits length encoding */ return POINTERBYTES; /* 2 */}/** Encode by-dictionary.* Note: LZ77 won't encode anything by-dictionary if no match is found* [in which case inUsed will be 0 and outUsed 2]; this requires* additional encoding steps, e.g. sending plain symbols [either* alternating with dictionary encoding or as an alternative].* Note: inSize=0 is explicitly allowed, to allow zero/non-match encoding* possibly needed by encoder to spare a last byte as new symbol output.*/static void encodeByDictionary( lz77* p, char const* in, /* input */ int inSize, /* input size, in bytes */ int* inUsed, /* returns the number of input bytes consumed after coding */ char* out, /* output buffer */ int outSize, /* output buffer size, bytes */ int* outUsed /* returns the number of output bytes generated by coding */){ int offset, length; assert(inUsed != NULL); assert(outUsed != NULL); *inUsed = 0; *outUsed = 0; assert(LOOKAHEADSIZE > 0); /* can't encode more than LOOKAHEADSIZE */ if (inSize > LOOKAHEADSIZE) { inSize = LOOKAHEADSIZE; } /* [already check here that we'll be able to code this length later] */ assert(inSize < (1 << LENGTHBITS)); assert(POINTERBYTES > 0); assert(outSize >= POINTERBYTES); /* [sanity check] */ if (outSize < POINTERBYTES) { return; } assert(p != NULL); /* search for a convenient substring in the window */ offset = 0; length = 0; findLargest(in, inSize, p->window, WINDOWSIZE, &offset, &length); *inUsed = length; *outUsed = POINTERBYTES; if (length == 0) { offset = 0; /* [should already be so] */ } assert(POINTERBITS == 2 * 8); assert(POINTERBYTES == 2); assert(OFFSETBITS + LENGTHBITS == POINTERBITS); assert(OFFSETBITS - 8 >= 0); assert(LENGTHBITS >= 0); assert(offset >= 0); assert(offset < (1 << OFFSETBITS)); assert(length >= 0); assert(length < (1 << LENGTHBITS)); assert(out != NULL); /* encode offset and length */ out[0] = (char)(offset >> (OFFSETBITS - 8)); /* high order offset */ out[1] = (char)((offset << LENGTHBITS) | length); /* low order offset and length */}/** Greedy search for largest substring [from beginning of string] in a* window.* Search forward and start at relative offset 0.* Note: size may be 0.*/static void findLargest( char const* s, int size, char const* window, int windowSize, int* offset, int* length){ int r, m; assert(length != NULL); assert(offset != NULL); *length = 0; /* maximal match length so far */ *offset = 0; /* match offset associated to length */ for (r = 0; r < windowSize; r++) { /* offset */ m = matchLength(&window[r], windowSize - r, s, size); /* [not considering the window as circular] */ if (m > *length) { /* prefer earlier matches [i.e. further from current position in this case] */ *length = m; *offset = r; } } assert((*length == 0 && *offset == 0) || *length != 0);}/** Return length of a possible match.* Note: the lengths may be zero.*/static int matchLength(char const* a, int an, char const* b, int bn){ int n; assert(a != NULL); assert(b != NULL); n = 0; /* match length */ while (an-- > 0 && bn-- > 0 && *a++ == *b++) { n++; } return n;}/** Maximal input size that can be decoded by dictionary, in bytes, not* depending on [current] dictionary state.*/static int maxDictionaryDecodingIn(void){ /* decode-in size is the same as encode-out */ return maxDictionaryEncodingOut();}/** Maximal output size in decoding by-dictionary, in bytes, not* depending on [current] dictionary state.*/static int maxDictionaryDecodingOut(void){ /* decode-out size is the same as encode-in */ return maxDictionaryEncodingIn();}/** Decode by-dictionary.*/static void decodeByDictionary( lz77* p, char const* in, /* input */ int inSize, /* input size, in bytes */ int* inUsed, /* returns the number of input bytes consumed after coding */ char* out, /* output buffer */ int outSize, /* output buffer size, bytes */ int* outUsed /* returns the number of output bytes generated by coding */){ int offset, length; assert(inUsed != NULL); assert(outUsed != NULL); *inUsed = 0; *outUsed = 0; assert(POINTERBITS == 2 * 8); assert(POINTERBYTES == 2); assert(OFFSETBITS + LENGTHBITS == POINTERBITS); assert(OFFSETBITS - 8 >= 0); assert(LENGTHBITS >= 0); assert(8 - LENGTHBITS >= 0); assert(in != NULL); assert(POINTERBYTES > 0); assert(inSize >= POINTERBYTES); /* [sanity check] */ if (inSize < POINTERBYTES) { return; } /* decode offset and length */ offset = ((unsigned char)in[0] << (OFFSETBITS - 8)) | ((unsigned char)in[1] >> LENGTHBITS); length = in[1] & ((unsigned char)0xff >> (8 - LENGTHBITS)); assert(offset >= 0); assert(offset < (1 << OFFSETBITS)); assert(length >= 0); assert(length < (1 << LENGTHBITS)); assert(out != NULL); assert(outSize >= length); /* [sanity check] */ if (outSize < length) { return; } assert(p != NULL); memmove(out, &p->window[offset], length); *inUsed = POINTERBYTES; *outUsed = length;}/** Update dictionary [model] with [some] input.* Typically called after codeing, with inSize being equal to that inUsed.*/static void updateDictionary( lz77* p, char const* in, /* input considered for model update, may not overlap with p->window */ int inSize /* size, bytes */){ /* Update dictionary [model] with [some] input. */ assert(inSize >= 0); assert(in != NULL); assert(p != NULL); if (inSize >= WINDOWSIZE) { /* new data fills whole window, old data [and possibly some new data] is discarded */ assert(inSize - WINDOWSIZE >= 0); memmove(p->window, &in[inSize - WINDOWSIZE], WINDOWSIZE); } else if (inSize > 0) { /* some old data needs to be discarded */ /* Note: in order to support overlapping window and in, * we move in to the beginning of window first and then * rotate the window. */ assert(inSize <= WINDOWSIZE); memmove(p->window, in, inSize); memrot(p->window, WINDOWSIZE, inSize); } /* else: inSize==0: nothing to do */}/** Rotate a memory buffer of specified size by n bytes to the left.* Right-rotation is achieved by rotating it size-n.* Element size can be a divisor of gcd(size,n).*/static void* memrot(void* p, int size, int n){ int rounds; char *q, *e; if (size == 0) { /* nothing to do; %size below would fail */ return p; } n %= size; /* adjust n to be 0<=n<size */ if (n < 0) { /* consider a possible negative n */ n += size; } if (n == 0) { /* nothing to do; rounds below would be large */ return p; } q = (char*)p; /* starting point for a round */ e = q + size; /* end of buffer, to wrap-adjust */ rounds = gcd(size, n); /* number of rounds needed to move memory */ for (; rounds > 0; q++, rounds--) { char* r = q; char c = *r; /* the one temporary location needed for moving the whole buffer */ for (;;) { char* s = r + n; /* next location to be moved */ if (s >= e) { /* consider wrap */ s -= size; } if (s == q) { break; /* done */ } *r = *s; /* move */ r = s; /* next */ } *r = c; } return p;}/** Greatest common divisor of two integers.* gcd(a,b) == gcd(b,a), gcd(a,0) == a.*/static int gcd(int a, int b){ /* it is more convenient to have a>b * [otherwise the first remainder calculation just swaps them] */ while (b != 0) { int c = a % b; a = b; b = c; } return a;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -