📄 lzx.c
字号:
/* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ *//*************************************************************************** * lzx.c - LZX decompression routines * * ------------------- * * * * maintainer: Jed Wing <jedwin@ugcs.caltech.edu> * * source: modified lzx.c from cabextract v0.5 * * notes: This file was taken from cabextract v0.5, which was, * * itself, a modified version of the lzx decompression code * * from unlzx. * * * * platforms: In its current incarnation, this file has been tested on * * two different Linux platforms (one, redhat-based, with a * * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with * * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were * * Intel x86 compatible machines. * ***************************************************************************//*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. Note that an exemption to this * * license has been granted by Stuart Caie for the purposes of * * distribution with chmlib. This does not, to the best of my * * knowledge, constitute a change in the license of this (the LZX) code * * in general. * * * ***************************************************************************/#include "lzx.h"#include <stdio.h>#include <stdlib.h>#include <string.h>#ifdef __GNUC__#define memcpy __builtin_memcpy#endif/* sized types */typedef unsigned char UBYTE; /* 8 bits exactly */typedef unsigned short UWORD; /* 16 bits (or more) */typedef unsigned int ULONG; /* 32 bits (or more) */typedef signed int LONG; /* 32 bits (or more) *//* some constants defined by the LZX specification */#define LZX_MIN_MATCH (2)#define LZX_MAX_MATCH (257)#define LZX_NUM_CHARS (256)#define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */#define LZX_BLOCKTYPE_VERBATIM (1)#define LZX_BLOCKTYPE_ALIGNED (2)#define LZX_BLOCKTYPE_UNCOMPRESSED (3)#define LZX_PRETREE_NUM_ELEMENTS (20)#define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */#define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */#define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements *//* LZX huffman defines: tweak tablebits as desired */#define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)#define LZX_PRETREE_TABLEBITS (6)#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)#define LZX_MAINTREE_TABLEBITS (12)#define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)#define LZX_LENGTH_TABLEBITS (12)#define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS)#define LZX_ALIGNED_TABLEBITS (7)#define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */#define LZX_DECLARE_TABLE(tbl) \ UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\ UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]struct LZXstate{ UBYTE *window; /* the actual decoding window */ ULONG window_size; /* window size (32Kb through 2Mb) */ ULONG actual_size; /* window size when it was first allocated */ ULONG window_posn; /* current offset within the window */ ULONG R0, R1, R2; /* for the LRU offset system */ UWORD main_elements; /* number of main tree elements */ int header_read; /* have we started decoding at all yet? */ UWORD block_type; /* type of this block */ ULONG block_length; /* uncompressed length of this block */ ULONG block_remaining; /* uncompressed bytes still left to decode */ ULONG frames_read; /* the number of CFDATA blocks processed */ LONG intel_filesize; /* magic header value used for transform */ LONG intel_curpos; /* current offset in transform space */ int intel_started; /* have we seen any translatable data yet? */ LZX_DECLARE_TABLE(PRETREE); LZX_DECLARE_TABLE(MAINTREE); LZX_DECLARE_TABLE(LENGTH); LZX_DECLARE_TABLE(ALIGNED);};/* LZX decruncher *//* Microsoft's LZX document and their implementation of the * com.ms.util.cab Java package do not concur. * * In the LZX document, there is a table showing the correlation between * window size and the number of position slots. It states that the 1MB * window = 40 slots and the 2MB window = 42 slots. In the implementation, * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the * first slot whose position base is equal to or more than the required * window size'. This would explain why other tables in the document refer * to 50 slots rather than 42. * * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode * is not defined in the specification. * * The LZX document does not state the uncompressed block has an * uncompressed length field. Where does this length field come from, so * we can know how large the block is? The implementation has it as the 24 * bits following after the 3 blocktype bits, before the alignment * padding. * * The LZX document states that aligned offset blocks have their aligned * offset huffman tree AFTER the main and length trees. The implementation * suggests that the aligned offset tree is BEFORE the main and length * trees. * * The LZX document decoding algorithm states that, in an aligned offset * block, if an extra_bits value is 1, 2 or 3, then that number of bits * should be read and the result added to the match offset. This is * correct for 1 and 2, but not 3, where just a huffman symbol (using the * aligned tree) should be read. * * Regarding the E8 preprocessing, the LZX document states 'No translation * may be performed on the last 6 bytes of the input block'. This is * correct. However, the pseudocode provided checks for the *E8 leader* * up to the last 6 bytes. If the leader appears between -10 and -7 bytes * from the end, this would cause the next four bytes to be modified, at * least one of which would be in the last 6 bytes, which is not allowed * according to the spec. * * The specification states that the huffman trees must always contain at * least one element. However, many CAB files contain blocks where the * length tree is completely empty (because there are no matches), and * this is expected to succeed. *//* LZX uses what it calls 'position slots' to represent match offsets. * What this means is that a small 'position slot' number and a small * offset from that slot are encoded instead of one large offset for * every match. * - position_base is an index to the position slot bases * - extra_bits states how many bits of offset-from-base data is needed. */static const UBYTE extra_bits[51] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};static const ULONG position_base[51] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 1835008, 1966080, 2097152};struct LZXstate *LZXinit(int window){ struct LZXstate *pState=NULL; ULONG wndsize = 1 << window; int i, posn_slots; /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ /* if a previously allocated window is big enough, keep it */ if (window < 15 || window > 21) return NULL; /* allocate state and associated window */ pState = (struct LZXstate *)malloc(sizeof(struct LZXstate)); if (!(pState->window = (UBYTE *)malloc(wndsize))) { free(pState); return NULL; } pState->actual_size = wndsize; pState->window_size = wndsize; /* calculate required position slots */ if (window == 20) posn_slots = 42; else if (window == 21) posn_slots = 50; else posn_slots = window << 1; /** alternatively **/ /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */ /* initialize other state */ pState->R0 = pState->R1 = pState->R2 = 1; pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3); pState->header_read = 0; pState->frames_read = 0; pState->block_remaining = 0; pState->block_type = LZX_BLOCKTYPE_INVALID; pState->intel_curpos = 0; pState->intel_started = 0; pState->window_posn = 0; /* initialise tables to 0 (because deltas will be applied to them) */ for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0; for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0; return pState;}void LZXteardown(struct LZXstate *pState){ if (pState) { if (pState->window) free(pState->window); free(pState); }}int LZXreset(struct LZXstate *pState){ int i; pState->R0 = pState->R1 = pState->R2 = 1; pState->header_read = 0; pState->frames_read = 0; pState->block_remaining = 0; pState->block_type = LZX_BLOCKTYPE_INVALID; pState->intel_curpos = 0; pState->intel_started = 0; pState->window_posn = 0; for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0; for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0; return DECR_OK;}/* Bitstream reading macros: * * INIT_BITSTREAM should be used first to set up the system * READ_BITS(var,n) takes N bits from the buffer and puts them in var * * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer * REMOVE_BITS(n) removes N bits from the bit buffer * * These bit access routines work by using the area beyond the MSB and the * LSB as a free source of zeroes. This avoids having to mask any bits. * So we have to know the bit width of the bitbuffer variable. This is * sizeof(ULONG) * 8, also defined as ULONG_BITS *//* number of bits in ULONG. Note: This must be at multiple of 16, and at * least 32 for the bitbuffer code to work (ie, it must be able to ensure * up to 17 bits - that's adding 16 bits when there's one bit left, or * adding 32 bits when there are no bits left. The code should work fine * for machines where ULONG >= 32 bits. */#define ULONG_BITS (sizeof(ULONG)<<3)#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)#define ENSURE_BITS(n) \ while (bitsleft < (n)) { \ bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft); \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -