📄 lzx.c

📁 CHMTools
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* cabextract 0.2 - a program to extract Microsoft Cabinet files * (C) 2000-2001 Stuart Caie <kyzer@4u.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */#include <stdio.h>#include <stdlib.h>#include "lzx.h"#include "lzx_int.h"/* LZX decruncher *//* This LZX decruncher was pulled out of the program cabextract 0.2 by    Stuart Caie <kyzer@4u.net> and modified to be useful as an LZX decruncher    outside the context of CAB files.  I do not claim any copyright on the   (minor) modifications.      -- Matthew T. Russotto*//* Microsoft's LZX document and their implementation of the * com.ms.util.cab Java package do not concur. *  * Correlation between window size and number of position slots: In * the LZX document, 1MB window = 40 slots, 2MB window = 42 slots. In * the implementation, 1MB = 42 slots, 2MB = 50 slots. (The actual * calculation is 'find the first slot whose position base is equal to * or more than the required window size'). This would explain why * other tables in the document refer to 50 slots rather than 42. * * The constant NUM_PRIMARY_LENGTHS used in the decompression * pseudocode is not defined in the specification, although it could * be derived from the section on encoding match lengths. * * The LZX document does not state the uncompressed block has an * uncompressed length. 
Where does this length field come from, so we
 * can know how large the block is? The implementation suggests that
 * it's in the 24 bits following the 3 blocktype bits, before the
 * alignment padding.
 *
 * The LZX document states that aligned offset blocks have their
 * aligned offset huffman tree AFTER the main and length tree. The
 * implementation suggests that the aligned offset tree is BEFORE the
 * main and length trees.
 *
 * The LZX document's decoding algorithm states that, in an aligned
 * offset block, if an extra_bits value is 1, 2 or 3, then that number
 * of bits should be read and the result added to the match
 * offset. This is correct for 1 and 2, but not 3 bits, where only an
 * aligned symbol should be read.
 *
 * Regarding the E8 preprocessing, the LZX document states 'No
 * translation may be performed on the last 6 bytes of the input
 * block'. This is correct. However, the pseudocode provided checks
 * for the *E8 leader* up to the last 6 bytes. If the leader appears
 * between -10 and -7 bytes from the end, this would cause the next
 * four bytes to be modified, at least one of which would be in the
 * last 6 bytes, which is not allowed according to the spec.
 *
 * The specification states that the huffman trees must always contain
 * at least one element. However, many CAB files contain badly compressed
 * sections where the length tree is completely empty (because there
 * are no matches), and this is expected to succeed.
 */

/* LZX uses what it calls 'position slots' to represent match offsets.
 * What this means is that a small 'position slot' number and a small
 * offset from that slot are encoded instead of one large offset for
 * every match.
 * - position_base is an index to the position slot bases
 * - extra_bits states how many bits of offset-from-base data are needed.
*/static ULONG position_base[51];static UBYTE extra_bits[52];int LZXinit(int window) {  int wndsize = 1 << window;  int i, j, posn_slots;  /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */  /* if a previously allocated window is big enough, keep it     */  if (window < 15 || window > 21) return DECR_DATAFORMAT;  if (LZX(actual_size) < wndsize) {    if (LZX(window)) free(LZX(window));    LZX(window) = NULL;  }  if (!LZX(window)) {    if (!(LZX(window) = malloc(wndsize))) return DECR_NOMEMORY;    LZX(actual_size) = wndsize;  }  LZX(window_size) = wndsize;  /* initialise static tables */  for (i=0, j=0; i <= 50; i += 2) {    extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */    if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */  }  for (i=0, j=0; i <= 50; i++) {    position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */    j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */  }  /* calculate required position slots */       if (window == 20) posn_slots = 42;  else if (window == 21) posn_slots = 50;  else posn_slots = window << 1;  /*posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */    LZX(R0)  =  LZX(R1)  = LZX(R2) = 1;  LZX(main_elements)   = LZX_NUM_CHARS + (posn_slots << 3);  LZX(header_read)     = 0;  LZX(frames_read)     = 0;  LZX(block_remaining) = 0;  LZX(block_type)      = LZX_BLOCKTYPE_INVALID;  LZX(intel_curpos)    = 0;  LZX(intel_started)   = 0;  LZX(window_posn)     = 0;  /* initialise tables to 0 (because deltas will be applied to them) */  for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) LZX(MAINTREE_len)[i] = 0;  for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   LZX(LENGTH_len)[i]   = 0;  return DECR_OK;}/* Bitstream reading macros: * * INIT_BITSTREAM    should be used first to set up the system * READ_BITS(var,n)  takes N bits from the buffer and puts them in var * * ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer * PEEK_BITS(n)      extracts 
(without removing) N bits from the bit buffer * REMOVE_BITS(n)    removes N bits from the bit buffer * * These bit access routines work by using the area beyond the MSB and the * LSB as a free source of zeroes. This avoids having to mask any bits. * So we have to know the bit width of the bitbuffer variable. This is * sizeof(ULONG) * 8, also defined as ULONG_BITS *//* number of bits in ULONG. Note: This must be at multiple of 16, and at * least 32 for the bitbuffer code to work (ie, it must be able to ensure * up to 17 bits - that's adding 16 bits when there's one bit left, or * adding 32 bits when there are no bits left. The code should work fine * for machines where ULONG >= 32 bits. */#define ULONG_BITS (sizeof(ULONG)<<3)#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)#define ENSURE_BITS(n)							\  while (bitsleft < (n)) {						\    bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft);	\    bitsleft += 16; inpos+=2;						\  }#define PEEK_BITS(n)   (bitbuf >> (ULONG_BITS - (n)))#define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n)))#define READ_BITS(v,n) do {						\  ENSURE_BITS(n);							\  (v) = PEEK_BITS(n);							\  REMOVE_BITS(n);							\} while (0)/* Huffman macros */#define TABLEBITS(tbl)   (LZX_##tbl##_TABLEBITS)#define MAXSYMBOLS(tbl)  (LZX_##tbl##_MAXSYMBOLS)#define SYMTABLE(tbl)    (LZX(tbl##_table))#define LENTABLE(tbl)    (LZX(tbl##_len))/* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths. * In reality, it just calls make_decode_table() with the appropriate * values - they're all fixed by some #defines anyway, so there's no point * writing each call out in full by hand. */#define BUILD_TABLE(tbl)						\  if (make_decode_table(						\    MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl)	\  )) { return DECR_ILLEGALDATA; }/* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the * bitstream using the stated table and puts it in var. 
*/#define READ_HUFFSYM(tbl,var) do {					\  ENSURE_BITS(16);							\  hufftbl = SYMTABLE(tbl);						\  if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) {	\    j = 1 << (ULONG_BITS - TABLEBITS(tbl));				\    do {								\      j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0;			\      if (!j) { return DECR_ILLEGALDATA; }	                        \    } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl));			\  }									\  j = LENTABLE(tbl)[(var) = i];						\  REMOVE_BITS(j);							\} while (0)/* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols * first to last in the given table. The code lengths are stored in their * own special LZX way. */#define READ_LENGTHS(tbl,first,last) do { \  lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \  if (lzx_read_lens(LENTABLE(tbl),(first),(last),&lb)) { \    return DECR_ILLEGALDATA; \  } \  bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \} while (0)/* make_decode_table(nsyms, nbits, length[], table[]) * * This function was coded by David Tritscher. It builds a fast huffman * decoding table out of just a canonical huffman code lengths table. * * nsyms  = total number of symbols in this huffman tree. * nbits  = any symbols with a code length of nbits or less can be decoded *          in one lookup of the table. * length = A table to get code lengths from [0 to syms-1] * table  = The table to fill up with decoded symbols and pointers. 
* * Returns 0 for OK or 1 for error */int make_decode_table(int nsyms, int nbits, UBYTE *length, UWORD *table) {  register UWORD sym;  register ULONG leaf;  register UBYTE bit_num = 1;  ULONG fill;  ULONG pos         = 0; /* the current position in the decode table */  ULONG table_mask  = 1 << nbits;  ULONG bit_mask    = table_mask >> 1; /* don't do 0 length codes */  ULONG next_symbol = bit_mask; /* base of allocation for long codes */  /* fill entries for codes short enough for a direct mapping */  while (bit_num <= nbits) {    for (sym = 0; sym < nsyms; sym++) {      if (length[sym] == bit_num) {        leaf = pos;        if((pos += bit_mask) > table_mask) return 1; /* table overrun */        /* fill all possible lookups of this symbol with the symbol itself */        fill = bit_mask;        while (fill-- > 0) table[leaf++] = sym;      }    }    bit_mask >>= 1;    bit_num++;  }  /* if there are any codes longer than nbits */  if (pos != table_mask) {    /* clear the remainder of the table */    for (sym = pos; sym < table_mask; sym++) table[sym] = 0;    /* give ourselves room for codes to grow by up to 16 more bits */    pos <<= 16;    table_mask <<= 16;    bit_mask = 1 << 15;    while (bit_num <= 16) {      for (sym = 0; sym < nsyms; sym++) {        if (length[sym] == bit_num) {          leaf = pos >> 16;          for (fill = 0; fill < bit_num - nbits; fill++) {            /* if this path hasn't been taken yet, 'allocate' two entries */            if (table[leaf] == 0) {              table[(next_symbol << 1)] = 0;              table[(next_symbol << 1) + 1] = 0;              table[leaf] = next_symbol++;            }            /* follow the path and select either left or right for next bit */            leaf = table[leaf] << 1;            if ((pos >> (15-fill)) & 1) leaf++;          }          table[leaf] = sym;          if ((pos += bit_mask) > table_mask) return 1; /* table overflow */        }      }      bit_mask >>= 1;      bit_num++;    }  }  /* full table? 
*/  if (pos == table_mask) return 0;  /* either erroneous table, or all elements are 0 - let's find out. */  for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;  return 0;}struct lzx_bits {  ULONG bb;  int bl;  UBYTE *ip;};int lzx_read_lens(UBYTE *lens, int first, int last, struct lzx_bits *lb) {  ULONG i,j, x,y;  int z;  register ULONG bitbuf = lb->bb;  register int bitsleft = lb->bl;  UBYTE *inpos = lb->ip;  UWORD *hufftbl;    for (x = 0; x < 20; x++) {    READ_BITS(y, 4);    LENTABLE(PRETREE)[x] = y;  }  BUILD_TABLE(PRETREE);  for (x = first; x < last; ) {    READ_HUFFSYM(PRETREE, z);    if (z == 17) {      READ_BITS(y, 4); y += 4;      while (y--) lens[x++] = 0;    }    else if (z == 18) {      READ_BITS(y, 5); y += 20;      while (y--) lens[x++] = 0;    }    else if (z == 19) {      READ_BITS(y, 1); y += 4;      READ_HUFFSYM(PRETREE, z);      z = lens[x] - z; if (z < 0) z += 17;      while (y--) lens[x++] = z;    }
12 下一页
💿 文件大小 25 K
👤 上传用户 zhoulovely
📂 所属分类压缩解压
📄 代码行数 706 行
💻 语言类型 C语言
🏷️ 相关标签

#CHMTools
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -