lzx.c

来自「又一开源的解压缩chm格式文件的代码」· C语言代码 · 共 811 行 · 第 1/3 页
811 行
/* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ *//*************************************************************************** *                        lzx.c - LZX decompression routines               * *                           -------------------                           * *                                                                         * *  maintainer: Jed Wing <jedwin@ugcs.caltech.edu>                         * *  source:     modified lzx.c from cabextract v0.5                        * *  notes:      This file was taken from cabextract v0.5, which was,       * *              itself, a modified version of the lzx decompression code   * *              from unlzx.                                                * *                                                                         * *  platforms:  In its current incarnation, this file has been tested on   * *              two different Linux platforms (one, redhat-based, with a   * *              2.1.2 glibc and gcc 2.95.x, and the other, Debian, with    * *              2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2).  Both were * *              Intel x86 compatible machines.                             * ***************************************************************************//*************************************************************************** *                                                                         * *   This program is free software; you can redistribute it and/or modify  * *   it under the terms of the GNU General Public License as published by  * *   the Free Software Foundation; either version 2 of the License, or     * *   (at your option) any later version.  Note that an exemption to this   * *   license has been granted by Stuart Caie for the purposes of           * *   distribution with chmlib.  This does not, to the best of my           * *   knowledge, constitute a change in the license of this (the LZX) code  * *   in general.                                                           * *                                                                         * ***************************************************************************/#include "lzx.h"#include <stdio.h>#include <stdlib.h>#include <string.h>#ifdef __GNUC__#define memcpy __builtin_memcpy#endif/* sized types */typedef unsigned char  UBYTE; /* 8 bits exactly    */typedef unsigned short UWORD; /* 16 bits (or more) */typedef unsigned int   ULONG; /* 32 bits (or more) */typedef   signed int    LONG; /* 32 bits (or more) *//* some constants defined by the LZX specification */#define LZX_MIN_MATCH                (2)#define LZX_MAX_MATCH                (257)#define LZX_NUM_CHARS                (256)#define LZX_BLOCKTYPE_INVALID        (0)   /* also blocktypes 4-7 invalid */#define LZX_BLOCKTYPE_VERBATIM       (1)#define LZX_BLOCKTYPE_ALIGNED        (2)#define LZX_BLOCKTYPE_UNCOMPRESSED   (3)#define LZX_PRETREE_NUM_ELEMENTS     (20)#define LZX_ALIGNED_NUM_ELEMENTS     (8)   /* aligned offset tree #elements */#define LZX_NUM_PRIMARY_LENGTHS      (7)   /* this one missing from spec! */#define LZX_NUM_SECONDARY_LENGTHS    (249) /* length tree #elements *//* LZX huffman defines: tweak tablebits as desired */#define LZX_PRETREE_MAXSYMBOLS  (LZX_PRETREE_NUM_ELEMENTS)#define LZX_PRETREE_TABLEBITS   (6)#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)#define LZX_MAINTREE_TABLEBITS  (12)#define LZX_LENGTH_MAXSYMBOLS   (LZX_NUM_SECONDARY_LENGTHS+1)#define LZX_LENGTH_TABLEBITS    (12)#define LZX_ALIGNED_MAXSYMBOLS  (LZX_ALIGNED_NUM_ELEMENTS)#define LZX_ALIGNED_TABLEBITS   (7)#define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */#define LZX_DECLARE_TABLE(tbl) \  UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\  UBYTE tbl##_len  [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]struct LZXstate{    UBYTE *window;         /* the actual decoding window              */    ULONG window_size;     /* window size (32Kb through 2Mb)          */    ULONG actual_size;     /* window size when it was first allocated */    ULONG window_posn;     /* current offset within the window        */    ULONG R0, R1, R2;      /* for the LRU offset system               */    UWORD main_elements;   /* number of main tree elements            */    int   header_read;     /* have we started decoding at all yet?    */    UWORD block_type;      /* type of this block                      */    ULONG block_length;    /* uncompressed length of this block       */    ULONG block_remaining; /* uncompressed bytes still left to decode */    ULONG frames_read;     /* the number of CFDATA blocks processed   */    LONG  intel_filesize;  /* magic header value used for transform   */    LONG  intel_curpos;    /* current offset in transform space       */    int   intel_started;   /* have we seen any translatable data yet? */    LZX_DECLARE_TABLE(PRETREE);    LZX_DECLARE_TABLE(MAINTREE);    LZX_DECLARE_TABLE(LENGTH);    LZX_DECLARE_TABLE(ALIGNED);};/* LZX decruncher *//* Microsoft's LZX document and their implementation of the * com.ms.util.cab Java package do not concur. * * In the LZX document, there is a table showing the correlation between * window size and the number of position slots. It states that the 1MB * window = 40 slots and the 2MB window = 42 slots. In the implementation, * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the * first slot whose position base is equal to or more than the required * window size'. This would explain why other tables in the document refer * to 50 slots rather than 42. * * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode * is not defined in the specification. * * The LZX document does not state the uncompressed block has an * uncompressed length field. Where does this length field come from, so * we can know how large the block is? The implementation has it as the 24 * bits following after the 3 blocktype bits, before the alignment * padding. * * The LZX document states that aligned offset blocks have their aligned * offset huffman tree AFTER the main and length trees. The implementation * suggests that the aligned offset tree is BEFORE the main and length * trees. * * The LZX document decoding algorithm states that, in an aligned offset * block, if an extra_bits value is 1, 2 or 3, then that number of bits * should be read and the result added to the match offset. This is * correct for 1 and 2, but not 3, where just a huffman symbol (using the * aligned tree) should be read. * * Regarding the E8 preprocessing, the LZX document states 'No translation * may be performed on the last 6 bytes of the input block'. This is * correct.  However, the pseudocode provided checks for the *E8 leader* * up to the last 6 bytes. If the leader appears between -10 and -7 bytes * from the end, this would cause the next four bytes to be modified, at * least one of which would be in the last 6 bytes, which is not allowed * according to the spec. * * The specification states that the huffman trees must always contain at * least one element. However, many CAB files contain blocks where the * length tree is completely empty (because there are no matches), and * this is expected to succeed. *//* LZX uses what it calls 'position slots' to represent match offsets. * What this means is that a small 'position slot' number and a small * offset from that slot are encoded instead of one large offset for * every match. * - position_base is an index to the position slot bases * - extra_bits states how many bits of offset-from-base data is needed. */static const UBYTE extra_bits[51] = {     0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,     7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,    15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,    17, 17, 17};static const ULONG position_base[51] = {          0,       1,       2,      3,      4,      6,      8,     12,     16,     24,     32,       48,      64,      96,     128,     192,        256,     384,     512,    768,   1024,   1536,   2048,   3072,   4096,   6144,   8192,    12288,   16384,   24576,   32768,   49152,      65536,   98304,  131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,    1835008, 1966080, 2097152};struct LZXstate *LZXinit(int window){    struct LZXstate *pState=NULL;    ULONG wndsize = 1 << window;    int i, posn_slots;    /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */    /* if a previously allocated window is big enough, keep it     */    if (window < 15 || window > 21) return NULL;    /* allocate state and associated window */    pState = (struct LZXstate *)malloc(sizeof(struct LZXstate));    if (!(pState->window = (UBYTE *)malloc(wndsize)))    {        free(pState);        return NULL;    }    pState->actual_size = wndsize;    pState->window_size = wndsize;    /* calculate required position slots */    if (window == 20) posn_slots = 42;    else if (window == 21) posn_slots = 50;    else posn_slots = window << 1;    /** alternatively **/    /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */    /* initialize other state */    pState->R0  =  pState->R1  = pState->R2 = 1;    pState->main_elements   = LZX_NUM_CHARS + (posn_slots << 3);    pState->header_read     = 0;    pState->frames_read     = 0;    pState->block_remaining = 0;    pState->block_type      = LZX_BLOCKTYPE_INVALID;    pState->intel_curpos    = 0;    pState->intel_started   = 0;    pState->window_posn     = 0;    /* initialise tables to 0 (because deltas will be applied to them) */    for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0;    for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   pState->LENGTH_len[i]   = 0;    return pState;}void LZXteardown(struct LZXstate *pState){    if (pState)    {        if (pState->window)            free(pState->window);        free(pState);    }}int LZXreset(struct LZXstate *pState){    int i;    pState->R0  =  pState->R1  = pState->R2 = 1;    pState->header_read     = 0;    pState->frames_read     = 0;    pState->block_remaining = 0;    pState->block_type      = LZX_BLOCKTYPE_INVALID;    pState->intel_curpos    = 0;    pState->intel_started   = 0;    pState->window_posn     = 0;    for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0;    for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++)   pState->LENGTH_len[i]   = 0;    return DECR_OK;}/* Bitstream reading macros: * * INIT_BITSTREAM    should be used first to set up the system * READ_BITS(var,n)  takes N bits from the buffer and puts them in var * * ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer * PEEK_BITS(n)      extracts (without removing) N bits from the bit buffer * REMOVE_BITS(n)    removes N bits from the bit buffer * * These bit access routines work by using the area beyond the MSB and the * LSB as a free source of zeroes. This avoids having to mask any bits. * So we have to know the bit width of the bitbuffer variable. This is * sizeof(ULONG) * 8, also defined as ULONG_BITS *//* number of bits in ULONG. Note: This must be at multiple of 16, and at * least 32 for the bitbuffer code to work (ie, it must be able to ensure * up to 17 bits - that's adding 16 bits when there's one bit left, or * adding 32 bits when there are no bits left. The code should work fine * for machines where ULONG >= 32 bits. */#define ULONG_BITS (sizeof(ULONG)<<3)#define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)#define ENSURE_BITS(n)							\  while (bitsleft < (n)) {						\    bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft);	\
lzx.c - 源码说明

本页面展示了「又一开源的解压缩chm格式文件的代码」中的 lzx.c 源码文件，采用 C语言编程语言编写，共 811 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫开发者社区收录了大量与CHM解压相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?