📄 chm_lib.c
字号:
/* $Id: chm_lib.c,v 1.19 2003/09/07 13:01:43 jedwin Exp $ */
/***************************************************************************
 *             chm_lib.c - CHM archive manipulation routines               *
 *                           -------------------                           *
 *                                                                         *
 *  author:     Jed Wing <jedwin@ugcs.caltech.edu>                         *
 *  version:    0.3                                                        *
 *  notes:      These routines are meant for the manipulation of microsoft *
 *              .chm (compiled html help) files, but may likely be used    *
 *              for the manipulation of any ITSS archive, if ever ITSS     *
 *              archives are used for any other purpose.                   *
 *                                                                         *
 *              Note also that the section names are statically handled.   *
 *              To be entirely correct, the section names should be read   *
 *              from the section names meta-file, and then the various     *
 *              content sections and the "transforms" to apply to the data *
 *              they contain should be inferred from the section name and  *
 *              the meta-files referenced using that name; however, all of *
 *              the files I've been able to get my hands on appear to have *
 *              only two sections: Uncompressed and MSCompressed.          *
 *              Additionally, the ITSS.DLL file included with Windows does *
 *              not appear to handle any different transforms than the     *
 *              simple LZX-transform.  Furthermore, the list of transforms *
 *              to apply is broken, in that only half the required space   *
 *              is allocated for the list.  (It appears as though the      *
 *              space is allocated for ASCII strings, but the strings are  *
 *              written as unicode.  As a result, only the first half of   *
 *              the string appears.)  So this is probably not too big of   *
 *              a deal, at least until CHM v4 (MS .lit files), which also  *
 *              incorporate encryption, of some description.               *
 *                                                                         *
 * switches:    CHM_MT:        compile library with thread-safety          *
 *                                                                         *
 * switches (Linux only):                                                  *
 *              CHM_USE_PREAD: compile library to use pread instead of     *
 *                             lseek/read                                  *
 *              CHM_USE_IO64:  compile library to support full 64-bit I/O  *
 *                             as is needed to properly deal with the      *
 *                             64-bit file offsets.                        *
 *                                                                         *
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU Lesser General Public License as        *
 *   published by the Free Software Foundation; either version 2.1 of the  *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 ***************************************************************************/

#include "chm_lib.h"

#ifdef CHM_MT
#define _REENTRANT
#endif

#include "lzx.h"

#include <stdlib.h>
#include <string.h>
#ifdef CHM_DEBUG
#include <stdio.h>
#endif

#if __sun || __sgi
#include <strings.h>
#endif

/* RWE 6/13/2003 */
#ifdef _WIN32_WCE
#define FREEBUF(x) free(x)
#else
#define FREEBUF(x) /* do nothing */
#endif

#ifdef WIN32
#include <windows.h>
#include <malloc.h>
#ifdef _WIN32_WCE
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#else
#define strcasecmp stricmp
#define strncasecmp strnicmp
#endif
#else
/* basic Linux system includes */
#define _XOPEN_SOURCE 500
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* #include <dmalloc.h> */
#endif

/* includes/defines for threading, if using them */
#ifdef CHM_MT
#ifdef WIN32
#define CHM_ACQUIRE_LOCK(a) do {                        \
        EnterCriticalSection(&(a));                     \
    } while(0)
/* Releasing must LEAVE the critical section; entering it again here would
 * only bump the owner's recursion count and never let other threads in. */
#define CHM_RELEASE_LOCK(a) do {                        \
        LeaveCriticalSection(&(a));                     \
    } while(0)

#else
#include <pthread.h>

#define CHM_ACQUIRE_LOCK(a) do {                        \
        pthread_mutex_lock(&(a));                       \
    } while(0)
#define CHM_RELEASE_LOCK(a) do {                        \
        pthread_mutex_unlock(&(a));                     \
    } while(0)

#endif
#else
#define CHM_ACQUIRE_LOCK(a) /* do nothing */
#define CHM_RELEASE_LOCK(a) /* do nothing */
#endif

#ifdef WIN32
#define CHM_NULL_FD (INVALID_HANDLE_VALUE)
#define CHM_USE_WIN32IO 1
#define CHM_CLOSE_FILE(fd) CloseHandle((fd))
#else
#define CHM_NULL_FD (-1)
#define CHM_CLOSE_FILE(fd) close((fd))
#endif

/*
 * defines related to tuning
 */
#ifndef CHM_MAX_BLOCKS_CACHED
#define CHM_MAX_BLOCKS_CACHED 5
#endif

/*
 * architecture specific defines
 *
 * Note: as soon as C99 is more widespread, the below defines should
 * probably just use the C99 sized-int types.
 *
 * The following settings will probably work for many platforms.  The sizes
 * don't have to be exactly correct, but the types must accommodate at least as
 * many bits as they specify.
 */

/* i386, 32-bit, Windows */
#ifdef WIN32
typedef unsigned char           UChar;
typedef __int16                 Int16;
typedef unsigned __int16        UInt16;
typedef __int32                 Int32;
typedef unsigned __int32        UInt32;
typedef __int64                 Int64;
typedef unsigned __int64        UInt64;

/* I386, 32-bit, non-Windows */
/* Sparc        */
/* MIPS         */
/* PPC          */
#elif __i386__ || __sun || __sgi || __ppc__
typedef unsigned char           UChar;
typedef short                   Int16;
typedef unsigned short          UInt16;
typedef long                    Int32;
typedef unsigned long           UInt32;
typedef long long               Int64;
typedef unsigned long long      UInt64;

/* x86-64 */
/* Note that these may be appropriate for other 64-bit machines. */
#elif __x86_64__
typedef unsigned char           UChar;
typedef short                   Int16;
typedef unsigned short          UInt16;
typedef int                     Int32;
typedef unsigned int            UInt32;
typedef long                    Int64;
typedef unsigned long           UInt64;

#else

/* yielding an error is preferable to yielding incorrect behavior */
#error "Please define the sized types for your platform in chm_lib.c"
#endif

/* GCC */
#ifdef __GNUC__
#define memcmp __builtin_memcmp
#define memcpy __builtin_memcpy
#define strlen __builtin_strlen

#elif defined(WIN32)
/*
 * Fallback ffs() for non-GCC Windows builds: returns the 1-based index of the
 * least significant set bit of val, or 0 when val has no bits set.
 */
static int ffs(unsigned int val)
{
    /* the probe mask is unsigned so that shifting it out of the top bit
     * wraps to zero instead of overflowing a signed int */
    unsigned int bit = 1;
    int idx = 1;

    while (bit != 0  &&  (val & bit) == 0)
    {
        bit <<= 1;
        ++idx;
    }

    if (bit == 0)
        return 0;
    else
        return idx;
}

#endif

/*
 * utilities for unmarshalling data
 *
 * Every helper below reads from the cursor *pData, which has *pLenRemain
 * bytes left.  On success the cursor is advanced, the remaining length is
 * reduced by the number of bytes consumed, and 1 is returned.  On failure
 * (not enough bytes left) 0 is returned and nothing is modified.
 * Multi-byte integers are decoded least-significant byte first.
 */

/* Copy count raw bytes into dest; fails if count <= 0 or exceeds the
 * remaining length. */
static int _unmarshal_char_array(unsigned char **pData,
                                 unsigned int *pLenRemain,
                                 char *dest,
                                 int count)
{
    if (count <= 0  ||  (unsigned int)count > *pLenRemain)
        return 0;
    memcpy(dest, (*pData), count);
    *pData += count;
    *pLenRemain -= count;
    return 1;
}

/* Same as _unmarshal_char_array, for an unsigned char destination. */
static int _unmarshal_uchar_array(unsigned char **pData,
                                  unsigned int *pLenRemain,
                                  unsigned char *dest,
                                  int count)
{
    if (count <= 0  ||  (unsigned int)count > *pLenRemain)
        return 0;
    memcpy(dest, (*pData), count);
    *pData += count;
    *pLenRemain -= count;
    return 1;
}

/* Decode a 2-byte integer into a signed 16-bit destination. */
static int _unmarshal_int16(unsigned char **pData,
                            unsigned int *pLenRemain,
                            Int16 *dest)
{
    if (2 > *pLenRemain)
        return 0;
    *dest = (*pData)[0] | (*pData)[1]<<8;
    *pData += 2;
    *pLenRemain -= 2;
    return 1;
}

/* Decode a 2-byte integer into an unsigned 16-bit destination. */
static int _unmarshal_uint16(unsigned char **pData,
                             unsigned int *pLenRemain,
                             UInt16 *dest)
{
    if (2 > *pLenRemain)
        return 0;
    *dest = (*pData)[0] | (*pData)[1]<<8;
    *pData += 2;
    *pLenRemain -= 2;
    return 1;
}

/* Decode a 4-byte two's-complement integer into a signed destination. */
static int _unmarshal_int32(unsigned char **pData,
                            unsigned int *pLenRemain,
                            Int32 *dest)
{
    const unsigned char *p;
    UInt32 raw;

    if (4 > *pLenRemain)
        return 0;

    /* assemble in an unsigned type: shifting a promoted int left by 24 is
     * undefined whenever the top byte is >= 0x80 */
    p = *pData;
    raw = (UInt32)p[0]
        | (UInt32)p[1] << 8
        | (UInt32)p[2] << 16
        | (UInt32)p[3] << 24;

    /* reinterpret bit 31 as the sign without relying on the width of Int32
     * or on an out-of-range unsigned-to-signed conversion */
    if (raw & ((UInt32)1 << 31))
        *dest = -(Int32)(~raw & (((UInt32)1 << 31) - 1)) - 1;
    else
        *dest = (Int32)raw;

    *pData += 4;
    *pLenRemain -= 4;
    return 1;
}

/* Decode a 4-byte integer into an unsigned destination. */
static int _unmarshal_uint32(unsigned char **pData,
                             unsigned int *pLenRemain,
                             UInt32 *dest)
{
    const unsigned char *p;

    if (4 > *pLenRemain)
        return 0;

    /* widen each byte first so the value is never sign-extended, even when
     * UInt32 is wider than int */
    p = *pData;
    *dest = (UInt32)p[0]
          | (UInt32)p[1] << 8
          | (UInt32)p[2] << 16
          | (UInt32)p[3] << 24;

    *pData += 4;
    *pLenRemain -= 4;
    return 1;
}

/* Decode an 8-byte two's-complement integer into a signed destination. */
static int _unmarshal_int64(unsigned char **pData,
                            unsigned int *pLenRemain,
                            Int64 *dest)
{
    UInt64 raw;
    int i;

    if (8 > *pLenRemain)
        return 0;

    /* accumulate unsigned, most significant byte first; left-shifting a
     * signed accumulator into the sign bit would be undefined */
    raw = 0;
    for (i = 8; i > 0; i--)
    {
        raw <<= 8;
        raw |= (*pData)[i-1];
    }

    /* reinterpret bit 63 as the sign */
    if ((raw >> 63) & 1)
        *dest = -(Int64)(~raw & (((UInt64)1 << 63) - 1)) - 1;
    else
        *dest = (Int64)raw;

    *pData += 8;
    *pLenRemain -= 8;
    return 1;
}

/* Decode an 8-byte integer into an unsigned destination. */
static int _unmarshal_uint64(unsigned char **pData,
                             unsigned int *pLenRemain,
                             UInt64 *dest)
{
    UInt64 temp;
    int i;

    if (8 > *pLenRemain)
        return 0;

    temp = 0;
    for (i = 8; i > 0; i--)
    {
        temp <<= 8;
        temp |= (*pData)[i-1];
    }
    *dest = temp;
    *pData += 8;
    *pLenRemain -= 8;
    return 1;
}

/* Copy a 16-byte UUID verbatim into dest. */
static int _unmarshal_uuid(unsigned char **pData,
                           unsigned int *pDataLen,
                           unsigned char *dest)
{
    return _unmarshal_uchar_array(pData, pDataLen, dest, 16);
}

/* names of sections essential to decompression */
static const char _CHMU_RESET_TABLE[] =
        "::DataSpace/Storage/MSCompressed/Transform/"
        "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/"
        "InstanceData/ResetTable";
static const char _CHMU_LZXC_CONTROLDATA[] =
        "::DataSpace/Storage/MSCompressed/ControlData";
static const char _CHMU_CONTENT[] =
        "::DataSpace/Storage/MSCompressed/Content";
static const char _CHMU_SPANINFO[] =
        "::DataSpace/Storage/MSCompressed/SpanInfo";

/*
 * structures local to this module
 */

/* structure of ITSF headers */
#define _CHM_ITSF_V2_LEN (0x58)
#define _CHM_ITSF_V3_LEN (0x60)
struct chmItsfHeader
{
    char        signature[4];           /*  0 (ITSF) */
    Int32       version;                /*  4 */
    Int32       header_len;             /*  8 */
    Int32       unknown_000c;           /*  c */
    UInt32      last_modified;          /* 10 */
    UInt32      lang_id;                /* 14 */
    UChar       dir_uuid[16];           /* 18 */
    UChar       stream_uuid[16];        /* 28 */
    UInt64      unknown_offset;         /* 38 */
    UInt64      unknown_len;            /* 40 */
    UInt64      dir_offset;             /* 48 */
    UInt64      dir_len;                /* 50 */
    UInt64      data_offset;            /* 58 (Not present before V3) */
}; /* __attribute__ ((aligned (1))); */

/*
 * Decode an ITSF header from a buffer of exactly 0x58 or 0x60 bytes.
 *
 * Returns 1 when the signature is "ITSF", the version is 2 or 3 and
 * header_len is at least the length for that version; returns 0 otherwise.
 * For a version 3 header data_offset is read from the trailing 8 bytes (and
 * decoding fails if they are absent); for version 2 it is computed as
 * dir_offset + dir_len.
 */
static int _unmarshal_itsf_header(unsigned char **pData,
                                  unsigned int *pDataLen,
                                  struct chmItsfHeader *dest)
{
    /* we only know how to deal with the 0x58 and 0x60 byte structures */
    if (*pDataLen != _CHM_ITSF_V2_LEN  &&  *pDataLen != _CHM_ITSF_V3_LEN)
        return 0;

    /* unmarshal common fields; these consume exactly 0x58 bytes, so with the
     * length check above none of the calls can fail */
    _unmarshal_char_array(pData, pDataLen,  dest->signature, 4);
    _unmarshal_int32     (pData, pDataLen, &dest->version);
    _unmarshal_int32     (pData, pDataLen, &dest->header_len);
    _unmarshal_int32     (pData, pDataLen, &dest->unknown_000c);
    _unmarshal_uint32    (pData, pDataLen, &dest->last_modified);
    _unmarshal_uint32    (pData, pDataLen, &dest->lang_id);
    _unmarshal_uuid      (pData, pDataLen,  dest->dir_uuid);
    _unmarshal_uuid      (pData, pDataLen,  dest->stream_uuid);
    _unmarshal_uint64    (pData, pDataLen, &dest->unknown_offset);
    _unmarshal_uint64    (pData, pDataLen, &dest->unknown_len);
    _unmarshal_uint64    (pData, pDataLen, &dest->dir_offset);
    _unmarshal_uint64    (pData, pDataLen, &dest->dir_len);

    /* error check the data */
    /* XXX: should also check UUIDs, probably, though with a version 3 file,
     * current MS tools do not seem to use them.
     */
    if (memcmp(dest->signature, "ITSF", 4) != 0)
        return 0;
    if (dest->version == 2)
    {
        if (dest->header_len < _CHM_ITSF_V2_LEN)
            return 0;
    }
    else if (dest->version == 3)
    {
        if (dest->header_len < _CHM_ITSF_V3_LEN)
            return 0;
    }
    else
        return 0;

    /* now, if we have a V3 structure, unmarshal the rest.
     * otherwise, compute it
     */
    if (dest->version == 3)
    {
        if (*pDataLen != 0)
            _unmarshal_uint64(pData, pDataLen, &dest->data_offset);
        else
            return 0;
    }
    else
        dest->data_offset = dest->dir_offset + dest->dir_len;

    return 1;
}

/* structure of ITSP headers */
#define _CHM_ITSP_V1_LEN (0x54)
struct chmItspHeader
{
    char        signature[4];           /*  0 (ITSP) */
    Int32       version;                /*  4 */
    Int32       header_len;             /*  8 */
    Int32       unknown_000c;           /*  c */
    UInt32      block_len;              /* 10 */
    Int32       blockidx_intvl;         /* 14 */
    Int32       index_depth;            /* 18 */
    Int32       index_root;             /* 1c */
    Int32       index_head;             /* 20 */
    Int32       unknown_0024;           /* 24 */
    UInt32      num_blocks;             /* 28 */
    Int32       unknown_002c;           /* 2c */
    UInt32      lang_id;                /* 30 */
    UChar       system_uuid[16];        /* 34 */
    UChar       unknown_0044[16];       /* 44 */
}; /* __attribute__ ((aligned (1))); */

/*
 * Decode an ITSP header from a buffer of exactly 0x54 bytes.
 *
 * Returns 1 when the signature is "ITSP", the version is 1 and header_len
 * equals 0x54; returns 0 otherwise.
 */
static int _unmarshal_itsp_header(unsigned char **pData,
                                  unsigned int *pDataLen,
                                  struct chmItspHeader *dest)
{
    /* we only know how to deal with a 0x54 byte structures */
    if (*pDataLen != _CHM_ITSP_V1_LEN)
        return 0;

    /* unmarshal fields; these consume exactly 0x54 bytes, so with the length
     * check above none of the calls can fail */
    _unmarshal_char_array(pData, pDataLen,  dest->signature, 4);
    _unmarshal_int32     (pData, pDataLen, &dest->version);
    _unmarshal_int32     (pData, pDataLen, &dest->header_len);
    _unmarshal_int32     (pData, pDataLen, &dest->unknown_000c);
    _unmarshal_uint32    (pData, pDataLen, &dest->block_len);
    _unmarshal_int32     (pData, pDataLen, &dest->blockidx_intvl);
    _unmarshal_int32     (pData, pDataLen, &dest->index_depth);
    _unmarshal_int32     (pData, pDataLen, &dest->index_root);
    _unmarshal_int32     (pData, pDataLen, &dest->index_head);
    _unmarshal_int32     (pData, pDataLen, &dest->unknown_0024);
    _unmarshal_uint32    (pData, pDataLen, &dest->num_blocks);
    _unmarshal_int32     (pData, pDataLen, &dest->unknown_002c);
    _unmarshal_uint32    (pData, pDataLen, &dest->lang_id);
    _unmarshal_uuid      (pData, pDataLen,  dest->system_uuid);
    _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16);

    /* error check the data */
    if (memcmp(dest->signature, "ITSP", 4) != 0)
        return 0;
    if (dest->version != 1)
        return 0;
    if (dest->header_len != _CHM_ITSP_V1_LEN)
        return 0;

    return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -