/* chm_lib.cpp  (web code-viewer residue: file-name banner and "Font size:" control) */
/* $Id: chm_lib.c,v 1.18 2002/10/10 03:24:51 jedwin Exp $ */
/***************************************************************************
* chm_lib.c - CHM archive manipulation routines *
* ------------------- *
* *
* author: Jed Wing <jedwin@ugcs.caltech.edu> *
* version: 0.3 *
* notes: These routines are meant for the manipulation of microsoft *
* .chm (compiled html help) files, but may likely be used *
* for the manipulation of any ITSS archive, if ever ITSS *
* archives are used for any other purpose. *
* *
* Note also that the section names are statically handled. *
* To be entirely correct, the section names should be read *
* from the section names meta-file, and then the various *
* content sections and the "transforms" to apply to the data *
* they contain should be inferred from the section name and *
* the meta-files referenced using that name; however, all of *
* the files I've been able to get my hands on appear to have *
* only two sections: Uncompressed and MSCompressed. *
* Additionally, the ITSS.DLL file included with Windows does *
* not appear to handle any different transforms than the *
* simple LZX-transform. Furthermore, the list of transforms *
* to apply is broken, in that only half the required space *
* is allocated for the list. (It appears as though the *
* space is allocated for ASCII strings, but the strings are *
* written as unicode. As a result, only the first half of *
* the string appears.) So this is probably not too big of *
* a deal, at least until CHM v4 (MS .lit files), which also *
* incorporate encryption, of some description. *
* *
* switches: CHM_MT: compile library with thread-safety *
* *
* switches (Linux only): *
* CHM_USE_PREAD: compile library to use pread instead of *
* lseek/read *
* CHM_USE_IO64: compile library to support full 64-bit I/O *
* as is needed to properly deal with the *
* 64-bit file offsets. *
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation; either version 2.1 of the *
* License, or (at your option) any later version. *
* *
***************************************************************************/
#include "chm_lib.h"
#include "lzx.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <windows.h>
#include <malloc.h>
/*
 * file-handle helpers
 *
 * CHM_NULL_FD expands to NULL and CHM_CLOSE_FILE(fd) expands to fclose(fd).
 * NOTE(review): presumably the archive is accessed through a stdio FILE* in
 * this build and CHM_NULL_FD is its "no file" sentinel -- confirm against the
 * code that opens the file.
 */
#define CHM_NULL_FD (NULL)
#define CHM_CLOSE_FILE(fd) fclose((fd))
/*
* defines related to tuning
*
* CHM_MAX_BLOCKS_CACHED defaults to 5 and may be overridden by defining it
* before this point (e.g. on the compiler command line).
*/
#ifndef CHM_MAX_BLOCKS_CACHED
#define CHM_MAX_BLOCKS_CACHED 5
#endif
/*
 * ffs - find the first (least significant) set bit in val.
 *
 * Returns the 1-based position of the lowest set bit (1 for bit 0, 2 for
 * bit 1, ...), or 0 when val has no bits set.
 *
 * The probe mask is unsigned on purpose: a signed mask would have to be
 * shifted into and past the sign bit to reach the top bit or to detect
 * val == 0, and that shift is undefined behavior in C.
 */
static int ffs(unsigned int val)
{
    unsigned int mask = 1u;
    int idx = 1;

    /* no bit set: nothing to find (also guarantees the loop terminates) */
    if (val == 0)
        return 0;

    while ((val & mask) == 0)
    {
        mask <<= 1;
        ++idx;
    }
    return idx;
}
/*
* architecture specific defines
*
* Note: as soon as C99 is more widespread, the below defines should
* probably just use the C99 sized-int types.
*
* The following settings will probably work for many platforms. The sizes
* don't have to be exactly correct, but the types must accommodate at least as
* many bits as they specify.
*/
/* i386, 32-bit, Windows */
/*
 * Sized integer aliases used by the unmarshalling code and header structures
 * below.  They are spelled with the compiler-specific __intN keywords, so
 * this section must be adapted when building with a compiler that does not
 * provide them.
 */
typedef unsigned char UChar;
typedef __int16 Int16;
typedef unsigned __int16 UInt16;
typedef __int32 Int32;
typedef unsigned __int32 UInt32;
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
/* utilities for unmarshalling data */
/*
 * Copy `count` raw bytes from the input cursor into `dest`.
 *
 *   pData       in/out: read cursor; advanced by `count` on success.
 *   pLenRemain  in/out: bytes left at the cursor; reduced by `count` on
 *               success.
 *   dest        receives the bytes (no terminator is appended).
 *   count       number of bytes to copy; must be positive.
 *
 * Returns 1 on success.  Returns 0, leaving the cursor and the remaining
 * length untouched, when `count` is not positive or exceeds the bytes left.
 */
static int _unmarshal_char_array(unsigned char **pData,
                                 unsigned long *pLenRemain,
                                 char *dest,
                                 int count)
{
    const unsigned char *src = *pData;

    if (count > 0 && (unsigned int)count <= *pLenRemain)
    {
        memcpy(dest, src, count);
        *pData = *pData + count;
        *pLenRemain = *pLenRemain - count;
        return 1;
    }
    return 0;
}
/*
 * Unsigned-byte variant of the raw array reader: copies `count` bytes from
 * the input cursor into `dest`.
 *
 *   pData       in/out: read cursor; advanced by `count` on success.
 *   pLenRemain  in/out: bytes left at the cursor; reduced by `count` on
 *               success.
 *   dest        destination buffer of at least `count` bytes.
 *   count       number of bytes to copy; must be positive.
 *
 * Returns 1 on success, 0 (with nothing consumed) if `count` is zero,
 * negative, or larger than the number of bytes remaining.
 */
static int _unmarshal_uchar_array(unsigned char **pData,
                                  unsigned long *pLenRemain,
                                  unsigned char *dest,
                                  int count)
{
    unsigned long avail = *pLenRemain;

    if (count <= 0)
        return 0;
    if ((unsigned int)count > avail)
        return 0;

    memcpy(dest, *pData, count);
    *pData += count;
    *pLenRemain = avail - count;
    return 1;
}
/*
 * Read a 16-bit little-endian value (low byte first) from the input cursor
 * into *dest.
 *
 * On success the cursor advances by 2, *pLenRemain drops by 2 and 1 is
 * returned.  If fewer than 2 bytes remain, nothing is consumed and 0 is
 * returned.
 */
static int _unmarshal_int16(unsigned char **pData,
                            unsigned long *pLenRemain,
                            Int16 *dest)
{
    const unsigned char *in = *pData;

    if (*pLenRemain >= 2)
    {
        *dest = in[1] << 8 | in[0];
        *pData = *pData + 2;
        *pLenRemain = *pLenRemain - 2;
        return 1;
    }
    return 0;
}
/*
 * Read an unsigned 16-bit little-endian value (low byte first) from the
 * input cursor into *dest.
 *
 * Returns 1 after consuming 2 bytes, or 0 without consuming anything when
 * fewer than 2 bytes remain.
 */
static int _unmarshal_uint16(unsigned char **pData,
                             unsigned long *pLenRemain,
                             UInt16 *dest)
{
    const unsigned char *in;

    if (*pLenRemain < 2)
        return 0;

    in = *pData;
    *dest = in[0] | in[1] << 8;

    *pData = *pData + 2;
    *pLenRemain = *pLenRemain - 2;
    return 1;
}
/*
 * Read a 32-bit little-endian value (low byte first) from the input cursor
 * into *dest.
 *
 * Returns 1 after consuming 4 bytes, or 0 without consuming anything when
 * fewer than 4 bytes remain.
 *
 * The bytes are assembled in UInt32 and converted to Int32 at the end.
 * Shifting a promoted (signed int) byte left by 24 overflows whenever the
 * top byte is >= 0x80, which is undefined behavior; the unsigned assembly
 * produces the same bit pattern without it.
 */
static int _unmarshal_int32(unsigned char **pData,
                            unsigned long *pLenRemain,
                            Int32 *dest)
{
    const unsigned char *in;
    UInt32 bits;

    if (4 > *pLenRemain)
        return 0;

    in = *pData;
    bits = (UInt32)in[0]
         | (UInt32)in[1] << 8
         | (UInt32)in[2] << 16
         | (UInt32)in[3] << 24;
    *dest = (Int32)bits;

    *pData += 4;
    *pLenRemain -= 4;
    return 1;
}
/*
 * Read an unsigned 32-bit little-endian value (low byte first) from the
 * input cursor into *dest.
 *
 * Returns 1 after consuming 4 bytes, or 0 without consuming anything when
 * fewer than 4 bytes remain.
 *
 * Each byte is widened to UInt32 before shifting: shifting the promoted
 * (signed int) byte left by 24 overflows for top bytes >= 0x80, which is
 * undefined behavior.
 */
static int _unmarshal_uint32(unsigned char **pData,
                             unsigned long *pLenRemain,
                             UInt32 *dest)
{
    const unsigned char *in;

    if (4 > *pLenRemain)
        return 0;

    in = *pData;
    *dest = (UInt32)in[0]
          | (UInt32)in[1] << 8
          | (UInt32)in[2] << 16
          | (UInt32)in[3] << 24;

    *pData += 4;
    *pLenRemain -= 4;
    return 1;
}
/*
 * Read a 64-bit little-endian value (low byte first) from the input cursor
 * into *dest.
 *
 * Returns 1 after consuming 8 bytes, or 0 without consuming anything when
 * fewer than 8 bytes remain.
 *
 * The value is accumulated in an unsigned 64-bit integer and converted to
 * Int64 once at the end.  Accumulating directly in a signed Int64 left-shifts
 * values that overflow or are already negative, which is undefined behavior.
 */
static int _unmarshal_int64(unsigned char **pData,
                            unsigned long *pLenRemain,
                            Int64 *dest)
{
    UInt64 acc;
    int i;

    if (8 > *pLenRemain)
        return 0;

    /* walk from the most significant byte (index 7) down to index 0 */
    acc = 0;
    for (i = 8; i > 0; i--)
    {
        acc <<= 8;
        acc |= (*pData)[i-1];
    }
    *dest = (Int64)acc;

    *pData += 8;
    *pLenRemain -= 8;
    return 1;
}
/*
 * Read an unsigned 64-bit little-endian value (low byte first) from the
 * input cursor into *dest.
 *
 * Returns 1 after consuming 8 bytes, or 0 without consuming anything when
 * fewer than 8 bytes remain.
 */
static int _unmarshal_uint64(unsigned char **pData,
                             unsigned long *pLenRemain,
                             UInt64 *dest)
{
    const unsigned char *in;
    UInt64 value = 0;
    int idx;

    if (*pLenRemain < 8)
        return 0;

    /* byte idx contributes bits [8*idx, 8*idx + 7] of the result */
    in = *pData;
    for (idx = 0; idx < 8; idx++)
        value |= (UInt64)in[idx] << (8 * idx);

    *dest = value;
    *pData = *pData + 8;
    *pLenRemain = *pLenRemain - 8;
    return 1;
}
/*
 * Read a 16-byte UUID from the input cursor into `dest` as raw bytes (no
 * field reordering is done).  Delegates to _unmarshal_uchar_array and
 * returns its result: 1 on success, 0 on failure.
 */
static int _unmarshal_uuid(unsigned char **pData,
                           unsigned long *pDataLen,
                           unsigned char *dest)
{
    enum { UUID_BYTE_COUNT = 16 };
    int ok;

    ok = _unmarshal_uchar_array(pData, pDataLen, dest, UUID_BYTE_COUNT);
    return ok;
}
/* names of sections essential to decompression
 *
 * Each constant is a fixed path string beginning with "::DataSpace/Storage/
 * MSCompressed/".
 * NOTE(review): presumably these are looked up as entry names inside the
 * archive by the decompression code further down the file -- confirm there.
 */
/* reset table entry under the transform's InstanceData */
static const char _CHMU_RESET_TABLE[] =
"::DataSpace/Storage/MSCompressed/Transform/"
"{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/"
"InstanceData/ResetTable";
/* control data entry (the LZXC name suggests LZX parameters -- TODO confirm) */
static const char _CHMU_LZXC_CONTROLDATA[] =
"::DataSpace/Storage/MSCompressed/ControlData";
/* content entry of the MSCompressed section */
static const char _CHMU_CONTENT[] =
"::DataSpace/Storage/MSCompressed/Content";
/* span info entry of the MSCompressed section */
static const char _CHMU_SPANINFO[] =
"::DataSpace/Storage/MSCompressed/SpanInfo";
/*
* structures local to this module
*/
/* structure of ITSF headers
 *
 * In-memory form of the ITSF header as filled in by _unmarshal_itsf_header.
 * The trailing comment on each field is its byte offset (hex) within the
 * serialized header; fields are read in declaration order as little-endian
 * integers or raw byte arrays.
 */
#define _CHM_ITSF_V2_LEN (0x58) /* serialized size of a version 2 header */
#define _CHM_ITSF_V3_LEN (0x60) /* serialized size of a version 3 header */
struct chmItsfHeader
{
char signature[4]; /* 0 (ITSF) */
Int32 version; /* 4  (only 2 and 3 are accepted) */
Int32 header_len; /* 8  (must be >= the length for its version) */
Int32 unknown_000c; /* c */
UInt32 last_modified; /* 10 */
UInt32 lang_id; /* 14 */
UChar dir_uuid[16]; /* 18 */
UChar stream_uuid[16]; /* 28 */
UInt64 unknown_offset; /* 38 */
UInt64 unknown_len; /* 40 */
UInt64 dir_offset; /* 48 */
UInt64 dir_len; /* 50 */
UInt64 data_offset; /* 58 (Not present before V3; for V2 it is computed
                     *     as dir_offset + dir_len) */
}; /* __attribute__ ((aligned (1))); */
/*
 * Decode and validate a serialized ITSF header.
 *
 *   pData     in/out: cursor into the serialized header; advanced past every
 *             field that is read.
 *   pDataLen  in/out: bytes available at the cursor; must be exactly
 *             _CHM_ITSF_V2_LEN or _CHM_ITSF_V3_LEN on entry.
 *   dest      receives the decoded fields.
 *
 * Returns 1 if the header is acceptable, 0 otherwise.  Note that on a
 * validation failure the cursor has already been advanced and `dest`
 * partially or fully written.
 */
static int _unmarshal_itsf_header(unsigned char **pData,
                                  unsigned long *pDataLen,
                                  struct chmItsfHeader *dest)
{
    /* only the 0x58-byte and 0x60-byte layouts are understood */
    if (!(*pDataLen == _CHM_ITSF_V2_LEN || *pDataLen == _CHM_ITSF_V3_LEN))
        return 0;

    /*
     * Fields shared by both versions (0x58 bytes in total).  The length
     * check above guarantees these reads cannot run out of input, so their
     * return values are not inspected.
     */
    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
    _unmarshal_int32(pData, pDataLen, &dest->version);
    _unmarshal_int32(pData, pDataLen, &dest->header_len);
    _unmarshal_int32(pData, pDataLen, &dest->unknown_000c);
    _unmarshal_uint32(pData, pDataLen, &dest->last_modified);
    _unmarshal_uint32(pData, pDataLen, &dest->lang_id);
    _unmarshal_uuid(pData, pDataLen, dest->dir_uuid);
    _unmarshal_uuid(pData, pDataLen, dest->stream_uuid);
    _unmarshal_uint64(pData, pDataLen, &dest->unknown_offset);
    _unmarshal_uint64(pData, pDataLen, &dest->unknown_len);
    _unmarshal_uint64(pData, pDataLen, &dest->dir_offset);
    _unmarshal_uint64(pData, pDataLen, &dest->dir_len);

    /* XXX: should also check UUIDs, probably, though with a version 3 file,
     *      current MS tools do not seem to use them.
     */
    if (memcmp(dest->signature, "ITSF", 4) != 0)
        return 0;

    switch (dest->version)
    {
    case 2:
        if (dest->header_len < _CHM_ITSF_V2_LEN)
            return 0;
        /* V2 carries no data offset: content starts right after the directory */
        dest->data_offset = dest->dir_offset + dest->dir_len;
        return 1;

    case 3:
        if (dest->header_len < _CHM_ITSF_V3_LEN)
            return 0;
        /* a V3 header must still have its trailing data_offset field left */
        if (*pDataLen == 0)
            return 0;
        _unmarshal_uint64(pData, pDataLen, &dest->data_offset);
        return 1;

    default:
        /* unknown header version */
        return 0;
    }
}
/* structure of ITSP headers
 *
 * In-memory form of the ITSP header as filled in by _unmarshal_itsp_header.
 * The trailing comment on each field is its byte offset (hex) within the
 * serialized header; fields are read in declaration order.
 */
#define _CHM_ITSP_V1_LEN (0x54) /* serialized size of a version 1 header */
struct chmItspHeader
{
char signature[4]; /* 0 (ITSP) */
Int32 version; /* 4  (must be 1) */
Int32 header_len; /* 8  (must equal _CHM_ITSP_V1_LEN) */
Int32 unknown_000c; /* c */
UInt32 block_len; /* 10 */
Int32 blockidx_intvl; /* 14 */
Int32 index_depth; /* 18 */
Int32 index_root; /* 1c */
Int32 index_head; /* 20 */
Int32 unknown_0024; /* 24 */
UInt32 num_blocks; /* 28 */
Int32 unknown_002c; /* 2c */
UInt32 lang_id; /* 30 */
UChar system_uuid[16]; /* 34 */
UChar unknown_0044[16]; /* 44 */
}; /* __attribute__ ((aligned (1))); */
/*
 * Decode and validate a serialized ITSP header.
 *
 *   pData     in/out: cursor into the serialized header; advanced past every
 *             field that is read.
 *   pDataLen  in/out: bytes available at the cursor; must be exactly
 *             _CHM_ITSP_V1_LEN on entry.
 *   dest      receives the decoded fields.
 *
 * Returns 1 when the signature is "ITSP", the version is 1 and header_len
 * equals _CHM_ITSP_V1_LEN; returns 0 otherwise.  On a validation failure the
 * cursor has already been advanced and `dest` written.
 */
static int _unmarshal_itsp_header(unsigned char **pData,
                                  unsigned long *pDataLen,
                                  struct chmItspHeader *dest)
{
    /* only the 0x54-byte layout is understood */
    if (*pDataLen != _CHM_ITSP_V1_LEN)
        return 0;

    /*
     * The fields below add up to exactly 0x54 bytes, so after the length
     * check none of these reads can run out of input.
     */
    _unmarshal_char_array(pData, pDataLen, dest->signature, 4);
    _unmarshal_int32(pData, pDataLen, &dest->version);
    _unmarshal_int32(pData, pDataLen, &dest->header_len);
    _unmarshal_int32(pData, pDataLen, &dest->unknown_000c);
    _unmarshal_uint32(pData, pDataLen, &dest->block_len);
    _unmarshal_int32(pData, pDataLen, &dest->blockidx_intvl);
    _unmarshal_int32(pData, pDataLen, &dest->index_depth);
    _unmarshal_int32(pData, pDataLen, &dest->index_root);
    _unmarshal_int32(pData, pDataLen, &dest->index_head);
    _unmarshal_int32(pData, pDataLen, &dest->unknown_0024);
    _unmarshal_uint32(pData, pDataLen, &dest->num_blocks);
    _unmarshal_int32(pData, pDataLen, &dest->unknown_002c);
    _unmarshal_uint32(pData, pDataLen, &dest->lang_id);
    _unmarshal_uuid(pData, pDataLen, dest->system_uuid);
    _unmarshal_uchar_array(pData, pDataLen, dest->unknown_0044, 16);

    /* accept only a well-formed version 1 header */
    return memcmp(dest->signature, "ITSP", 4) == 0
        && dest->version == 1
        && dest->header_len == _CHM_ITSP_V1_LEN;
}
/* structure of PMGL headers */
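/*
 * Web code-viewer residue (not part of chm_lib): keyboard shortcut help.
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */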