📄 chmfile.cpp

📁 一个CHM在LINUX下的使用工具
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*

  Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
   XML-RPC/Context ID code contributed by Eamon Millman / PCI Geomatics
  <millman@pcigeomatics.com>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/


#include <chmfile.h>
#include <contenttaghandler.h>
#include <chmlistctrl.h>
#include <wx/wx.h>
#include <wx/defs.h>
#include <wx/strconv.h>
#include <wx/fontmap.h>
#include <wx/treectrl.h>
#include <assert.h>

#include <bitfiddle.inl>


namespace {

// damn wxWidgets and its scoped ptr.
//
// Minimal owner for a new[]-allocated byte buffer: the array is released
// with delete[] when the object goes out of scope. Copying is disabled
// (copy constructor and assignment are declared private and never defined).
class UCharPtr {
public:
	explicit UCharPtr(unsigned char *p) : _p(p) {}
	~UCharPtr() { delete[] _p; }
	unsigned char *get() { return _p; }

private:
	UCharPtr(const UCharPtr&);
	UCharPtr& operator=(const UCharPtr&);

private:
	unsigned char *_p;
};

} // namespace


// Big-enough buffer size for use with various routines.
#define BUF_SIZE 4096


// Thanks to Vadim Zeitlin.
#define ANSI_CHARSET            0
#define DEFAULT_CHARSET         1
#define SYMBOL_CHARSET          2
#define SHIFTJIS_CHARSET        128
#define HANGEUL_CHARSET         129
#define HANGUL_CHARSET          129
#define GB2312_CHARSET          134
#define CHINESEBIG5_CHARSET     136
#define OEM_CHARSET             255
#define JOHAB_CHARSET           130
#define HEBREW_CHARSET          177
#define ARABIC_CHARSET          178
#define GREEK_CHARSET           161
#define TURKISH_CHARSET         162
#define VIETNAMESE_CHARSET      163
#define THAI_CHARSET            222
#define EASTEUROPE_CHARSET      238
#define RUSSIAN_CHARSET         204
#define MAC_CHARSET             77
#define BALTIC_CHARSET          186


// Hello, Microsoft
#define LANG_NEUTRAL		0x00 // check
#define LANG_ARABIC		0x01 // check
#define LANG_BULGARIAN		0x02 // check
#define LANG_CATALAN		0x03
#define LANG_CHINESE		0x04 // check
#define LANG_CZECH		0x05
#define LANG_DANISH		0x06
#define LANG_GERMAN		0x07
#define LANG_GREEK		0x08 // check
#define LANG_ENGLISH		0x09
#define LANG_SPANISH		0x0a
#define LANG_FINNISH		0x0b
#define LANG_FRENCH		0x0c
#define LANG_HEBREW		0x0d // check
#define LANG_HUNGARIAN		0x0e
#define LANG_ICELANDIC		0x0f
#define LANG_ITALIAN		0x10
#define LANG_JAPANESE		0x11 // check
#define LANG_KOREAN		0x12 // check
#define LANG_DUTCH		0x13
#define LANG_NORWEGIAN		0x14
#define LANG_POLISH		0x15
#define LANG_PORTUGUESE		0x16
#define LANG_ROMANIAN		0x18
#define LANG_RUSSIAN		0x19 // check
#define LANG_CROATIAN		0x1a
#define LANG_SERBIAN		0x1a
#define LANG_SLOVAK		0x1b
#define LANG_ALBANIAN		0x1c
#define LANG_SWEDISH		0x1d
#define LANG_THAI		0x1e // check
#define LANG_TURKISH		0x1f // check
#define LANG_URDU		0x20
#define LANG_INDONESIAN		0x21
#define LANG_UKRAINIAN		0x22 // check
#define LANG_BELARUSIAN		0x23
#define LANG_SLOVENIAN		0x24
#define LANG_ESTONIAN		0x25
#define LANG_LATVIAN		0x26
#define LANG_LITHUANIAN		0x27
#define LANG_FARSI		0x29
#define LANG_VIETNAMESE		0x2a
#define LANG_ARMENIAN		0x2b
#define LANG_AZERI		0x2c
#define LANG_BASQUE		0x2d
#define LANG_MACEDONIAN		0x2f
#define LANG_AFRIKAANS		0x36
#define LANG_GEORGIAN		0x37
#define LANG_FAEROESE		0x38
#define LANG_HINDI		0x39
#define LANG_MALAY		0x3e
#define LANG_KAZAK		0x3f
#define LANG_KYRGYZ		0x40
#define LANG_SWAHILI		0x41
#define LANG_UZBEK		0x43
#define LANG_TATAR		0x44
#define LANG_BENGALI		0x45
#define LANG_PUNJABI		0x46
#define LANG_GUJARATI		0x47
#define LANG_ORIYA		0x48
#define LANG_TAMIL		0x49
#define LANG_TELUGU		0x4a
#define LANG_KANNADA		0x4b
#define LANG_MALAYALAM		0x4c
#define LANG_ASSAMESE		0x4d
#define LANG_MARATHI		0x4e
#define LANG_SANSKRIT		0x4f
#define LANG_MONGOLIAN		0x50
#define LANG_GALICIAN		0x56
#define LANG_KONKANI		0x57
#define LANG_MANIPURI		0x58
#define LANG_SINDHI		0x59
#define LANG_SYRIAC		0x5a
#define LANG_KASHMIRI		0x60
#define LANG_NEPALI		0x61
#define LANG_DIVEHI		0x65


// Creates an object with no archive attached; the home page is "/".
CHMFile::CHMFile()
	: _chmFile(NULL), _home(wxT("/"))
{}


// Creates an object and immediately tries to load archiveName.
// The result of LoadCHM() is not reported by the constructor.
CHMFile::CHMFile(const wxString& archiveName)
	: _chmFile(NULL), _home(wxT("/"))
{
	LoadCHM(archiveName);
}


// Closes the archive, if one is open.
CHMFile::~CHMFile()
{
	CloseCHM();
}


// Opens archiveName, closing any archive that is currently open first,
// then reads the archive information and the context-ID map.
//
// Returns false if chm_open() fails, true otherwise (the results of
// GetArchiveInfo() and LoadContextIDs() do not affect the return value).
bool CHMFile::LoadCHM(const wxString&  archiveName)
{
	if(_chmFile)
		CloseCHM();

	assert(_chmFile == NULL);

	_chmFile = chm_open(static_cast<const char *>(archiveName.mb_str()));

	if(_chmFile == NULL)
		return false;

	_enc = wxFONTENCODING_SYSTEM;
	_filename = archiveName;

	GetArchiveInfo();
	LoadContextIDs();

#if wxUSE_UNICODE
	// Fix the title
	if(_enc != wxFONTENCODING_SYSTEM) {
		wxCSConv cv(_enc);
		wchar_t buf2[BUF_SIZE];

		size_t len = (_title.length() < BUF_SIZE) ?
			_title.length() : BUF_SIZE;
		size_t ret = cv.MB2WC(buf2, _title.mb_str(), len);

		// MB2WC() reports failure as (size_t)-1; that value must
		// not be used as the length of the converted string.
		if(ret != 0 && ret != static_cast<size_t>(-1))
			_title = wxString(buf2, ret);
	}
#endif

	return true;
}


// Closes the current archive (no-op when none is open) and resets the
// cached per-archive state.
void CHMFile::CloseCHM()
{
	if(_chmFile == NULL)
		return;

	chm_close(_chmFile);

	_cidMap.clear();
	_chmFile = NULL;

	_filename = _topicsFile = _indexFile = _title
		= _font = wxEmptyString;

	// Same home page the constructors start with. Previously this
	// assignment was immediately overwritten by the chained
	// wxEmptyString assignment above.
	_home = wxT("/");
}


// Parses the topics file of the archive, feeding toBuild through a
// ContentTagHandler.
//
// Returns false if toBuild is NULL, if no topics file is known or it
// cannot be resolved, or if its contents come back empty.
bool CHMFile::GetTopicsTree(wxTreeCtrl *toBuild)
{
	chmUnitInfo ui;

	if(!toBuild)
		return false;

	if(_topicsFile.IsEmpty() || !ResolveObject(_topicsFile, &ui))
		return false;

	wxString src;
	src.Alloc(ui.length);
	GetFileAsString(src, &ui);

	if(src.IsEmpty())
		return false;

	ContentParser parser;
	parser.AddTagHandler(new ContentTagHandler(_enc, toBuild));

	parser.Parse(src);
	return true;
}


// Parses the index file of the archive, feeding toBuild through a
// ContentTagHandler, then asks the list control to refresh itself.
//
// Returns false if toBuild is NULL, if no index file is known or it
// cannot be resolved, or if its contents come back empty.
bool CHMFile::GetIndex(CHMListCtrl* toBuild)
{
	chmUnitInfo ui;

	if(!toBuild)
		return false;

	if(_indexFile.IsEmpty() || !ResolveObject(_indexFile, &ui))
		return false;

	wxString src;
	src.Alloc(ui.length);
	GetFileAsString(src, &ui);

	if(src.IsEmpty())
		return false;

	ContentParser parser;
	parser.AddTagHandler(new ContentTagHandler(_enc, NULL, toBuild));

	parser.Parse(src);
	toBuild->UpdateUI();

	return true;
}


// Rebuilds the context-ID -> file name map from the archive.
//
// #IVB has the list of context IDs together with offsets into #STRINGS,
// where the file names live. The map is always cleared first.
//
// Returns false if either internal file is missing or unreadable, or if
// #IVB does not have the expected shape. Entries whose offset points
// outside the retrieved #STRINGS data are skipped.
bool CHMFile::LoadContextIDs()
{
	chmUnitInfo ivb_ui, strs_ui;

	_cidMap.clear();

	// make sure what we need is there.
	if(chm_resolve_object(_chmFile, "/#IVB", &ivb_ui) !=
	   CHM_RESOLVE_SUCCESS ||
	   chm_resolve_object(_chmFile, "/#STRINGS", &strs_ui) !=
	   CHM_RESOLVE_SUCCESS)
		return false; // failed to find internal files

	UCharPtr ivb_buf(new unsigned char[ivb_ui.length]);
	u_int64_t ivb_bytes = chm_retrieve_object(_chmFile, &ivb_ui,
						  ivb_buf.get(), 0,
						  ivb_ui.length);

	// Need at least the leading DWORD; this also keeps the
	// subtraction below from wrapping around.
	if(ivb_bytes < sizeof(u_int32_t))
		return false; // failed to retrieve data

	// DWORD + 2(n)*DWORD: drop the leading DWORD, the rest must be
	// an even number of DWORDs.
	u_int64_t dword_count = ivb_bytes / sizeof(u_int32_t) - 1;

	if(dword_count % 2 != 0)
		return false; // we retrieved unexpected data from the file.

	// One spare byte so that the data can always be NUL-terminated.
	UCharPtr strs_buf(new unsigned char[strs_ui.length + 1]);
	u_int64_t strs_len = chm_retrieve_object(_chmFile, &strs_ui,
						 strs_buf.get(), 0,
						 strs_ui.length);

	if(strs_len == 0)
		return false; // failed to retrieve data

	strs_buf.get()[strs_len] = 0;

	// Skip the first DWORD, then walk the (context ID, offset) pairs.
	unsigned char *cursor = ivb_buf.get() + sizeof(u_int32_t);

	for(u_int64_t i = 0; i < dword_count; i += 2) {
		u_int32_t cid = UINT32ARRAY(cursor);
		cursor += sizeof(u_int32_t);

		u_int32_t str_offset = UINT32ARRAY(cursor);
		cursor += sizeof(u_int32_t);

		// Offsets come from the file; never read past the data
		// that was actually retrieved.
		if(str_offset >= strs_len)
			continue;

		// context-IDs as KEY, fileName from #STRINGS as VALUE
		_cidMap[cid] = CURRENT_CHAR_STRING(
			strs_buf.get() + str_offset);
	}

	// everything went well!
	return true;
}


// Returns true if contextID is a key of the context-ID map.
bool CHMFile::IsValidCID( const int contextID )
{
	CHMIDMap::iterator itr = _cidMap.find( contextID );

	return itr != _cidMap.end();
}


// Returns "/" followed by the file name mapped to contextID, or just
// "/" when the ID is not in the map.
wxString CHMFile::GetPageByCID( const int contextID )
{
	CHMIDMap::iterator itr = _cidMap.find( contextID );

	// make sure the key/value pair is valid
	if(itr == _cidMap.end())
		return wxT("/");

	return wxString(wxT("/")) + itr->second;
}


// Looks text up in the archive's /$FIftiMain object and hands every
// matching entry to ProcessWLC(), which receives results.
//
//   text        word (or word prefix) to look for; must not be empty
//   wholeWords  only accept words equal to text (case-insensitive)
//   titlesOnly  skip entries whose title byte is zero
//   results     destination passed on to ProcessWLC(); must not be NULL
//
// With wholeWords set, the return value is that of ProcessWLC() for the
// matching word (false if there is none). Otherwise the return value
// tells whether at least one word starting with text was found.
// Returns false when a required internal file is missing or unreadable,
// and stops early when a leaf node looks corrupt.
bool CHMFile::IndexSearch(const wxString& text, bool wholeWords,
			  bool titlesOnly, CHMSearchResults *results)
{
	bool partial = false;

	if(text.IsEmpty() || results == NULL)
		return false;

	chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;

	if(::chm_resolve_object(_chmFile, "/$FIftiMain", &ui) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#TOPICS", &uitopics) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#STRINGS", &uistrings) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#URLTBL", &uiurltbl) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#URLSTR", &uiurlstr) !=
	   CHM_RESOLVE_SUCCESS)
		return false;

#define FTS_HEADER_LEN 0x32
	unsigned char header[FTS_HEADER_LEN];

	// Every field read below lies inside the header, so a short read
	// would leave some of them uninitialised.
	if(::chm_retrieve_object(_chmFile, &ui,
				 header, 0, FTS_HEADER_LEN)
	   != FTS_HEADER_LEN)
		return false;

	unsigned char doc_index_s = header[0x1E], doc_index_r = header[0x1F];
	unsigned char code_count_s = header[0x20], code_count_r = header[0x21];
	unsigned char loc_codes_s = header[0x22], loc_codes_r = header[0x23];

	if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
		// Don't know how to use values other than 2 yet. Maybe
		// next chmspec.
		return false;
	}

	unsigned char* cursor32 = header + 0x14;
	u_int32_t node_offset = UINT32ARRAY(cursor32);

	cursor32 = header + 0x2e;
	u_int32_t node_len = UINT32ARRAY(cursor32);

	unsigned char* cursor16 = header + 0x18;
	u_int16_t tree_depth = UINT16ARRAY(cursor16);

	// Bytes in front of the first entry of a leaf node, as skipped by
	// the parsing code below (DWORD + WORD + WORD).
	const u_int32_t leaf_header_len =
		sizeof(u_int32_t) + sizeof(u_int16_t) + sizeof(u_int16_t);

	if(node_len < leaf_header_len)
		return false;

	unsigned char word_len, pos;
	wxString word;
	u_int32_t i;
	u_int16_t free_space;

	UCharPtr buffer(new unsigned char[node_len]);

	node_offset = GetLeafNodeOffset(text, node_offset, node_len,
					tree_depth, &ui);

	if(!node_offset)
		return false;

	do {
		// got a leaf node here.
		if(::chm_retrieve_object(_chmFile, &ui, buffer.get(),
					 node_offset, node_len) == 0)
			return false;

		// The first DWORD of the node is used as the offset of the
		// node to visit next. Reading it once per node (instead of
		// once per entry) also covers nodes without entries.
		cursor32 = buffer.get();
		node_offset = UINT32ARRAY(cursor32);

		cursor16 = buffer.get() + 6;
		free_space = UINT16ARRAY(cursor16);

		if(free_space > node_len)
			return partial; // corrupt node

		const u_int32_t used_end = node_len - free_space;

		i = leaf_header_len;

		u_int64_t wlc_count, wlc_size;
		u_int32_t wlc_offset;

		while(i < used_end) {
			if(used_end - i < 2)
				return partial; // truncated entry

			word_len = *(buffer.get() + i);
			pos = *(buffer.get() + i + 1);

			// word_len counts the trailing title byte as well,
			// so zero is never valid; the whole entry must
			// also fit in the used part of the node.
			if(word_len == 0 || used_end - i - 2 < word_len)
				return partial; // corrupt entry

			char *wrd_buf = new char[word_len];
			memcpy(wrd_buf, buffer.get() + i + 2, word_len - 1);
			wrd_buf[word_len - 1] = 0;

			// pos is the number of leading characters shared
			// with the previous word.
			if(pos == 0)
				word = CURRENT_CHAR_STRING(wrd_buf);
			else
				word = word.Mid(0, pos) +
					CURRENT_CHAR_STRING(wrd_buf);

			delete[] wrd_buf;

			i += 2 + word_len;
			unsigned char title = *(buffer.get() + i - 1);

			size_t encsz;
			wlc_count = be_encint(buffer.get() + i, encsz);
			i += encsz;

			if(i > node_len || node_len - i <
			   sizeof(u_int32_t) + sizeof(u_int16_t))
				return partial; // truncated entry

			cursor32 = buffer.get() + i;
			wlc_offset = UINT32ARRAY(cursor32);

			i += sizeof(u_int32_t) + sizeof(u_int16_t);
			wlc_size = be_encint(buffer.get() + i, encsz);
			i += encsz;

			if(!title && titlesOnly)
				continue;

			if(wholeWords && !text.CmpNoCase(word))
				return ProcessWLC(wlc_count, wlc_size,
						  wlc_offset, doc_index_s,
						  doc_index_r,code_count_s,
						  code_count_r, loc_codes_s,
						  loc_codes_r, &ui, &uiurltbl,
						  &uistrings, &uitopics,
						  &uiurlstr, results);

			if(!wholeWords) {
				if(word.StartsWith(text.c_str())) {
					partial = true;
					ProcessWLC(wlc_count, wlc_size,
						   wlc_offset, doc_index_s,
						   doc_index_r,code_count_s,
						   code_count_r, loc_codes_s,
						   loc_codes_r, &ui, &uiurltbl,
						   &uistrings, &uitopics,
						   &uiurlstr, results);
				} else if(text.CmpNoCase(
						  // Mid() might be buggy.
						  word.Mid(0, text.Length()))
					  < -1)
					// NOTE(review): "< -1" rather than
					// "< 0" looks unusual; kept as is
					// because changing it could drop
					// results - confirm the intent.
					break;
			}

			if(results->size() >= MAX_SEARCH_RESULTS)
				break;
		}
	} while(!wholeWords && word.StartsWith(text.c_str()) && node_offset);

	return partial;
}


// Resolves fileName inside the open archive and fills *ui.
// Returns false when no archive is open or the object is not found.
bool CHMFile::ResolveObject(const wxString& fileName, chmUnitInfo *ui)
{
	return _chmFile != NULL &&
		::chm_resolve_object(_chmFile,
				     static_cast<const char *>(
					     fileName.mb_str()),
				     ui)
		== CHM_RESOLVE_SUCCESS;
}


// Thin wrapper around chm_retrieve_object() for the open archive;
// returns whatever that call returns.
size_t CHMFile::RetrieveObject(chmUnitInfo *ui, unsigned char *buffer,
			       off_t fileOffset, size_t bufferSize)
{
	return ::chm_retrieve_object(_chmFile, ui, buffer, fileOffset,
				     bufferSize);
}


bool CHMFile::GetArchiveInfo()
{
	bool retw = InfoFromWindows();
	bool rets = InfoFromSystem();
12 下一页
💿 文件大小 327 K
👤 上传用户 yitiaojin135
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#LINUX #CHM
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -