📄 chmfile.cpp
字号:
/*
  Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>
  XML-RPC/Context ID code contributed by Eamon Millman / PCI Geomatics
  <millman@pcigeomatics.com>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include <chmfile.h>
#include <contenttaghandler.h>
#include <chmlistctrl.h>
#include <wx/wx.h>
#include <wx/defs.h>
#include <wx/strconv.h>
#include <wx/fontmap.h>
#include <wx/treectrl.h>

#include <assert.h>
#include <string.h>

#include <bitfiddle.inl>


namespace {

// Minimal scoped owner for a new[]-allocated byte buffer (used instead of
// wxWidgets' own scoped pointer). The buffer is released with delete[]
// when the object goes out of scope.
class UCharPtr {
public:
	explicit UCharPtr(unsigned char *p) : _p(p) {}
	~UCharPtr() { delete[] _p; }

	// Returns the owned buffer; ownership stays with this object.
	unsigned char *get() { return _p; }

private:
	// Declared but not defined: copying would lead to a double delete[].
	UCharPtr(const UCharPtr&);
	UCharPtr& operator=(const UCharPtr&);

private:
	unsigned char *_p;
};

} // namespace


// Big-enough buffer size for use with various routines.
#define BUF_SIZE 4096


// Thanks to Vadim Zeitlin.
#define ANSI_CHARSET            0
#define DEFAULT_CHARSET         1
#define SYMBOL_CHARSET          2
#define SHIFTJIS_CHARSET        128
#define HANGEUL_CHARSET         129
#define HANGUL_CHARSET          129
#define GB2312_CHARSET          134
#define CHINESEBIG5_CHARSET     136
#define OEM_CHARSET             255
#define JOHAB_CHARSET           130
#define HEBREW_CHARSET          177
#define ARABIC_CHARSET          178
#define GREEK_CHARSET           161
#define TURKISH_CHARSET         162
#define VIETNAMESE_CHARSET      163
#define THAI_CHARSET            222
#define EASTEUROPE_CHARSET      238
#define RUSSIAN_CHARSET         204
#define MAC_CHARSET             77
#define BALTIC_CHARSET          186


// Hello, Microsoft
#define LANG_NEUTRAL            0x00 // check
#define LANG_ARABIC             0x01 // check
#define LANG_BULGARIAN          0x02 // check
#define LANG_CATALAN            0x03
#define LANG_CHINESE            0x04 // check
#define LANG_CZECH              0x05
#define LANG_DANISH             0x06
#define LANG_GERMAN             0x07
#define LANG_GREEK              0x08 // check
#define LANG_ENGLISH            0x09
#define LANG_SPANISH            0x0a
#define LANG_FINNISH            0x0b
#define LANG_FRENCH             0x0c
#define LANG_HEBREW             0x0d // check
#define LANG_HUNGARIAN          0x0e
#define LANG_ICELANDIC          0x0f
#define LANG_ITALIAN            0x10
#define LANG_JAPANESE           0x11 // check
#define LANG_KOREAN             0x12 // check
#define LANG_DUTCH              0x13
#define LANG_NORWEGIAN          0x14
#define LANG_POLISH             0x15
#define LANG_PORTUGUESE         0x16
#define LANG_ROMANIAN           0x18
#define LANG_RUSSIAN            0x19 // check
#define LANG_CROATIAN           0x1a
#define LANG_SERBIAN            0x1a
#define LANG_SLOVAK             0x1b
#define LANG_ALBANIAN           0x1c
#define LANG_SWEDISH            0x1d
#define LANG_THAI               0x1e // check
#define LANG_TURKISH            0x1f // check
#define LANG_URDU               0x20
#define LANG_INDONESIAN         0x21
#define LANG_UKRAINIAN          0x22 // check
#define LANG_BELARUSIAN         0x23
#define LANG_SLOVENIAN          0x24
#define LANG_ESTONIAN           0x25
#define LANG_LATVIAN            0x26
#define LANG_LITHUANIAN         0x27
#define LANG_FARSI              0x29
#define LANG_VIETNAMESE         0x2a
#define LANG_ARMENIAN           0x2b
#define LANG_AZERI              0x2c
#define LANG_BASQUE             0x2d
#define LANG_MACEDONIAN         0x2f
#define LANG_AFRIKAANS          0x36
#define LANG_GEORGIAN           0x37
#define LANG_FAEROESE           0x38
#define LANG_HINDI              0x39
#define LANG_MALAY              0x3e
#define LANG_KAZAK              0x3f
#define LANG_KYRGYZ             0x40
#define LANG_SWAHILI            0x41
#define LANG_UZBEK              0x43
#define LANG_TATAR              0x44
#define LANG_BENGALI            0x45
#define LANG_PUNJABI            0x46
#define LANG_GUJARATI           0x47
#define LANG_ORIYA              0x48
#define LANG_TAMIL              0x49
#define LANG_TELUGU             0x4a
#define LANG_KANNADA            0x4b
#define LANG_MALAYALAM          0x4c
#define LANG_ASSAMESE           0x4d
#define LANG_MARATHI            0x4e
#define LANG_SANSKRIT           0x4f
#define LANG_MONGOLIAN          0x50
#define LANG_GALICIAN           0x56
#define LANG_KONKANI            0x57
#define LANG_MANIPURI           0x58
#define LANG_SINDHI             0x59
#define LANG_SYRIAC             0x5a
#define LANG_KASHMIRI           0x60
#define LANG_NEPALI             0x61
#define LANG_DIVEHI             0x65


// Creates an object with no archive attached; the home page is "/".
CHMFile::CHMFile()
	: _chmFile(NULL), _home(wxT("/"))
{}


// Creates an object and tries to load archiveName. The result of
// LoadCHM() is not reported by the constructor.
CHMFile::CHMFile(const wxString& archiveName)
	: _chmFile(NULL), _home(wxT("/"))
{
	LoadCHM(archiveName);
}


// Closes the archive, if one is open.
CHMFile::~CHMFile()
{
	CloseCHM();
}


// Opens archiveName, closing any archive that is currently open first,
// then reads the archive info and the context-ID table.
//
// Returns false only if chm_open() fails; the results of
// GetArchiveInfo() and LoadContextIDs() are not checked here.
bool CHMFile::LoadCHM(const wxString& archiveName)
{
	if(_chmFile)
		CloseCHM();

	assert(_chmFile == NULL);

	_chmFile = chm_open(static_cast<const char *>(archiveName.mb_str()));

	if(_chmFile == NULL)
		return false;

	_enc = wxFONTENCODING_SYSTEM;
	_filename = archiveName;
	GetArchiveInfo();
	LoadContextIDs();

#if wxUSE_UNICODE
	// Fix the title: re-decode it with the encoding found in the archive.
	if(_enc != wxFONTENCODING_SYSTEM) {
		wxCSConv cv(_enc);
		wchar_t buf2[BUF_SIZE];

		// The third argument is the capacity of the output buffer.
		size_t ret = cv.MB2WC(buf2, _title.mb_str(), BUF_SIZE);

		// MB2WC() reports failure as (size_t)-1, which must not be
		// mistaken for a length. Also never trust a length larger
		// than the buffer we own.
		const size_t conv_failed = static_cast<size_t>(-1);

		if(ret != conv_failed && ret != 0 && ret <= BUF_SIZE)
			_title = wxString(buf2, ret);
	}
#endif

	return true;
}


// Closes the current archive (no-op when none is open) and resets the
// per-archive state: the context-ID map is emptied, the string members
// are cleared and the home page goes back to "/", the same value the
// constructors set.
void CHMFile::CloseCHM()
{
	if(_chmFile == NULL)
		return;

	chm_close(_chmFile);

	_cidMap.clear();
	_chmFile = NULL;
	_filename = _topicsFile = _indexFile = _title = _font = wxEmptyString;

	// Assigned last so that it is not wiped by the chained assignment
	// above.
	_home = wxT("/");
}


// Parses the archive's topics file and feeds it to toBuild.
//
// Returns false if toBuild is NULL, if there is no topics file, if it
// cannot be resolved inside the archive, or if its contents come back
// empty.
bool CHMFile::GetTopicsTree(wxTreeCtrl *toBuild)
{
	chmUnitInfo ui;

	if(!toBuild)
		return false;

	if(_topicsFile.IsEmpty() || !ResolveObject(_topicsFile, &ui))
		return false;

	wxString src;
	src.Alloc(ui.length);
	GetFileAsString(src, &ui);

	if(src.IsEmpty())
		return false;

	ContentParser parser;
	parser.AddTagHandler(new ContentTagHandler(_enc, toBuild));
	parser.Parse(src);

	return true;
}


// Parses the archive's index file and feeds it to toBuild, then asks
// the list control to refresh itself.
//
// Returns false if toBuild is NULL, if there is no index file, if it
// cannot be resolved inside the archive, or if its contents come back
// empty.
bool CHMFile::GetIndex(CHMListCtrl* toBuild)
{
	chmUnitInfo ui;

	if(!toBuild)
		return false;

	if(_indexFile.IsEmpty() || !ResolveObject(_indexFile, &ui))
		return false;

	wxString src;
	src.Alloc(ui.length);
	GetFileAsString(src, &ui);

	if(src.IsEmpty())
		return false;

	ContentParser parser;
	parser.AddTagHandler(new ContentTagHandler(_enc, NULL, toBuild));
	parser.Parse(src);
	toBuild->UpdateUI();

	return true;
}


// Rebuilds the context-ID -> file name map from the archive.
//
// #IVB holds a leading DWORD followed by (context ID, #STRINGS offset)
// DWORD pairs; each offset points at a file name inside #STRINGS.
//
// Returns false if either internal file is missing or cannot be read, or
// if #IVB does not contain a whole number of pairs. Pairs whose offset
// lies outside the #STRINGS data that was actually read are skipped.
bool CHMFile::LoadContextIDs()
{
	chmUnitInfo ivb_ui, strs_ui;

	_cidMap.clear();

	// make sure what we need is there.
	if(chm_resolve_object(_chmFile, "/#IVB", &ivb_ui)
	   != CHM_RESOLVE_SUCCESS ||
	   chm_resolve_object(_chmFile, "/#STRINGS", &strs_ui)
	   != CHM_RESOLVE_SUCCESS)
		return false; // failed to find internal files

	UCharPtr ivb_buf(new unsigned char[ivb_ui.length]);
	u_int64_t ivb_len = 0;

	if((ivb_len = chm_retrieve_object(_chmFile, &ivb_ui, ivb_buf.get(),
					  0, ivb_ui.length)) == 0)
		return false; // failed to retrieve data

	// Not even the leading DWORD is there.
	if(ivb_len < sizeof(u_int32_t))
		return false;

	// always odd (DWORD + 2(n)*DWORD), so make even
	ivb_len = ivb_len/sizeof(u_int32_t) - 1;

	if(ivb_len % 2 != 0)
		return false; // we retrieved unexpected data from the file.

	// One extra byte so the data can always be NUL-terminated: the
	// file names are read as C strings straight out of this buffer.
	UCharPtr strs_buf(new unsigned char[strs_ui.length + 1]);
	u_int64_t strs_len = 0;

	if((strs_len = chm_retrieve_object(_chmFile, &strs_ui, strs_buf.get(),
					   0, strs_ui.length)) == 0)
		return false; // failed to retrieve data

	strs_buf.get()[strs_len] = 0;

	// Skip the first DWORD, then walk the pairs in place.
	unsigned char *entry = ivb_buf.get() + sizeof(u_int32_t);

	for(u_int64_t i = 0; i < ivb_len;
	    i += 2, entry += 2*sizeof(u_int32_t)) {

		unsigned char *name_off_ptr = entry + sizeof(u_int32_t);
		u_int32_t cid = UINT32ARRAY(entry);
		u_int32_t name_off = UINT32ARRAY(name_off_ptr);

		// An offset past the data we read would make us read
		// memory we do not own.
		if(name_off >= strs_len)
			continue;

		// context-IDs as KEY, fileName from #STRINGS as VALUE
		_cidMap[cid] = CURRENT_CHAR_STRING(strs_buf.get() + name_off);
	}

	// everything went well!
	return true;
}


// Returns true if contextID is present in the context-ID map.
bool CHMFile::IsValidCID( const int contextID )
{
	return _cidMap.find( contextID ) != _cidMap.end();
}


// Returns "/" followed by the file name mapped to contextID, or just
// "/" when contextID is not in the map.
wxString CHMFile::GetPageByCID( const int contextID )
{
	CHMIDMap::iterator itr = _cidMap.find( contextID );

	// make sure the key/value pair is valid
	if(itr == _cidMap.end())
		return wxT("/");

	return wxString(wxT("/")) + itr->second;
}


// Looks text up in the archive's full-text search index ($FIftiMain)
// and hands every matching word entry to ProcessWLC().
//
//   text       - the word to look for; an empty string fails.
//   wholeWords - if true, only an entry equal to text (compared without
//                regard to case) is processed and the function returns
//                what ProcessWLC() returns for it; otherwise every entry
//                that starts with text is processed.
//   titlesOnly - if true, entries whose title byte is zero are skipped.
//   results    - passed on to ProcessWLC(); the search stops once it
//                holds MAX_SEARCH_RESULTS entries. NULL fails.
//
// Returns false if a needed internal file is missing, the index header
// cannot be read or uses scale values other than 2, or no leaf node is
// found. For a prefix search returns true if at least one entry
// matched.
bool CHMFile::IndexSearch(const wxString& text, bool wholeWords,
			  bool titlesOnly, CHMSearchResults *results)
{
	bool partial = false;

	if(text.IsEmpty() || results == NULL)
		return false;

	chmUnitInfo ui, uitopics, uiurltbl, uistrings, uiurlstr;
	if(::chm_resolve_object(_chmFile, "/$FIftiMain", &ui) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#TOPICS", &uitopics) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#STRINGS", &uistrings) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#URLTBL", &uiurltbl) !=
	   CHM_RESOLVE_SUCCESS ||
	   ::chm_resolve_object(_chmFile, "/#URLSTR", &uiurlstr) !=
	   CHM_RESOLVE_SUCCESS)
		return false;

#define FTS_HEADER_LEN 0x32
	unsigned char header[FTS_HEADER_LEN];

	// A short read would leave part of the header uninitialized.
	if(::chm_retrieve_object(_chmFile, &ui, header, 0, FTS_HEADER_LEN)
	   != FTS_HEADER_LEN)
		return false;

	unsigned char doc_index_s = header[0x1E], doc_index_r = header[0x1F];
	unsigned char code_count_s = header[0x20], code_count_r = header[0x21];
	unsigned char loc_codes_s = header[0x22], loc_codes_r = header[0x23];

	if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) {
		// Don't know how to use values other than 2 yet. Maybe
		// next chmspec.
		return false;
	}

	unsigned char* cursor32 = header + 0x14;
	u_int32_t node_offset = UINT32ARRAY(cursor32);

	cursor32 = header + 0x2e;
	u_int32_t node_len = UINT32ARRAY(cursor32);

	unsigned char* cursor16 = header + 0x18;
	u_int16_t tree_depth = UINT16ARRAY(cursor16);

	// Every leaf node starts with a DWORD and two WORDs; entries follow.
	const u_int32_t leaf_header_len =
		sizeof(u_int32_t) + sizeof(u_int16_t) + sizeof(u_int16_t);

	// A node that cannot even hold its own header is malformed.
	if(node_len < leaf_header_len)
		return false;

	unsigned char word_len, pos;
	wxString word;
	u_int32_t i;
	u_int16_t free_space;

	UCharPtr buffer(new unsigned char[node_len]);

	node_offset = GetLeafNodeOffset(text, node_offset, node_len,
					tree_depth, &ui);

	if(!node_offset)
		return false;

	do {
		// got a leaf node here.
		if(::chm_retrieve_object(_chmFile, &ui, buffer.get(),
					 node_offset, node_len) == 0)
			return false;

		cursor16 = buffer.get() + 6;
		free_space = UINT16ARRAY(cursor16);

		// The first DWORD of the node is where the outer loop goes
		// next. Read it once per node, so a node without entries
		// cannot make us re-read the same node forever.
		cursor32 = buffer.get();
		node_offset = UINT32ARRAY(cursor32);

		// More free space than node: malformed, and the subtraction
		// below would wrap around.
		if(free_space > node_len)
			return partial;

		const u_int32_t used_len = node_len - free_space;

		i = leaf_header_len;

		u_int64_t wlc_count, wlc_size;
		u_int32_t wlc_offset;

		while(i < used_len) {
			word_len = *(buffer.get() + i);

			// word_len counts the terminating NUL, so zero is
			// invalid (and would turn the memcpy() size below
			// into a huge number). The two leading bytes plus
			// the word and title byte must fit in the node.
			if(word_len == 0 ||
			   static_cast<u_int64_t>(i) + 2 + word_len > node_len)
				return partial;

			pos = *(buffer.get() + i + 1);

			// word_len is a single byte, so this always fits.
			char wrd_buf[256];
			memcpy(wrd_buf, buffer.get() + i + 2, word_len - 1);
			wrd_buf[word_len - 1] = 0;

			// Words are stored relative to the previous one:
			// keep its first pos characters and append the rest.
			if(pos == 0)
				word = CURRENT_CHAR_STRING(wrd_buf);
			else
				word = word.Mid(0, pos) +
					CURRENT_CHAR_STRING(wrd_buf);

			i += 2 + word_len;
			unsigned char title = *(buffer.get() + i - 1);

			// NOTE(review): be_encint() is given no buffer limit,
			// so a malformed node can still make it read past
			// the end - confirm and bound it there.
			size_t encsz;
			wlc_count = be_encint(buffer.get() + i, encsz);
			i += encsz;

			// A DWORD, a WORD and at least one more byte must
			// still be inside the node.
			if(static_cast<u_int64_t>(i) + sizeof(u_int32_t) +
			   sizeof(u_int16_t) >= node_len)
				return partial;

			cursor32 = buffer.get() + i;
			wlc_offset = UINT32ARRAY(cursor32);

			i += sizeof(u_int32_t) + sizeof(u_int16_t);
			wlc_size = be_encint(buffer.get() + i, encsz);
			i += encsz;

			if(!title && titlesOnly)
				continue;

			if(wholeWords && !text.CmpNoCase(word))
				return ProcessWLC(wlc_count, wlc_size,
						  wlc_offset, doc_index_s,
						  doc_index_r, code_count_s,
						  code_count_r, loc_codes_s,
						  loc_codes_r, &ui, &uiurltbl,
						  &uistrings, &uitopics,
						  &uiurlstr, results);

			if(!wholeWords) {
				if(word.StartsWith(text.c_str())) {
					partial = true;
					ProcessWLC(wlc_count, wlc_size,
						   wlc_offset, doc_index_s,
						   doc_index_r, code_count_s,
						   code_count_r, loc_codes_s,
						   loc_codes_r, &ui, &uiurltbl,
						   &uistrings, &uitopics,
						   &uiurlstr, results);

				} else if(text.CmpNoCase(
						  // Mid() might be buggy.
						  word.Mid(0, text.Length()))
					  < -1)
					break;
			}

			if(results->size() >= MAX_SEARCH_RESULTS)
				break;
		}
	} while(!wholeWords && word.StartsWith(text.c_str()) && node_offset &&
		// Do not move on to further nodes once the cap is reached.
		results->size() < MAX_SEARCH_RESULTS);

	return partial;
}


// Looks fileName up inside the open archive and fills *ui on success.
// Returns false when no archive is open or the object is not found.
bool CHMFile::ResolveObject(const wxString& fileName, chmUnitInfo *ui)
{
	return _chmFile != NULL &&
		::chm_resolve_object(_chmFile,
				     static_cast<const char *>(
					     fileName.mb_str()),
				     ui) == CHM_RESOLVE_SUCCESS;
}


// Thin wrapper over chm_retrieve_object(): asks for bufferSize bytes of
// the object described by ui, starting at fileOffset, into buffer, and
// returns whatever chm_retrieve_object() returns.
size_t CHMFile::RetrieveObject(chmUnitInfo *ui, unsigned char *buffer,
			       off_t fileOffset, size_t bufferSize)
{
	return ::chm_retrieve_object(_chmFile, ui, buffer, fileOffset,
				     bufferSize);
}


bool CHMFile::GetArchiveInfo()
{
	bool retw = InfoFromWindows();
	bool rets = InfoFromSystem();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -