📄 libchmfileimpl.cpp
字号:
/*************************************************************************** * Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com * * Portions Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net> * * Please do not use email address above for bug reports; see * * the README file * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/#include <sys/types.h>#include <qcursor.h>#include <qfile.h>#include <qapplication.h>#include "config.h"#include "chm_lib.h"#include "bitfiddle.h"#include "libchmfile.h"#include "libchmurlfactory.h"#include "libchmfileimpl.h"// Big-enough buffer size for use with various routines.#define BUF_SIZE 4096#define COMMON_BUF_LEN 1025#define TOPICS_ENTRY_LEN 16#define URLTBL_ENTRY_LEN 12//#define DEBUGPARSER(A) qDebug A#define DEBUGPARSER(A) ;class KCHMShowWaitCursor{ public: KCHMShowWaitCursor () { QApplication::setOverrideCursor( QCursor(Qt::WaitCursor) ); } ~KCHMShowWaitCursor () { QApplication::restoreOverrideCursor(); }};LCHMFileImpl::LCHMFileImpl( ){ m_chmFile = NULL; m_home = m_filename = m_home = m_topicsFile = m_indexFile = m_font = QString::null; m_entityDecodeMap.clear(); m_textCodec = 0; m_textCodecForSpecialFiles = 0; m_detectedLCID = 0; m_currentEncoding = 0;}LCHMFileImpl::~ LCHMFileImpl( ){ closeAll();}bool LCHMFileImpl::loadFile( const QString & archiveName ){ if( m_chmFile ) closeAll(); m_chmFile = chm_open( QFile::encodeName(archiveName) ); if ( m_chmFile == NULL ) return false; m_filename = archiveName; // Reset encoding m_textCodec = 0; m_textCodecForSpecialFiles = 0; m_currentEncoding = 0; // Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so) // and guess the encoding getInfoFromWindows(); getInfoFromSystem(); guessTextEncoding(); // Check whether the search tables are present if ( ResolveObject("/#TOPICS", &m_chmTOPICS) && ResolveObject("/#STRINGS", &m_chmSTRINGS) && ResolveObject("/#URLTBL", &m_chmURLTBL) && ResolveObject("/#URLSTR", &m_chmURLSTR) ) { m_lookupTablesValid = true; fillTopicsUrlMap(); } else m_lookupTablesValid = false; if ( m_lookupTablesValid && ResolveObject ("/$FIftiMain", &m_chmFIftiMain) ) m_searchAvailable = true; else m_searchAvailable = false; return true;}void LCHMFileImpl::closeAll( ){ if ( m_chmFile == NULL ) return; chm_close( m_chmFile ); m_chmFile = NULL; m_home = m_filename = m_home = m_topicsFile = m_indexFile = m_font = QString::null; m_entityDecodeMap.clear(); m_textCodec = 0; m_textCodecForSpecialFiles = 0; m_detectedLCID = 0; m_currentEncoding = 0;}QString LCHMFileImpl::decodeEntity( const QString & entity ){ // Set up m_entityDecodeMap characters according to current textCodec if ( m_entityDecodeMap.isEmpty() ) { m_entityDecodeMap["AElig"] = encodeWithCurrentCodec ("\306"); // capital AE diphthong (ligature) m_entityDecodeMap["Aacute"] = encodeWithCurrentCodec ("\301"); // capital A, acute accent m_entityDecodeMap["Acirc"] = encodeWithCurrentCodec ("\302"); // capital A, circumflex accent m_entityDecodeMap["Agrave"] = encodeWithCurrentCodec ("\300"); // capital A, grave accent m_entityDecodeMap["Aring"] = encodeWithCurrentCodec ("\305"); // capital A, ring m_entityDecodeMap["Atilde"] = encodeWithCurrentCodec ("\303"); // capital A, tilde m_entityDecodeMap["Auml"] = encodeWithCurrentCodec ("\304"); // capital A, dieresis or umlaut mark m_entityDecodeMap["Ccedil"] = encodeWithCurrentCodec ("\307"); // capital C, cedilla m_entityDecodeMap["Dstrok"] = encodeWithCurrentCodec ("\320"); // whatever m_entityDecodeMap["ETH"] = encodeWithCurrentCodec ("\320"); // capital Eth, Icelandic m_entityDecodeMap["Eacute"] = encodeWithCurrentCodec ("\311"); // capital E, acute accent m_entityDecodeMap["Ecirc"] = encodeWithCurrentCodec ("\312"); // capital E, circumflex accent m_entityDecodeMap["Egrave"] = encodeWithCurrentCodec ("\310"); // capital E, grave accent m_entityDecodeMap["Euml"] = encodeWithCurrentCodec ("\313"); // capital E, dieresis or umlaut mark m_entityDecodeMap["Iacute"] = encodeWithCurrentCodec ("\315"); // capital I, acute accent m_entityDecodeMap["Icirc"] = encodeWithCurrentCodec ("\316"); // capital I, circumflex accent m_entityDecodeMap["Igrave"] = encodeWithCurrentCodec ("\314"); // capital I, grave accent m_entityDecodeMap["Iuml"] = encodeWithCurrentCodec ("\317"); // capital I, dieresis or umlaut mark m_entityDecodeMap["Ntilde"] = encodeWithCurrentCodec ("\321"); // capital N, tilde m_entityDecodeMap["Oacute"] = encodeWithCurrentCodec ("\323"); // capital O, acute accent m_entityDecodeMap["Ocirc"] = encodeWithCurrentCodec ("\324"); // capital O, circumflex accent m_entityDecodeMap["Ograve"] = encodeWithCurrentCodec ("\322"); // capital O, grave accent m_entityDecodeMap["Oslash"] = encodeWithCurrentCodec ("\330"); // capital O, slash m_entityDecodeMap["Otilde"] = encodeWithCurrentCodec ("\325"); // capital O, tilde m_entityDecodeMap["Ouml"] = encodeWithCurrentCodec ("\326"); // capital O, dieresis or umlaut mark m_entityDecodeMap["THORN"] = encodeWithCurrentCodec ("\336"); // capital THORN, Icelandic m_entityDecodeMap["Uacute"] = encodeWithCurrentCodec ("\332"); // capital U, acute accent m_entityDecodeMap["Ucirc"] = encodeWithCurrentCodec ("\333"); // capital U, circumflex accent m_entityDecodeMap["Ugrave"] = encodeWithCurrentCodec ("\331"); // capital U, grave accent m_entityDecodeMap["Uuml"] = encodeWithCurrentCodec ("\334"); // capital U, dieresis or umlaut mark m_entityDecodeMap["Yacute"] = encodeWithCurrentCodec ("\335"); // capital Y, acute accent m_entityDecodeMap["OElig"] = encodeWithCurrentCodec ("\338"); // capital Y, acute accent m_entityDecodeMap["oelig"] = encodeWithCurrentCodec ("\339"); // capital Y, acute accent m_entityDecodeMap["aacute"] = encodeWithCurrentCodec ("\341"); // small a, acute accent m_entityDecodeMap["acirc"] = encodeWithCurrentCodec ("\342"); // small a, circumflex accent m_entityDecodeMap["aelig"] = encodeWithCurrentCodec ("\346"); // small ae diphthong (ligature) m_entityDecodeMap["agrave"] = encodeWithCurrentCodec ("\340"); // small a, grave accent m_entityDecodeMap["aring"] = encodeWithCurrentCodec ("\345"); // small a, ring m_entityDecodeMap["atilde"] = encodeWithCurrentCodec ("\343"); // small a, tilde m_entityDecodeMap["auml"] = encodeWithCurrentCodec ("\344"); // small a, dieresis or umlaut mark m_entityDecodeMap["ccedil"] = encodeWithCurrentCodec ("\347"); // small c, cedilla m_entityDecodeMap["eacute"] = encodeWithCurrentCodec ("\351"); // small e, acute accent m_entityDecodeMap["ecirc"] = encodeWithCurrentCodec ("\352"); // small e, circumflex accent m_entityDecodeMap["Scaron"] = encodeWithCurrentCodec ("\352"); // small e, circumflex accent m_entityDecodeMap["egrave"] = encodeWithCurrentCodec ("\350"); // small e, grave accent m_entityDecodeMap["eth"] = encodeWithCurrentCodec ("\360"); // small eth, Icelandic m_entityDecodeMap["euml"] = encodeWithCurrentCodec ("\353"); // small e, dieresis or umlaut mark m_entityDecodeMap["iacute"] = encodeWithCurrentCodec ("\355"); // small i, acute accent m_entityDecodeMap["icirc"] = encodeWithCurrentCodec ("\356"); // small i, circumflex accent m_entityDecodeMap["igrave"] = encodeWithCurrentCodec ("\354"); // small i, grave accent m_entityDecodeMap["iuml"] = encodeWithCurrentCodec ("\357"); // small i, dieresis or umlaut mark m_entityDecodeMap["ntilde"] = encodeWithCurrentCodec ("\361"); // small n, tilde m_entityDecodeMap["oacute"] = encodeWithCurrentCodec ("\363"); // small o, acute accent m_entityDecodeMap["ocirc"] = encodeWithCurrentCodec ("\364"); // small o, circumflex accent m_entityDecodeMap["ograve"] = encodeWithCurrentCodec ("\362"); // small o, grave accent m_entityDecodeMap["oslash"] = encodeWithCurrentCodec ("\370"); // small o, slash m_entityDecodeMap["otilde"] = encodeWithCurrentCodec ("\365"); // small o, tilde m_entityDecodeMap["ouml"] = encodeWithCurrentCodec ("\366"); // small o, dieresis or umlaut mark m_entityDecodeMap["szlig"] = encodeWithCurrentCodec ("\337"); // small sharp s, German (sz ligature) m_entityDecodeMap["thorn"] = encodeWithCurrentCodec ("\376"); // small thorn, Icelandic m_entityDecodeMap["uacute"] = encodeWithCurrentCodec ("\372"); // small u, acute accent m_entityDecodeMap["ucirc"] = encodeWithCurrentCodec ("\373"); // small u, circumflex accent m_entityDecodeMap["ugrave"] = encodeWithCurrentCodec ("\371"); // small u, grave accent m_entityDecodeMap["uuml"] = encodeWithCurrentCodec ("\374"); // small u, dieresis or umlaut mark m_entityDecodeMap["yacute"] = encodeWithCurrentCodec ("\375"); // small y, acute accent m_entityDecodeMap["yuml"] = encodeWithCurrentCodec ("\377"); // small y, dieresis or umlaut mark m_entityDecodeMap["iexcl"] = encodeWithCurrentCodec ("\241"); m_entityDecodeMap["cent"] = encodeWithCurrentCodec ("\242"); m_entityDecodeMap["pound"] = encodeWithCurrentCodec ("\243"); m_entityDecodeMap["curren"] = encodeWithCurrentCodec ("\244"); m_entityDecodeMap["yen"] = encodeWithCurrentCodec ("\245"); m_entityDecodeMap["brvbar"] = encodeWithCurrentCodec ("\246"); m_entityDecodeMap["sect"] = encodeWithCurrentCodec ("\247"); m_entityDecodeMap["uml"] = encodeWithCurrentCodec ("\250"); m_entityDecodeMap["ordf"] = encodeWithCurrentCodec ("\252"); m_entityDecodeMap["laquo"] = encodeWithCurrentCodec ("\253"); m_entityDecodeMap["not"] = encodeWithCurrentCodec ("\254"); m_entityDecodeMap["shy"] = encodeWithCurrentCodec ("\255"); m_entityDecodeMap["macr"] = encodeWithCurrentCodec ("\257"); m_entityDecodeMap["deg"] = encodeWithCurrentCodec ("\260"); m_entityDecodeMap["plusmn"] = encodeWithCurrentCodec ("\261"); m_entityDecodeMap["sup1"] = encodeWithCurrentCodec ("\271"); m_entityDecodeMap["sup2"] = encodeWithCurrentCodec ("\262"); m_entityDecodeMap["sup3"] = encodeWithCurrentCodec ("\263"); m_entityDecodeMap["acute"] = encodeWithCurrentCodec ("\264"); m_entityDecodeMap["micro"] = encodeWithCurrentCodec ("\265"); m_entityDecodeMap["para"] = encodeWithCurrentCodec ("\266"); m_entityDecodeMap["middot"] = encodeWithCurrentCodec ("\267"); m_entityDecodeMap["cedil"] = encodeWithCurrentCodec ("\270"); m_entityDecodeMap["ordm"] = encodeWithCurrentCodec ("\272"); m_entityDecodeMap["raquo"] = encodeWithCurrentCodec ("\273"); m_entityDecodeMap["frac14"] = encodeWithCurrentCodec ("\274"); m_entityDecodeMap["frac12"] = encodeWithCurrentCodec ("\275"); m_entityDecodeMap["frac34"] = encodeWithCurrentCodec ("\276"); m_entityDecodeMap["iquest"] = encodeWithCurrentCodec ("\277"); m_entityDecodeMap["times"] = encodeWithCurrentCodec ("\327"); m_entityDecodeMap["divide"] = encodeWithCurrentCodec ("\367"); m_entityDecodeMap["copy"] = encodeWithCurrentCodec ("\251"); // copyright sign m_entityDecodeMap["reg"] = encodeWithCurrentCodec ("\256"); // registered sign m_entityDecodeMap["nbsp"] = encodeWithCurrentCodec ("\240"); // non breaking space m_entityDecodeMap["fnof"] = QChar((unsigned short) 402); m_entityDecodeMap["Delta"] = QChar((unsigned short) 916); m_entityDecodeMap["Pi"] = QChar((unsigned short) 928); m_entityDecodeMap["Sigma"] = QChar((unsigned short) 931); m_entityDecodeMap["beta"] = QChar((unsigned short) 946); m_entityDecodeMap["gamma"] = QChar((unsigned short) 947); m_entityDecodeMap["delta"] = QChar((unsigned short) 948); m_entityDecodeMap["eta"] = QChar((unsigned short) 951); m_entityDecodeMap["theta"] = QChar((unsigned short) 952); m_entityDecodeMap["lambda"] = QChar((unsigned short) 955); m_entityDecodeMap["mu"] = QChar((unsigned short) 956); m_entityDecodeMap["nu"] = QChar((unsigned short) 957); m_entityDecodeMap["pi"] = QChar((unsigned short) 960); m_entityDecodeMap["rho"] = QChar((unsigned short) 961); m_entityDecodeMap["lsquo"] = QChar((unsigned short) 8216); m_entityDecodeMap["rsquo"] = QChar((unsigned short) 8217); m_entityDecodeMap["rdquo"] = QChar((unsigned short) 8221); m_entityDecodeMap["bdquo"] = QChar((unsigned short) 8222); m_entityDecodeMap["trade"] = QChar((unsigned short) 8482); m_entityDecodeMap["ldquo"] = QChar((unsigned short) 8220); m_entityDecodeMap["ndash"] = QChar((unsigned short) 8211); m_entityDecodeMap["mdash"] = QChar((unsigned short) 8212); m_entityDecodeMap["bull"] = QChar((unsigned short) 8226); m_entityDecodeMap["hellip"] = QChar((unsigned short) 8230); m_entityDecodeMap["emsp"] = QChar((unsigned short) 8195); m_entityDecodeMap["rarr"] = QChar((unsigned short) 8594); m_entityDecodeMap["rArr"] = QChar((unsigned short) 8658); m_entityDecodeMap["crarr"] = QChar((unsigned short) 8629); m_entityDecodeMap["le"] = QChar((unsigned short) 8804); m_entityDecodeMap["ge"] = QChar((unsigned short) 8805); m_entityDecodeMap["lte"] = QChar((unsigned short) 8804); // wrong, but used somewhere m_entityDecodeMap["gte"] = QChar((unsigned short) 8805); // wrong, but used somewhere m_entityDecodeMap["dagger"] = QChar((unsigned short) 8224); m_entityDecodeMap["Dagger"] = QChar((unsigned short) 8225); m_entityDecodeMap["euro"] = QChar((unsigned short) 8364); m_entityDecodeMap["asymp"] = QChar((unsigned short) 8776); m_entityDecodeMap["isin"] = QChar((unsigned short) 8712); m_entityDecodeMap["notin"] = QChar((unsigned short) 8713); m_entityDecodeMap["prod"] = QChar((unsigned short) 8719); m_entityDecodeMap["ne"] = QChar((unsigned short) 8800); m_entityDecodeMap["amp"] = "&"; // ampersand m_entityDecodeMap["gt"] = ">"; // greater than m_entityDecodeMap["lt"] = "<"; // less than m_entityDecodeMap["quot"] = "\""; // double quote m_entityDecodeMap["apos"] = "'"; // single quote m_entityDecodeMap["frasl"] = "/"; m_entityDecodeMap["minus"] = "-"; m_entityDecodeMap["oplus"] = "+"; m_entityDecodeMap["Prime"] = "\""; } // If entity is an ASCII code like 〽 - just decode it if ( entity[0] == '#' ) { bool valid; unsigned int ascode = entity.mid(1).toUInt( &valid ); if ( !valid ) { qWarning ( "LCHMFileImpl::decodeEntity: could not decode HTML entity '%s'", entity.ascii() ); return QString::null; } return (QString) (QChar( ascode )); } else { QMap<QString, QString>::const_iterator it = m_entityDecodeMap.find( entity ); if ( it == m_entityDecodeMap.end() ) { qWarning ("LCHMFileImpl::decodeEntity: could not decode HTML entity '%s'", entity.ascii()); return QString::null; } return *it; }}inline int LCHMFileImpl::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities){ int qbegin = tag.find ('"', offset); if ( qbegin == -1 ) qFatal ("LCHMFileImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", tag.ascii()); int qend = firstquote ? tag.find ('"', qbegin + 1) : tag.findRev ('"'); if ( qend == -1 || qend <= qbegin ) qFatal ("LCHMFileImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", tag.ascii()); // If we do not need to decode HTML entities, just return. if ( decodeentities ) { QString htmlentity = QString::null; bool fill_entity = false; value.reserve (qend - qbegin); // to avoid multiple memory allocations for ( int i = qbegin + 1; i < qend; i++ ) { if ( !fill_entity ) { if ( tag[i] == '&' ) // HTML entity starts fill_entity = true; else value.append (tag[i]); } else { if ( tag[i] == ';' ) // HTML entity ends { // If entity is an ASCII code, just decode it QString decode = decodeEntity( htmlentity ); if ( decode.isNull() ) break; value.append ( decode ); htmlentity = QString::null; fill_entity = false; } else htmlentity.append (tag[i]); } } } else value = tag.mid (qbegin + 1, qend - qbegin - 1); return qend + 1;}bool LCHMFileImpl::searchWord (const QString& text, bool wholeWords, bool titlesOnly, LCHMSearchProgressResults& results, bool phrase_search){ bool partial = false; if ( text.isEmpty() || !m_searchAvailable ) return false; QString searchword = (QString) convertSearchWord (text);#define FTS_HEADER_LEN 0x32 unsigned char header[FTS_HEADER_LEN]; if ( RetrieveObject (&m_chmFIftiMain, header, 0, FTS_HEADER_LEN) == 0 ) return false; unsigned char doc_index_s = header[0x1E], doc_index_r = header[0x1F]; unsigned char code_count_s = header[0x20], code_count_r = header[0x21]; unsigned char loc_codes_s = header[0x22], loc_codes_r = header[0x23]; if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2) { // Don't know how to use values other than 2 yet. Maybe next chmspec. return false; } unsigned char* cursor32 = header + 0x14; u_int32_t node_offset = UINT32ARRAY(cursor32); cursor32 = header + 0x2e; u_int32_t node_len = UINT32ARRAY(cursor32); unsigned char* cursor16 = header + 0x18; u_int16_t tree_depth = UINT16ARRAY(cursor16); unsigned char word_len, pos; QString word; u_int32_t i = sizeof(u_int16_t); u_int16_t free_space;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -