📄 libchmfileimpl.cpp

📁 chm阅读器用于在linux系统下chm文档的阅读
💻 CPP
📖 第 1 页 / 共 3 页
字号:
1 2 3 下一页
/*************************************************************************** *   Copyright (C) 2004-2007 by Georgy Yunaev, gyunaev@ulduzsoft.com       * *   Portions Copyright (C) 2003  Razvan Cojocaru <razvanco@gmx.net>       *   *   Please do not use email address above for bug reports; see            * *   the README file                                                       * *                                                                         * *   This program is free software; you can redistribute it and/or modify  * *   it under the terms of the GNU General Public License as published by  * *   the Free Software Foundation; either version 2 of the License, or     * *   (at your option) any later version.                                   * *                                                                         * *   This program is distributed in the hope that it will be useful,       * *   but WITHOUT ANY WARRANTY; without even the implied warranty of        * *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         * *   GNU General Public License for more details.                          * *                                                                         * *   You should have received a copy of the GNU General Public License     * *   along with this program; if not, write to the                         * *   Free Software Foundation, Inc.,                                       * *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             
* ***************************************************************************/#include <sys/types.h>#include <qcursor.h>#include <qfile.h>#include <qapplication.h>#include "config.h"#include "chm_lib.h"#include "bitfiddle.h"#include "libchmfile.h"#include "libchmurlfactory.h"#include "libchmfileimpl.h"// Big-enough buffer size for use with various routines.#define BUF_SIZE 4096#define COMMON_BUF_LEN 1025#define TOPICS_ENTRY_LEN 16#define URLTBL_ENTRY_LEN 12//#define DEBUGPARSER(A)	qDebug A#define DEBUGPARSER(A)	;class KCHMShowWaitCursor{	public:		KCHMShowWaitCursor () { QApplication::setOverrideCursor( QCursor(Qt::WaitCursor) ); }		~KCHMShowWaitCursor () { QApplication::restoreOverrideCursor(); }};LCHMFileImpl::LCHMFileImpl( ){	m_chmFile = NULL;	m_home = m_filename = m_home = m_topicsFile = m_indexFile = m_font = QString::null;	m_entityDecodeMap.clear();	m_textCodec = 0;	m_textCodecForSpecialFiles = 0;	m_detectedLCID = 0;	m_currentEncoding = 0;}LCHMFileImpl::~ LCHMFileImpl( ){	closeAll();}bool LCHMFileImpl::loadFile( const QString & archiveName ){	if( m_chmFile )		closeAll();	m_chmFile = chm_open( QFile::encodeName(archiveName) );		if ( m_chmFile == NULL )		return false;	m_filename = archiveName;		// Reset encoding	m_textCodec = 0;	m_textCodecForSpecialFiles = 0;	m_currentEncoding = 0;		// Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so)	// and guess the encoding	getInfoFromWindows();	getInfoFromSystem();	guessTextEncoding();	// Check whether the search tables are present	if ( ResolveObject("/#TOPICS", &m_chmTOPICS)			&& ResolveObject("/#STRINGS", &m_chmSTRINGS)			&& ResolveObject("/#URLTBL", &m_chmURLTBL)			&& ResolveObject("/#URLSTR", &m_chmURLSTR) )	{		m_lookupTablesValid = true;		fillTopicsUrlMap();	}	else		m_lookupTablesValid = false;	if ( m_lookupTablesValid && ResolveObject ("/$FIftiMain", &m_chmFIftiMain) )		m_searchAvailable = true;	else		m_searchAvailable = false;		return true;}void LCHMFileImpl::closeAll( ){	if ( 
m_chmFile == NULL )		return;	chm_close( m_chmFile );		m_chmFile = NULL;	m_home = m_filename = m_home = m_topicsFile = m_indexFile = m_font = QString::null;	m_entityDecodeMap.clear();	m_textCodec = 0;	m_textCodecForSpecialFiles = 0;	m_detectedLCID = 0;	m_currentEncoding = 0;}QString LCHMFileImpl::decodeEntity( const QString & entity ){	// Set up m_entityDecodeMap characters according to current textCodec	if ( m_entityDecodeMap.isEmpty() )	{		m_entityDecodeMap["AElig"]	= encodeWithCurrentCodec ("\306"); // capital AE diphthong (ligature)		m_entityDecodeMap["Aacute"]	= encodeWithCurrentCodec ("\301"); // capital A, acute accent		m_entityDecodeMap["Acirc"]	= encodeWithCurrentCodec ("\302"); // capital A, circumflex accent		m_entityDecodeMap["Agrave"]	= encodeWithCurrentCodec ("\300"); // capital A, grave accent		m_entityDecodeMap["Aring"]	= encodeWithCurrentCodec ("\305"); // capital A, ring		m_entityDecodeMap["Atilde"]	= encodeWithCurrentCodec ("\303"); // capital A, tilde		m_entityDecodeMap["Auml"]	= encodeWithCurrentCodec ("\304"); // capital A, dieresis or umlaut mark		m_entityDecodeMap["Ccedil"]	= encodeWithCurrentCodec ("\307"); // capital C, cedilla		m_entityDecodeMap["Dstrok"]	= encodeWithCurrentCodec ("\320"); // whatever		m_entityDecodeMap["ETH"]	= encodeWithCurrentCodec ("\320"); // capital Eth, Icelandic		m_entityDecodeMap["Eacute"]	= encodeWithCurrentCodec ("\311"); // capital E, acute accent		m_entityDecodeMap["Ecirc"]	= encodeWithCurrentCodec ("\312"); // capital E, circumflex accent		m_entityDecodeMap["Egrave"]	= encodeWithCurrentCodec ("\310"); // capital E, grave accent		m_entityDecodeMap["Euml"]	= encodeWithCurrentCodec ("\313"); // capital E, dieresis or umlaut mark		m_entityDecodeMap["Iacute"]	= encodeWithCurrentCodec ("\315"); // capital I, acute accent		m_entityDecodeMap["Icirc"]	= encodeWithCurrentCodec ("\316"); // capital I, circumflex accent		m_entityDecodeMap["Igrave"]	= encodeWithCurrentCodec ("\314"); // capital I, grave accent		
m_entityDecodeMap["Iuml"]	= encodeWithCurrentCodec ("\317"); // capital I, dieresis or umlaut mark		m_entityDecodeMap["Ntilde"]	= encodeWithCurrentCodec ("\321"); // capital N, tilde		m_entityDecodeMap["Oacute"]	= encodeWithCurrentCodec ("\323"); // capital O, acute accent		m_entityDecodeMap["Ocirc"]	= encodeWithCurrentCodec ("\324"); // capital O, circumflex accent		m_entityDecodeMap["Ograve"]	= encodeWithCurrentCodec ("\322"); // capital O, grave accent		m_entityDecodeMap["Oslash"]	= encodeWithCurrentCodec ("\330"); // capital O, slash		m_entityDecodeMap["Otilde"]	= encodeWithCurrentCodec ("\325"); // capital O, tilde		m_entityDecodeMap["Ouml"]	= encodeWithCurrentCodec ("\326"); // capital O, dieresis or umlaut mark		m_entityDecodeMap["THORN"]	= encodeWithCurrentCodec ("\336"); // capital THORN, Icelandic		m_entityDecodeMap["Uacute"]	= encodeWithCurrentCodec ("\332"); // capital U, acute accent		m_entityDecodeMap["Ucirc"]	= encodeWithCurrentCodec ("\333"); // capital U, circumflex accent		m_entityDecodeMap["Ugrave"]	= encodeWithCurrentCodec ("\331"); // capital U, grave accent		m_entityDecodeMap["Uuml"]	= encodeWithCurrentCodec ("\334"); // capital U, dieresis or umlaut mark		m_entityDecodeMap["Yacute"]	= encodeWithCurrentCodec ("\335"); // capital Y, acute accent		m_entityDecodeMap["OElig"]	= encodeWithCurrentCodec ("\338"); // capital Y, acute accent		m_entityDecodeMap["oelig"]	= encodeWithCurrentCodec ("\339"); // capital Y, acute accent								m_entityDecodeMap["aacute"]	= encodeWithCurrentCodec ("\341"); // small a, acute accent		m_entityDecodeMap["acirc"]	= encodeWithCurrentCodec ("\342"); // small a, circumflex accent		m_entityDecodeMap["aelig"]	= encodeWithCurrentCodec ("\346"); // small ae diphthong (ligature)		m_entityDecodeMap["agrave"]	= encodeWithCurrentCodec ("\340"); // small a, grave accent		m_entityDecodeMap["aring"]	= encodeWithCurrentCodec ("\345"); // small a, ring		m_entityDecodeMap["atilde"]	= encodeWithCurrentCodec ("\343"); // small a, 
tilde		m_entityDecodeMap["auml"]	= encodeWithCurrentCodec ("\344"); // small a, dieresis or umlaut mark		m_entityDecodeMap["ccedil"]	= encodeWithCurrentCodec ("\347"); // small c, cedilla		m_entityDecodeMap["eacute"]	= encodeWithCurrentCodec ("\351"); // small e, acute accent		m_entityDecodeMap["ecirc"]	= encodeWithCurrentCodec ("\352"); // small e, circumflex accent		m_entityDecodeMap["Scaron"]	= encodeWithCurrentCodec ("\352"); // small e, circumflex accent		m_entityDecodeMap["egrave"]	= encodeWithCurrentCodec ("\350"); // small e, grave accent		m_entityDecodeMap["eth"]	= encodeWithCurrentCodec ("\360"); // small eth, Icelandic		m_entityDecodeMap["euml"]	= encodeWithCurrentCodec ("\353"); // small e, dieresis or umlaut mark		m_entityDecodeMap["iacute"]	= encodeWithCurrentCodec ("\355"); // small i, acute accent		m_entityDecodeMap["icirc"]	= encodeWithCurrentCodec ("\356"); // small i, circumflex accent		m_entityDecodeMap["igrave"]	= encodeWithCurrentCodec ("\354"); // small i, grave accent		m_entityDecodeMap["iuml"]	= encodeWithCurrentCodec ("\357"); // small i, dieresis or umlaut mark		m_entityDecodeMap["ntilde"]	= encodeWithCurrentCodec ("\361"); // small n, tilde		m_entityDecodeMap["oacute"]	= encodeWithCurrentCodec ("\363"); // small o, acute accent		m_entityDecodeMap["ocirc"]	= encodeWithCurrentCodec ("\364"); // small o, circumflex accent		m_entityDecodeMap["ograve"]	= encodeWithCurrentCodec ("\362"); // small o, grave accent		m_entityDecodeMap["oslash"]	= encodeWithCurrentCodec ("\370"); // small o, slash		m_entityDecodeMap["otilde"]	= encodeWithCurrentCodec ("\365"); // small o, tilde		m_entityDecodeMap["ouml"]	= encodeWithCurrentCodec ("\366"); // small o, dieresis or umlaut mark		m_entityDecodeMap["szlig"]	= encodeWithCurrentCodec ("\337"); // small sharp s, German (sz ligature)		m_entityDecodeMap["thorn"]	= encodeWithCurrentCodec ("\376"); // small thorn, Icelandic		m_entityDecodeMap["uacute"]	= encodeWithCurrentCodec ("\372"); // small u, acute accent	
	m_entityDecodeMap["ucirc"]	= encodeWithCurrentCodec ("\373"); // small u, circumflex accent		m_entityDecodeMap["ugrave"]	= encodeWithCurrentCodec ("\371"); // small u, grave accent		m_entityDecodeMap["uuml"]	= encodeWithCurrentCodec ("\374"); // small u, dieresis or umlaut mark		m_entityDecodeMap["yacute"]	= encodeWithCurrentCodec ("\375"); // small y, acute accent		m_entityDecodeMap["yuml"]	= encodeWithCurrentCodec ("\377"); // small y, dieresis or umlaut mark			m_entityDecodeMap["iexcl"]	= encodeWithCurrentCodec ("\241");		m_entityDecodeMap["cent"]	= encodeWithCurrentCodec ("\242");		m_entityDecodeMap["pound"]	= encodeWithCurrentCodec ("\243");		m_entityDecodeMap["curren"]	= encodeWithCurrentCodec ("\244");		m_entityDecodeMap["yen"]	= encodeWithCurrentCodec ("\245");		m_entityDecodeMap["brvbar"]	= encodeWithCurrentCodec ("\246");		m_entityDecodeMap["sect"]	= encodeWithCurrentCodec ("\247");		m_entityDecodeMap["uml"]	= encodeWithCurrentCodec ("\250");		m_entityDecodeMap["ordf"]	= encodeWithCurrentCodec ("\252");		m_entityDecodeMap["laquo"]	= encodeWithCurrentCodec ("\253");		m_entityDecodeMap["not"]	= encodeWithCurrentCodec ("\254");		m_entityDecodeMap["shy"]	= encodeWithCurrentCodec ("\255");		m_entityDecodeMap["macr"]	= encodeWithCurrentCodec ("\257");		m_entityDecodeMap["deg"]	= encodeWithCurrentCodec ("\260");		m_entityDecodeMap["plusmn"]	= encodeWithCurrentCodec ("\261");		m_entityDecodeMap["sup1"]	= encodeWithCurrentCodec ("\271");		m_entityDecodeMap["sup2"]	= encodeWithCurrentCodec ("\262");		m_entityDecodeMap["sup3"]	= encodeWithCurrentCodec ("\263");		m_entityDecodeMap["acute"]	= encodeWithCurrentCodec ("\264");		m_entityDecodeMap["micro"]	= encodeWithCurrentCodec ("\265");		m_entityDecodeMap["para"]	= encodeWithCurrentCodec ("\266");		m_entityDecodeMap["middot"]	= encodeWithCurrentCodec ("\267");		m_entityDecodeMap["cedil"]	= encodeWithCurrentCodec ("\270");		m_entityDecodeMap["ordm"]	= encodeWithCurrentCodec ("\272");		m_entityDecodeMap["raquo"]	= 
encodeWithCurrentCodec ("\273");		m_entityDecodeMap["frac14"]	= encodeWithCurrentCodec ("\274");		m_entityDecodeMap["frac12"]	= encodeWithCurrentCodec ("\275");		m_entityDecodeMap["frac34"]	= encodeWithCurrentCodec ("\276");		m_entityDecodeMap["iquest"]	= encodeWithCurrentCodec ("\277");		m_entityDecodeMap["times"]	= encodeWithCurrentCodec ("\327");		m_entityDecodeMap["divide"]	= encodeWithCurrentCodec ("\367");						m_entityDecodeMap["copy"]	= encodeWithCurrentCodec ("\251"); // copyright sign		m_entityDecodeMap["reg"]	= encodeWithCurrentCodec ("\256"); // registered sign		m_entityDecodeMap["nbsp"]	= encodeWithCurrentCodec ("\240"); // non breaking space		m_entityDecodeMap["fnof"]	= QChar((unsigned short) 402);						m_entityDecodeMap["Delta"]	= QChar((unsigned short) 916);		m_entityDecodeMap["Pi"]	= QChar((unsigned short) 928);		m_entityDecodeMap["Sigma"]	= QChar((unsigned short) 931);				m_entityDecodeMap["beta"]	= QChar((unsigned short) 946);		m_entityDecodeMap["gamma"]	= QChar((unsigned short) 947);		m_entityDecodeMap["delta"]	= QChar((unsigned short) 948);		m_entityDecodeMap["eta"]	= QChar((unsigned short) 951);		m_entityDecodeMap["theta"]	= QChar((unsigned short) 952);		m_entityDecodeMap["lambda"]	= QChar((unsigned short) 955);		m_entityDecodeMap["mu"]	= QChar((unsigned short) 956);		m_entityDecodeMap["nu"]	= QChar((unsigned short) 957);		m_entityDecodeMap["pi"]	= QChar((unsigned short) 960);		m_entityDecodeMap["rho"]	= QChar((unsigned short) 961);				m_entityDecodeMap["lsquo"]	= QChar((unsigned short) 8216);		m_entityDecodeMap["rsquo"]	= QChar((unsigned short) 8217);		m_entityDecodeMap["rdquo"]	= QChar((unsigned short) 8221);		m_entityDecodeMap["bdquo"]	= QChar((unsigned short) 8222);		m_entityDecodeMap["trade"]  = QChar((unsigned short) 8482);		m_entityDecodeMap["ldquo"]  = QChar((unsigned short) 8220);		m_entityDecodeMap["ndash"]  = QChar((unsigned short) 8211);		m_entityDecodeMap["mdash"]  = QChar((unsigned short) 8212);		m_entityDecodeMap["bull"]  = 
QChar((unsigned short) 8226);		m_entityDecodeMap["hellip"]  = QChar((unsigned short) 8230);		m_entityDecodeMap["emsp"]  = QChar((unsigned short) 8195);		m_entityDecodeMap["rarr"]  = QChar((unsigned short) 8594);		m_entityDecodeMap["rArr"]  = QChar((unsigned short) 8658);		m_entityDecodeMap["crarr"]  = QChar((unsigned short) 8629);		m_entityDecodeMap["le"]  = QChar((unsigned short) 8804);		m_entityDecodeMap["ge"]  = QChar((unsigned short) 8805);		m_entityDecodeMap["lte"]  = QChar((unsigned short) 8804); // wrong, but used somewhere		m_entityDecodeMap["gte"]  = QChar((unsigned short) 8805); // wrong, but used somewhere		m_entityDecodeMap["dagger"]  = QChar((unsigned short) 8224);		m_entityDecodeMap["Dagger"]  = QChar((unsigned short) 8225);		m_entityDecodeMap["euro"]  = QChar((unsigned short) 8364);		m_entityDecodeMap["asymp"]  = QChar((unsigned short) 8776);		m_entityDecodeMap["isin"]  = QChar((unsigned short) 8712);		m_entityDecodeMap["notin"]  = QChar((unsigned short) 8713);		m_entityDecodeMap["prod"]  = QChar((unsigned short) 8719);		m_entityDecodeMap["ne"]  = QChar((unsigned short) 8800);						m_entityDecodeMap["amp"]	= "&";	// ampersand		m_entityDecodeMap["gt"] = ">";	// greater than		m_entityDecodeMap["lt"] = "<"; 	// less than		m_entityDecodeMap["quot"] = "\""; // double quote		m_entityDecodeMap["apos"] = "'"; 	// single quote		m_entityDecodeMap["frasl"]  = "/";		m_entityDecodeMap["minus"]  = "-";		m_entityDecodeMap["oplus"] = "+";		m_entityDecodeMap["Prime"] = "\"";	}	// If entity is an ASCII code like &#12349; - just decode it	if ( entity[0] == '#' )	{		bool valid;		unsigned int ascode = entity.mid(1).toUInt( &valid );								if ( !valid )		{			qWarning ( "LCHMFileImpl::decodeEntity: could not decode HTML entity '%s'", entity.ascii() );			return QString::null;		}		return (QString) (QChar( ascode ));	}	else	{		QMap<QString, QString>::const_iterator it = m_entityDecodeMap.find( entity );		if ( it == m_entityDecodeMap.end() )		{			qWarning 
("LCHMFileImpl::decodeEntity: could not decode HTML entity '%s'", entity.ascii());			return QString::null;		}				return *it;	}}inline int LCHMFileImpl::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities){	int qbegin = tag.find ('"', offset);		if ( qbegin == -1 )		qFatal ("LCHMFileImpl::findStringInQuotes: cannot find first quote in <param> tag: '%s'", tag.ascii());	int qend = firstquote ? tag.find ('"', qbegin + 1) : tag.findRev ('"');	if ( qend == -1 || qend <= qbegin )		qFatal ("LCHMFileImpl::findStringInQuotes: cannot find last quote in <param> tag: '%s'", tag.ascii());	// If we do not need to decode HTML entities, just return.	if ( decodeentities )	{		QString htmlentity = QString::null;		bool fill_entity = false;			value.reserve (qend - qbegin); // to avoid multiple memory allocations			for ( int i = qbegin + 1; i < qend; i++ )		{			if ( !fill_entity )			{				if ( tag[i] == '&' ) // HTML entity starts					fill_entity = true;				else					value.append (tag[i]);			}			else			{				if ( tag[i] == ';' ) // HTML entity ends				{					// If entity is an ASCII code, just decode it					QString decode = decodeEntity( htmlentity );										if ( decode.isNull() )						break;										value.append ( decode );					htmlentity = QString::null;					fill_entity = false;				}				else					htmlentity.append (tag[i]);			}		}	}	else		value = tag.mid (qbegin + 1, qend - qbegin - 1);	return qend + 1;}bool LCHMFileImpl::searchWord (const QString& text, 							   bool wholeWords, 		   					   bool titlesOnly, 			                   LCHMSearchProgressResults& results,  					           bool phrase_search){	bool partial = false;	if ( text.isEmpty() || !m_searchAvailable )		return false;	QString searchword = (QString) convertSearchWord (text);#define FTS_HEADER_LEN 0x32	unsigned char header[FTS_HEADER_LEN];	if ( RetrieveObject (&m_chmFIftiMain, header, 0, FTS_HEADER_LEN) == 0 )		return false;		unsigned char doc_index_s = header[0x1E], 
doc_index_r = header[0x1F];	unsigned char code_count_s = header[0x20], code_count_r = header[0x21];	unsigned char loc_codes_s = header[0x22], loc_codes_r = header[0x23];	if(doc_index_s != 2 || code_count_s != 2 || loc_codes_s != 2)	{		// Don't know how to use values other than 2 yet. Maybe next chmspec.		return false;	}	unsigned char* cursor32 = header + 0x14;	u_int32_t node_offset = UINT32ARRAY(cursor32);	cursor32 = header + 0x2e;	u_int32_t node_len = UINT32ARRAY(cursor32);	unsigned char* cursor16 = header + 0x18;	u_int16_t tree_depth = UINT16ARRAY(cursor16);	unsigned char word_len, pos;	QString word;	u_int32_t i = sizeof(u_int16_t);	u_int16_t free_space;
1 2 3 下一页
💿 文件大小 692 K
👤 上传用户 huihuisasa
📂 所属分类编辑器/阅读器
🏷️ 相关标签

#chm #linux #阅读器
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -