📄 xchmfile.cpp.svn-base
字号:
/*************************************************************************** * Copyright (C) 2005 by Georgy Yunaev * * tim@krasnogorsk.ru * * * * Copyright (C) 2003 Razvan Cojocaru <razvanco@gmx.net> * * XML-RPC/Context ID code contributed by Eamon Millman / PCI Geomatics * * <millman@pcigeomatics.com> * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * ***************************************************************************/#include <qmessagebox.h> #include <qstring.h>#include <qregexp.h>#include <qmap.h>#include <qeventloop.h>#include <qdom.h>#include <qfile.h>#include <k3listview.h>#include "xchmfile.h"#include "iconstorage.h"#include "bitfiddle.h"#include "kchmurl.h"#include "kchmtreeviewitem.h"// Big-enough buffer size for use with various routines.#define BUF_SIZE 4096#define COMMON_BUF_LEN 1025#define TOPICS_ENTRY_LEN 16#define URLTBL_ENTRY_LEN 12// A little helper to show wait cursor#include <qcursor.h>#include <qapplication.h>class KCHMShowWaitCursor{public: KCHMShowWaitCursor () { QApplication::setOverrideCursor( QCursor(Qt::WaitCursor) ); } ~KCHMShowWaitCursor () { QApplication::restoreOverrideCursor(); }};inline int CHMFile::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities){ // Set up m_entityDecodeMap characters according to current textCodec if ( m_entityDecodeMap.isEmpty() ) { m_entityDecodeMap["AElig"] = encodeWithCurrentCodec ("\306"); // capital AE diphthong (ligature) m_entityDecodeMap["Aacute"] = encodeWithCurrentCodec ("\301"); // capital A, acute accent m_entityDecodeMap["Acirc"] = encodeWithCurrentCodec ("\302"); // capital A, circumflex accent m_entityDecodeMap["Agrave"] = encodeWithCurrentCodec ("\300"); // capital A, grave accent m_entityDecodeMap["Aring"] = encodeWithCurrentCodec ("\305"); // capital A, ring m_entityDecodeMap["Atilde"] = encodeWithCurrentCodec ("\303"); // capital A, tilde m_entityDecodeMap["Auml"] = encodeWithCurrentCodec ("\304"); // capital A, dieresis or umlaut mark m_entityDecodeMap["Ccedil"] = encodeWithCurrentCodec ("\307"); // capital C, cedilla m_entityDecodeMap["Dstrok"] = encodeWithCurrentCodec ("\320"); // whatever m_entityDecodeMap["ETH"] = encodeWithCurrentCodec ("\320"); // capital Eth, Icelandic m_entityDecodeMap["Eacute"] = encodeWithCurrentCodec ("\311"); // capital E, acute accent m_entityDecodeMap["Ecirc"] = encodeWithCurrentCodec ("\312"); // capital E, circumflex accent m_entityDecodeMap["Egrave"] = encodeWithCurrentCodec ("\310"); // capital E, grave accent m_entityDecodeMap["Euml"] = encodeWithCurrentCodec ("\313"); // capital E, dieresis or umlaut mark m_entityDecodeMap["Iacute"] = encodeWithCurrentCodec ("\315"); // capital I, acute accent m_entityDecodeMap["Icirc"] = encodeWithCurrentCodec ("\316"); // capital I, circumflex accent m_entityDecodeMap["Igrave"] = encodeWithCurrentCodec ("\314"); // capital I, grave accent m_entityDecodeMap["Iuml"] = encodeWithCurrentCodec ("\317"); // capital I, dieresis or umlaut mark m_entityDecodeMap["Ntilde"] = encodeWithCurrentCodec ("\321"); // capital N, tilde m_entityDecodeMap["Oacute"] = encodeWithCurrentCodec ("\323"); // capital O, acute accent m_entityDecodeMap["Ocirc"] = encodeWithCurrentCodec ("\324"); // capital O, circumflex accent m_entityDecodeMap["Ograve"] = encodeWithCurrentCodec ("\322"); // capital O, grave accent m_entityDecodeMap["Oslash"] = encodeWithCurrentCodec ("\330"); // capital O, slash m_entityDecodeMap["Otilde"] = encodeWithCurrentCodec ("\325"); // capital O, tilde m_entityDecodeMap["Ouml"] = encodeWithCurrentCodec ("\326"); // capital O, dieresis or umlaut mark m_entityDecodeMap["THORN"] = encodeWithCurrentCodec ("\336"); // capital THORN, Icelandic m_entityDecodeMap["Uacute"] = encodeWithCurrentCodec ("\332"); // capital U, acute accent m_entityDecodeMap["Ucirc"] = encodeWithCurrentCodec ("\333"); // capital U, circumflex accent m_entityDecodeMap["Ugrave"] = encodeWithCurrentCodec ("\331"); // capital U, grave accent m_entityDecodeMap["Uuml"] = encodeWithCurrentCodec ("\334"); // capital U, dieresis or umlaut mark m_entityDecodeMap["Yacute"] = encodeWithCurrentCodec ("\335"); // capital Y, acute accent m_entityDecodeMap["aacute"] = encodeWithCurrentCodec ("\341"); // small a, acute accent m_entityDecodeMap["acirc"] = encodeWithCurrentCodec ("\342"); // small a, circumflex accent m_entityDecodeMap["aelig"] = encodeWithCurrentCodec ("\346"); // small ae diphthong (ligature) m_entityDecodeMap["agrave"] = encodeWithCurrentCodec ("\340"); // small a, grave accent m_entityDecodeMap["aring"] = encodeWithCurrentCodec ("\345"); // small a, ring m_entityDecodeMap["atilde"] = encodeWithCurrentCodec ("\343"); // small a, tilde m_entityDecodeMap["auml"] = encodeWithCurrentCodec ("\344"); // small a, dieresis or umlaut mark m_entityDecodeMap["ccedil"] = encodeWithCurrentCodec ("\347"); // small c, cedilla m_entityDecodeMap["eacute"] = encodeWithCurrentCodec ("\351"); // small e, acute accent m_entityDecodeMap["ecirc"] = encodeWithCurrentCodec ("\352"); // small e, circumflex accent m_entityDecodeMap["egrave"] = encodeWithCurrentCodec ("\350"); // small e, grave accent m_entityDecodeMap["eth"] = encodeWithCurrentCodec ("\360"); // small eth, Icelandic m_entityDecodeMap["euml"] = encodeWithCurrentCodec ("\353"); // small e, dieresis or umlaut mark m_entityDecodeMap["iacute"] = encodeWithCurrentCodec ("\355"); // small i, acute accent m_entityDecodeMap["icirc"] = encodeWithCurrentCodec ("\356"); // small i, circumflex accent m_entityDecodeMap["igrave"] = encodeWithCurrentCodec ("\354"); // small i, grave accent m_entityDecodeMap["iuml"] = encodeWithCurrentCodec ("\357"); // small i, dieresis or umlaut mark m_entityDecodeMap["ntilde"] = encodeWithCurrentCodec ("\361"); // small n, tilde m_entityDecodeMap["oacute"] = encodeWithCurrentCodec ("\363"); // small o, acute accent m_entityDecodeMap["ocirc"] = encodeWithCurrentCodec ("\364"); // small o, circumflex accent m_entityDecodeMap["ograve"] = encodeWithCurrentCodec ("\362"); // small o, grave accent m_entityDecodeMap["oslash"] = encodeWithCurrentCodec ("\370"); // small o, slash m_entityDecodeMap["otilde"] = encodeWithCurrentCodec ("\365"); // small o, tilde m_entityDecodeMap["ouml"] = encodeWithCurrentCodec ("\366"); // small o, dieresis or umlaut mark m_entityDecodeMap["szlig"] = encodeWithCurrentCodec ("\337"); // small sharp s, German (sz ligature) m_entityDecodeMap["thorn"] = encodeWithCurrentCodec ("\376"); // small thorn, Icelandic m_entityDecodeMap["uacute"] = encodeWithCurrentCodec ("\372"); // small u, acute accent m_entityDecodeMap["ucirc"] = encodeWithCurrentCodec ("\373"); // small u, circumflex accent m_entityDecodeMap["ugrave"] = encodeWithCurrentCodec ("\371"); // small u, grave accent m_entityDecodeMap["uuml"] = encodeWithCurrentCodec ("\374"); // small u, dieresis or umlaut mark m_entityDecodeMap["yacute"] = encodeWithCurrentCodec ("\375"); // small y, acute accent m_entityDecodeMap["yuml"] = encodeWithCurrentCodec ("\377"); // small y, dieresis or umlaut mark m_entityDecodeMap["iexcl"] = encodeWithCurrentCodec ("\241"); m_entityDecodeMap["cent"] = encodeWithCurrentCodec ("\242"); m_entityDecodeMap["pound"] = encodeWithCurrentCodec ("\243"); m_entityDecodeMap["curren"] = encodeWithCurrentCodec ("\244"); m_entityDecodeMap["yen"] = encodeWithCurrentCodec ("\245"); m_entityDecodeMap["brvbar"] = encodeWithCurrentCodec ("\246"); m_entityDecodeMap["sect"] = encodeWithCurrentCodec ("\247"); m_entityDecodeMap["uml"] = encodeWithCurrentCodec ("\250"); m_entityDecodeMap["ordf"] = encodeWithCurrentCodec ("\252"); m_entityDecodeMap["laquo"] = encodeWithCurrentCodec ("\253"); m_entityDecodeMap["not"] = encodeWithCurrentCodec ("\254"); m_entityDecodeMap["shy"] = encodeWithCurrentCodec ("\255"); m_entityDecodeMap["macr"] = encodeWithCurrentCodec ("\257"); m_entityDecodeMap["deg"] = encodeWithCurrentCodec ("\260"); m_entityDecodeMap["plusmn"] = encodeWithCurrentCodec ("\261"); m_entityDecodeMap["sup1"] = encodeWithCurrentCodec ("\271"); m_entityDecodeMap["sup2"] = encodeWithCurrentCodec ("\262"); m_entityDecodeMap["sup3"] = encodeWithCurrentCodec ("\263"); m_entityDecodeMap["acute"] = encodeWithCurrentCodec ("\264"); m_entityDecodeMap["micro"] = encodeWithCurrentCodec ("\265"); m_entityDecodeMap["para"] = encodeWithCurrentCodec ("\266"); m_entityDecodeMap["middot"] = encodeWithCurrentCodec ("\267"); m_entityDecodeMap["cedil"] = encodeWithCurrentCodec ("\270"); m_entityDecodeMap["ordm"] = encodeWithCurrentCodec ("\272"); m_entityDecodeMap["raquo"] = encodeWithCurrentCodec ("\273"); m_entityDecodeMap["frac14"] = encodeWithCurrentCodec ("\274"); m_entityDecodeMap["frac12"] = encodeWithCurrentCodec ("\275"); m_entityDecodeMap["frac34"] = encodeWithCurrentCodec ("\276"); m_entityDecodeMap["iquest"] = encodeWithCurrentCodec ("\277"); m_entityDecodeMap["times"] = encodeWithCurrentCodec ("\327"); m_entityDecodeMap["divide"] = encodeWithCurrentCodec ("\367"); m_entityDecodeMap["copy"] = encodeWithCurrentCodec ("\251"); // copyright sign m_entityDecodeMap["reg"] = encodeWithCurrentCodec ("\256"); // registered sign m_entityDecodeMap["nbsp"] = encodeWithCurrentCodec ("\240"); // non breaking space m_entityDecodeMap["rsquo"] = QChar((unsigned short) 8217); m_entityDecodeMap["rdquo"] = QChar((unsigned short) 8221); m_entityDecodeMap["trade"] = QChar((unsigned short) 8482); m_entityDecodeMap["ldquo"] = QChar((unsigned short) 8220); m_entityDecodeMap["mdash"] = QChar((unsigned short) 8212); m_entityDecodeMap["amp"] = "&"; // ampersand m_entityDecodeMap["gt"] = ">"; // greater than m_entityDecodeMap["lt"] = "<"; // less than m_entityDecodeMap["quot"] = "\""; // double quote m_entityDecodeMap["apos"] = "'"; // single quote } int qbegin = tag.indexOf ('"', offset); if ( qbegin == -1 ) qFatal ("CHMFile::findStringInQuotes: cannot find first quote in <param> tag: '%s'", tag.toAscii().constData()); int qend = firstquote ? tag.indexOf ('"', qbegin + 1) : tag.lastIndexOf ('"'); if ( qend == -1 || qend <= qbegin ) qFatal ("CHMFile::findStringInQuotes: cannot find last quote in <param> tag: '%s'", tag.toAscii().constData()); // If we do not need to decode HTML entities, just return. if ( decodeentities ) { QString htmlentity = QString::null; bool fill_entity = false; value.reserve (qend - qbegin); // to avoid multiple memory allocations for ( int i = qbegin + 1; i < qend; i++ ) { if ( !fill_entity ) { if ( tag[i] == '&' ) // HTML entity starts fill_entity = true; else value.append (tag[i]); } else { if ( tag[i] == ';' ) // HTML entity ends { QMap<QString, QString>::const_iterator it = m_entityDecodeMap.find (htmlentity); if ( it == m_entityDecodeMap.end() ) { qWarning ("CHMFile::DecodeHTMLUnicodeEntity: could not decode HTML entity '%s', abort decoding.", htmlentity.toAscii().constData()); break; } value.append (it.value()); htmlentity = QString::null; fill_entity = false; } else htmlentity.append (tag[i]); } } } else value = tag.mid (qbegin + 1, qend - qbegin - 1); return qend + 1;}/*! Insert the \p url into the two maps \p UrlPage and \p PageUrl , taking care of checking if \p url is already in the maps and if the maps contain \p url without the <tt>#ref</tt> part of the url. Also increment \p num if the insertion is done. \note We suppose the two maps are kept in syncro. */static void insertIntoUrlMaps( QMap <QString, int> &UrlPage, QMap <int,QString> &PageUrl, const QString &url, int &num ){ int pos = url.indexOf ('#'); QString tmpurl = pos == -1 ? url : url.left (pos); // url already there, abort insertion if ( UrlPage.contains(tmpurl) ) return; // insert the url into the maps, but insert always the variant without // the #ref part UrlPage.insert(tmpurl,num); PageUrl.insert(num,tmpurl); num++;}CHMFile::CHMFile() : m_chmFile(NULL), m_home("/"){ m_textCodec = 0; m_currentEncoding = 0; m_detectedLCID = 0; m_lookupTablesValid = false;}CHMFile::CHMFile(const QString& archiveName) : m_chmFile(NULL), m_home("/"){ LoadCHM(archiveName);}CHMFile::~CHMFile(){ CloseCHM();}bool CHMFile::LoadCHM(const QString& archiveName){ if(m_chmFile) CloseCHM(); m_chmFile = chm_open (QFile::encodeName(archiveName)); if(m_chmFile == NULL) return false; m_filename = archiveName; // Every CHM has its own encoding m_textCodec = 0; m_currentEncoding = 0; // Get information from /#WINDOWS and /#SYSTEM files (encoding, title, context file and so) InfoFromWindows(); InfoFromSystem(); guessTextEncoding(); if ( ResolveObject("/#TOPICS", &m_chmTOPICS) && ResolveObject("/#STRINGS", &m_chmSTRINGS) && ResolveObject("/#URLTBL", &m_chmURLTBL) && ResolveObject("/#URLSTR", &m_chmURLSTR) ) m_lookupTablesValid = true; else m_lookupTablesValid = false; if ( m_lookupTablesValid && ResolveObject ("/$FIftiMain", &m_chmFIftiMain) ) m_searchAvailable = true; else m_searchAvailable = false; return true;}void CHMFile::CloseCHM(){ if ( m_chmFile == NULL ) return; chm_close(m_chmFile); m_chmFile = NULL; m_home = "/"; m_filename = m_home = m_topicsFile = m_indexFile = m_font = QString::null; m_PageUrl.clear(); m_UrlPage.clear(); m_entityDecodeMap.clear(); m_textCodec = 0; m_detectedLCID = 0; m_currentEncoding = 0; for ( chm_loaded_files_t::iterator it = m_chmLoadedFiles.begin(); it != m_chmLoadedFiles.end(); ++it ) delete it.value();}/* * FIXME: <OBJECT type="text/sitemap"><param name="Merge" value="hhaxref.chm::/HHOCX_c.hhc"></OBJECT> * (from htmlhelp.chm) */bool CHMFile::ParseHhcAndFillTree (const QString& file, QDomDocument *tree, bool asIndex){ chmUnitInfo ui; const int MAX_NEST_DEPTH = 256; if(file.isEmpty() || !ResolveObject(file, &ui)) return false; QString src; GetFileContentAsString(src, &ui); if(src.isEmpty()) return false; unsigned int defaultimagenum = asIndex ? KCHMImageType::IMAGE_INDEX : KCHMImageType::IMAGE_AUTO; unsigned int imagenum = defaultimagenum; int pos = 0, indent = 0; int pnum = 1; bool in_object = false, root_created = false; bool add2treemap = asIndex ? false : m_PageUrl.isEmpty() ; // do not add to the map during index search QString name; QStringList urls; QDomNode * rootentry[MAX_NEST_DEPTH]; QDomNode * lastchild[MAX_NEST_DEPTH]; memset (lastchild, 0, sizeof(*lastchild)); memset (rootentry, 0, sizeof(*rootentry)); // Split the HHC file by HTML tags int stringlen = src.length(); while ( pos < stringlen && (pos = src.indexOf ('<', pos)) != -1 ) { int i, word_end = 0; for ( i = ++pos; i < stringlen; i++ ) { // If a " or ' is found, skip to the next one. if ( (src[i] == '"' || src[i] == '\'') ) { // find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML) int nextpos = src.indexOf (src[i], i+1); if ( nextpos == -1 && (nextpos = src.indexOf ('>', i+1)) == -1 ) { qWarning ("CHMFile::ParseHhcAndFillTree: corrupted TOC: %s", src.mid(i).toAscii().constData()); return false; } i = nextpos; } else if ( src[i] == '>' ) break; else if ( !src[i].isLetterOrNumber() && src[i] != '/' && !word_end ) word_end = i; } QString tagword, tag = src.mid (pos, i - pos); if ( word_end ) tagword = src.mid (pos, word_end - pos).toLower();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -