📄 htmltokenizer.cpp
字号:
/* This file is part of the KDE libraries Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1998 Waldo Bastian (bastian@kde.org) (C) 1999 Lars Knoll (knoll@kde.org) (C) 1999 Antti Koivisto (koivisto@kde.org) (C) 2001-2003 Dirk Mueller (mueller@kde.org) (C) 2002 Apple Computer, Inc. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget - Tokenizers//#define TOKEN_DEBUG 1//#define TOKEN_DEBUG 2#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "html/htmltokenizer.h"#include "html/html_documentimpl.h"#include "html/htmlparser.h"#include "html/dtd.h"#include "misc/loader.h"#include "misc/htmlhashes.h"#include "khtmlview.h"#include "khtml_part.h"#include "xml/dom_docimpl.h"#include "css/csshelper.h"#include "ecma/kjs_proxy.h"#include <kcharsets.h>#include <kglobal.h>#include <ctype.h>#include <assert.h>#include <qvariant.h>#include <kdebug.h>#include <stdlib.h>#include "kentities.c"using namespace khtml;static const QChar commentStart [] = { '<','!','-','-', QChar::null };static const char scriptEnd [] = "</script";static const char xmpEnd [] = "</xmp";static const char styleEnd [] = "</style";static const char textareaEnd [] = "</textarea";static const char titleEnd [] = "</title";#define KHTML_ALLOC_QCHAR_VEC( N ) (QChar*) malloc( sizeof(QChar)*( N ) )#define KHTML_REALLOC_QCHAR_VEC(P, N ) (QChar*) realloc(P, sizeof(QChar)*( N ))#define KHTML_DELETE_QCHAR_VEC( P ) free((char*)( P ))// Full support for MS Windows extensions to Latin-1.// Technically these extensions should only be activated for pages// marked "windows-1252" or "cp1252", but// in the standard Microsoft way, these extensions infect hundreds of thousands// of web pages. Note that people with non-latin-1 Microsoft extensions// are SOL.//// See: http://www.microsoft.com/globaldev/reference/WinCP.asp// http://www.bbsinc.com/iso8859.html// http://www.obviously.com///// There may be better equivalents#if 0#define fixUpChar(x)#else#define fixUpChar(x) \ switch ((x).unicode()) \ { \ /* ALL of these should be changed to Unicode SOON */ \ case 0x80: (x) = 0x20ac; break; \ case 0x82: (x) = ','; break; \ case 0x83: (x) = 0x0192; break; \ case 0x84: (x) = '"'; break; \ case 0x85: (x) = 0x2026; break; \ case 0x86: (x) = 0x2020; break; \ case 0x87: (x) = 0x2021; break; \ case 0x88: (x) = 0x02C6; break; \ case 0x89: (x) = 0x2030; break; \ case 0x8A: (x) = 0x0160; break; \ case 0x8b: (x) = '<'; break; \ case 0x8C: (x) = 0x0152; break; \ case 0x8E: (x) = 0x017D; break; \ case 0x91: (x) = '\''; break; \ case 0x92: (x) = '\''; break; \ case 0x93: (x) = '"'; break; \ case 0x94: (x) = '"'; break; \ case 0x95: (x) = '*'; break; \ case 0x96: (x) = '-'; break; \ case 0x97: (x) = '-'; break; \ case 0x98: (x) = '~'; break; \ case 0x99: (x) = 0x2122; break; \ case 0x9A: (x) = 0x0161; break; \ case 0x9b: (x) = '>'; break; \ case 0x9C: (x) = 0x0153; break; \ case 0x9E: (x) = 0x017E; break; \ case 0x9F: (x) = 0x0178; break; \ default: break; \ }#endif// ----------------------------------------------------------------------------HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view){ view = _view; buffer = 0; scriptCode = 0; scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0; charsets = KGlobal::charsets(); parser = new KHTMLParser(_view, _doc); m_executingScript = 0; m_autoCloseTimer = 0; onHold = false; reset();}HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i){ view = 0; buffer = 0; scriptCode = 0; scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0; charsets = KGlobal::charsets(); parser = new KHTMLParser( i, _doc ); m_executingScript = 0; m_autoCloseTimer = 0; onHold = false; reset();}void HTMLTokenizer::reset(){ assert(m_executingScript == 0); Q_ASSERT(onHold == false); m_abort = false; while (!cachedScript.isEmpty()) cachedScript.dequeue()->deref(this); if ( buffer ) KHTML_DELETE_QCHAR_VEC(buffer); buffer = dest = 0; size = 0; if ( scriptCode ) KHTML_DELETE_QCHAR_VEC(scriptCode); scriptCode = 0; scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0; if (m_autoCloseTimer) { killTimer(m_autoCloseTimer); m_autoCloseTimer = 0; } currToken.reset();}void HTMLTokenizer::begin(){ m_executingScript = 0; onHold = false; reset(); size = 254; buffer = KHTML_ALLOC_QCHAR_VEC( 255 ); dest = buffer; tag = NoTag; pending = NonePending; discard = NoneDiscard; pre = false; prePos = 0; plaintext = false; xmp = false; processingInstruction = false; script = false; escaped = false; style = false; skipLF = false; select = false; comment = false; server = false; textarea = false; title = false; startTag = false; tquote = NoQuote; searchCount = 0; Entity = NoEntity; noMoreData = false; brokenComments = false; brokenServer = false; brokenScript = false; lineno = 0; scriptStartLineno = 0; tagStartLineno = 0;}void HTMLTokenizer::processListing(TokenizerString list){ bool old_pre = pre; // This function adds the listing 'list' as // preformatted text-tokens to the token-collection // thereby converting TABs. if(!style) pre = true; prePos = 0; while ( !list.isEmpty() ) { checkBuffer(3*TAB_SIZE); if (skipLF && ( *list != '\n' )) { skipLF = false; } if (skipLF) { skipLF = false; ++list; } else if (( *list == '\n' ) || ( *list == '\r' )) { if (discard == LFDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else { // Process this LF if (pending) addPending(); // we used to do it not at all and we want to have // it fixed for textarea. So here we are if ( textarea ) { prePos++; *dest++ = *list; } else pending = LFPending; } /* Check for MS-DOS CRLF sequence */ if (*list == '\r') { skipLF = true; } ++list; } else if (( *list == ' ' ) || ( *list == '\t')) { if (pending) addPending(); if (*list == ' ') pending = SpacePending; else pending = TabPending; ++list; } else { discard = NoneDiscard; if (pending) addPending(); prePos++; *dest++ = *list; ++list; } } if ((pending == SpacePending) || (pending == TabPending)) addPending(); else pending = NonePending; prePos = 0; pre = old_pre;}void HTMLTokenizer::parseSpecial(TokenizerString &src){ assert( textarea || title || !Entity ); assert( !tag ); assert( xmp+textarea+title+style+script == 1 ); if (script) scriptStartLineno = lineno+src.lineCount(); if ( comment ) parseComment( src ); while ( !src.isEmpty() ) { checkScriptBuffer(); unsigned char ch = src->latin1(); if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && !title && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && QConstString( scriptCode+scriptCodeSize-3, 3 ).string() == "<!-" ) { comment = true; scriptCode[ scriptCodeSize++ ] = ch; ++src; parseComment( src ); continue; } if ( scriptCodeResync && !tquote && ( ch == '>' ) ) { ++src; scriptCodeSize = scriptCodeResync-1; scriptCodeResync = 0; scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0; if ( script ) scriptHandler(); else { processListing(TokenizerString(scriptCode, scriptCodeSize)); processToken(); if ( style ) { currToken.tid = ID_STYLE + ID_CLOSE_TAG; } else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; } else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; } else if ( xmp ) { currToken.tid = ID_XMP + ID_CLOSE_TAG; } processToken(); script = style = textarea = title = xmp = false; tquote = NoQuote; scriptCodeSize = scriptCodeResync = 0; } return; } // possible end of tagname, lets check. if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch && scriptCodeSize >= searchStopperLen && !QConstString( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).string().find( searchStopper, 0, false )) { scriptCodeResync = scriptCodeSize-searchStopperLen+1; tquote = NoQuote; continue; } if ( scriptCodeResync && !escaped ) { if(ch == '\"') tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote); else if(ch == '\'') tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote; else if (tquote != NoQuote && (ch == '\r' || ch == '\n')) tquote = NoQuote; } escaped = ( !escaped && ch == '\\' ); if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') { QChar *scriptCodeDest = scriptCode+scriptCodeSize; ++src; parseEntity(src,scriptCodeDest,true); scriptCodeSize = scriptCodeDest-scriptCode; } else { scriptCode[ scriptCodeSize++ ] = *src; ++src; } }}void HTMLTokenizer::scriptHandler(){ QString currentScriptSrc = scriptSrc; scriptSrc = QString::null; processListing(TokenizerString(scriptCode, scriptCodeSize)); QString exScript( buffer, dest-buffer ); processToken(); currToken.tid = ID_SCRIPT + ID_CLOSE_TAG; processToken(); TokenizerString prependingSrc; if ( !parser->skipMode() ) { CachedScript* cs = 0; // forget what we just got, load from src url instead if ( !currentScriptSrc.isEmpty() && (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) cachedScript.enqueue(cs); if (cs) { pendingSrc.prepend(src); setSrc(TokenizerString()); scriptCodeSize = scriptCodeResync = 0; cs->ref(this); } else if (currentScriptSrc.isEmpty() && view && javascript ) { if ( !m_executingScript ) pendingSrc.prepend(src); else prependingSrc = src; setSrc(TokenizerString()); scriptCodeSize = scriptCodeResync = 0; scriptExecution( exScript, QString::null, tagStartLineno /*scriptStartLineno*/ ); } } script = false; scriptCodeSize = scriptCodeResync = 0; if ( !m_executingScript && cachedScript.isEmpty() ) { // kdDebug( 6036 ) << "adding pending Output to parsed string" << endl; src.append(pendingSrc); pendingSrc.clear(); } else if ( !prependingSrc.isEmpty() ) write( prependingSrc, false );}void HTMLTokenizer::scriptExecution( const QString& str, const QString& scriptURL, int baseLine){ bool oldscript = script; m_executingScript++; script = false; QString url; if (scriptURL.isNull() && view) url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL().url(); else url = scriptURL; if (view) view->part()->executeScript(url,baseLine+1,Node(),str);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -