📄 htmltokenizer.cpp
字号:
/* This file is part of the KDE libraries Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1998 Waldo Bastian (bastian@kde.org) (C) 1999 Lars Knoll (knoll@kde.org) (C) 1999 Antti Koivisto (koivisto@kde.org) (C) 2000 Dirk Mueller (mueller@kde.org) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget - Tokenizers// $Id: htmltokenizer.cpp,v 1.4 2002/03/07 07:21:55 ymwei Exp $//#define TOKEN_DEBUG//#define TOKEN_PRINT#include <assert.h>#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "render_interface.h"#include "mghtml_part.h"#include "mghtmlview.h"#include "render_object.h"#include "htmltokenizer.h"#include "htmltoken.h"#include "htmlparser.h"#include "htmltoken.h"#include "html_documentimpl.h"#include "dtd.h"#include "htmlhashes.h"#include "kcharsets.h"#include "kdebug.h"#ifdef JAVASCRIPT_ENABLE#include <libjs/jsapi.h>#include "../mgjs/mgjs_main.h"#include "../render/cachescript.h"#endif#define DEBUG_BY_XHTANG 0using namespace khtml;static const QChar commentStart [] = { '<','!','-','-' };static const QChar commentEnd [] = { '-','-','>' };static const QChar scriptEnd [] = { '<','/','s','c','r','i','p','t','>' };static const QChar styleEnd [] = { '<','/','s','t','y','l','e','>' };static const QChar listingEnd [] = { '<','/','l','i','s','t','i','n','g','>' };static const QChar textareaEnd [] = { '<','/','t','e','x','t','a','r','e','a','>' };#define QT_ALLOC_QCHAR_VEC( N ) (QChar*) new char[ sizeof(QChar)*( N ) ]#define QT_DELETE_QCHAR_VEC( P ) delete[] ((char*)( P ))// ----------------------------------------------------------------------------HTMLTokenizer::HTMLTokenizer(KHTMLParser *p, MGHTMLView *_view){ view = _view; buffer = 0; scriptCode = 0; // TODO /* charsets = KGlobal::charsets(); */ parser = p; currToken = 0; cachedScript = 0; executingScript = false; reset();}void HTMLTokenizer::reset(){#ifdef JAVASCRIPT_ENABLE assert(executingScript == false); if (cachedScript) cachedScript->deref((CachedObjectClient*)this); cachedScript = 0;#endif if ( buffer ) QT_DELETE_QCHAR_VEC(buffer); buffer = 0; size = 0; #ifdef JAVASCRIPT_ENABLE if ( scriptCode ) QT_DELETE_QCHAR_VEC(scriptCode); scriptCode = 0;#endif delete currToken; currToken = 0; //parser->reset();}void HTMLTokenizer::begin(){ executingScript = false; reset(); currToken = 0; size = 4095; buffer = QT_ALLOC_QCHAR_VEC( 4096 ); dest = buffer; tag = NoTag; pending = NonePending; discard = NoneDiscard; pre = false; prePos = 0; plaintext = 0; listing = false; processingInstruction = false; script = false; style = false; skipLF = false; select = false; comment = false; textarea = false; startTag = false; tquote = NoQuote; searchCount = 0; charEntity = false; loadingExtScript = false; scriptSrc = ""; pendingSrc = ""; scriptOutput = ""; noMoreData = false; //save string between <script> and </script> m_script="";}void HTMLTokenizer::addListing(DOMStringIt list){ bool old_pre = pre; // This function adds the listing 'list' as // preformatted text-tokens to the token-collection // thereby converting TABs. if(!style) pre = true; prePos = 0; while ( list.length() ) { checkBuffer(); if (skipLF && ( list[0] != '\n' )) { skipLF = false; } if (skipLF) { skipLF = false; ++list; } else if (( list[0] == '\n' ) || ( list[0] == '\r' )) { if (discard == LFDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else { // Process this LF if (pending) addPending(); pending = LFPending; } /* Check for MS-DOS CRLF sequence */ if (list[0] == '\r') { skipLF = true; } ++list; } else if (( list[0] == ' ' ) || ( list[0] == '\t')) { if (pending) addPending(); if (list[0] == ' ') pending = SpacePending; else pending = TabPending; ++list; } else { discard = NoneDiscard; if (pending) addPending(); prePos++; *dest++ = list[0]; ++list; } } if ((pending == SpacePending) || (pending == TabPending)) { addPending(); } pending = NonePending; currToken->text = DOMString( buffer, dest-buffer); processToken(); prePos = 0; pre = old_pre;}void HTMLTokenizer::parseListing( DOMStringIt &src){ // We are inside a <script>, <style>, <textarea> or comment. Look for the end tag // which is either </script>, </style> , </textarea> or --> // otherwise print out every received character if (charEntity) { QChar *scriptCodeDest = scriptCode+scriptCodeSize; parseEntity(src,scriptCodeDest); scriptCodeSize = scriptCodeDest-scriptCode; }#ifdef TOKEN_DEBUG kdDebug( 6036 ) << "HTMLTokenizer::parseListing()" << endl;#endif#ifdef JAVASCRIPT_ENABLE bool doScriptExec = false;#endif while ( src.length() ) { // do we need to enlarge the buffer? checkBuffer(); // Allocate memory to store the script. We will write maximal // 10 characers. if ( scriptCodeSize + 10 > scriptCodeMaxSize ) { int newsize = QMAX(scriptCodeMaxSize*2, scriptCodeMaxSize+1024); QChar *newbuf = QT_ALLOC_QCHAR_VEC( newsize ); memcpy( newbuf, scriptCode, scriptCodeSize*sizeof(QChar) ); QT_DELETE_QCHAR_VEC(scriptCode); scriptCode = newbuf; scriptCodeMaxSize = newsize; } char ch = src[0].latin1(); if ( ( ch == '>' ) && ( searchFor[ searchCount ] == '>')) { ++src; scriptCode[ scriptCodeSize ] = 0; scriptCode[ scriptCodeSize + 1 ] = 0; if (comment) currToken->id = ID_COMMENT; /// ####ifdef JAVASCRIPT_ENABLE if (script) { if (!scriptSrc.isEmpty()) { // TODO // forget what we just got; load from src url instead //cachedScript = parser->doc()->docLoader()->requestScript(scriptSrc, parser->doc()->baseURL()); unsigned char u[256]; DOMString bu = parser->doc()->baseURL(); uncharURL((DOM::DOMString &)scriptSrc,(DOM::DOMString &)bu,u); cachedScript=parser->doc()->docLoader()->requestScript(u); } else {#if DEBUG_BY_XHTANG QString xqs(scriptCode, scriptCodeSize); fprintf(stderr,"parseListing xqs:%s\n",xqs.latin1()); m_script+=xqs;#endif #ifdef TOKEN_DEBUG kdDebug( 6036 ) << "---START SCRIPT---" << endl; kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl; kdDebug( 6036 ) << "---END SCRIPT---" << endl;#endif // Parse scriptCode containing <script> info doScriptExec = true; } }#endif else if (style) { //kdDebug( 6036 ) << "---START STYLE---" << endl; //kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl; //kdDebug( 6036 ) << "---END STYLE---" << endl; // just add it. The style element will get a DOM::TextImpl passed, which it will // convert into a StyleSheet. addListing(DOMStringIt(scriptCode, scriptCodeSize)); } else { // // Add scriptcode to the buffer addListing(DOMStringIt(scriptCode, scriptCodeSize)); } processToken(); if(script) currToken->id = ID_SCRIPT + ID_CLOSE_TAG; else if(style) currToken->id = ID_STYLE + ID_CLOSE_TAG; else if (comment) currToken->id = ID_COMMENT + ID_CLOSE_TAG; else if (textarea) currToken->id = ID_TEXTAREA + ID_CLOSE_TAG; else currToken->id = ID_LISTING + ID_CLOSE_TAG; processToken();#ifdef JAVASCRIPT_ENABLE if (cachedScript) {#if DEBUG_BY_XHTANG fprintf(stderr,"htmltokenizer cachedScript->ref\n");#endif cachedScript->ref((CachedObjectClient*)this); if (cachedScript) { // will be 0 if script was already loaded and ref() executed it loadingExtScript = true; pendingSrc = QString(src.current(), src.length()); _src = ""; src = DOMStringIt();#if DEBUG_BY_XHTANG fprintf(stderr,"htmltokenizer pendingSrc:%s\n",pendingSrc.latin1());#endif } } else if (doScriptExec && javascript) {#if DEBUG_BY_XHTANG fprintf(stderr,"htmltokenizer doScriptExec && javascript:%s\n",QString(scriptCode, scriptCodeSize).latin1());#endif executingScript = true; view->part()->executeScript(QString(scriptCode, scriptCodeSize)); executingScript = false; } script = style = listing = comment = textarea = false; if(scriptCode) QT_DELETE_QCHAR_VEC(scriptCode); scriptCode = 0; addScriptOutput();#endif return; // Finished parsing script/style/comment/listing } // Find out wether we see an end tag without looking at // any other then the current character, since further characters // may still be on their way thru the web! else if ( searchCount > 0 ) { const QChar& cmp = src[0]; // broken HTML: "--->" if (comment && searchCount == 2 && cmp.latin1() == '-' && searchBuffer[0].latin1() != '<') { scriptCode[ scriptCodeSize++ ] = cmp; ++src; } // broken HTML: "--!>" else if (comment && searchCount == 2 && cmp.latin1() == '!' && searchBuffer[0].latin1() != '<') { ++src; } // be tolerant: skip spaces before the ">", i.e "</script >" else if (!comment && cmp.isSpace() && searchFor[searchCount].latin1() == '>') { ++src; } else if ( cmp.lower() == searchFor[ searchCount ] ) { searchBuffer[ searchCount++ ] = cmp; ++src; } // We were wrong => print all buffered characters and the current one; else { searchBuffer[ searchCount ] = 0; DOMStringIt pit(searchBuffer,searchCount); while (pit.length()) { if (textarea && pit[0] == '&') { QChar *scriptCodeDest = scriptCode+scriptCodeSize; ++pit; parseEntity(pit,scriptCodeDest,true); scriptCodeSize = scriptCodeDest-scriptCode; } else { scriptCode[ scriptCodeSize++ ] = pit[0]; ++pit; } } searchCount = 0; } } // Is this perhaps the start of the </script> or </style> tag, or --> (end of comment)? else if ( ch == '<' || ch == '-' ) { searchCount = 1; searchBuffer[ 0 ] = src[0]; ++src; } else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -