📄 htmltokenizer.cpp

📁 monqueror一个很具有参考价值的源码
💻 CPP
📖 第 1 页 / 共 4 页
字号:
12 3 4 下一页
/*    This file is part of the KDE libraries    Copyright (C) 1997 Martin Jones (mjones@kde.org)              (C) 1997 Torben Weis (weis@kde.org)              (C) 1998 Waldo Bastian (bastian@kde.org)              (C) 1999 Lars Knoll (knoll@kde.org)              (C) 1999 Antti Koivisto (koivisto@kde.org)              (C) 2000 Dirk Mueller (mueller@kde.org)    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Library General Public    License as published by the Free Software Foundation; either    version 2 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Library General Public License for more details.    You should have received a copy of the GNU Library General Public License    along with this library; see the file COPYING.LIB.  
If not, write to    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,    Boston, MA 02111-1307, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget - Tokenizers// $Id: htmltokenizer.cpp,v 1.4 2002/03/07 07:21:55 ymwei Exp $//#define TOKEN_DEBUG//#define TOKEN_PRINT#include <assert.h>#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "render_interface.h"#include "mghtml_part.h"#include "mghtmlview.h"#include "render_object.h"#include "htmltokenizer.h"#include "htmltoken.h"#include "htmlparser.h"#include "htmltoken.h"#include "html_documentimpl.h"#include "dtd.h"#include "htmlhashes.h"#include "kcharsets.h"#include "kdebug.h"#ifdef JAVASCRIPT_ENABLE#include <libjs/jsapi.h>#include "../mgjs/mgjs_main.h"#include "../render/cachescript.h"#endif#define DEBUG_BY_XHTANG 0using namespace khtml;static const QChar commentStart [] = { '<','!','-','-' };static const QChar commentEnd [] = { '-','-','>' };static const QChar scriptEnd [] = { '<','/','s','c','r','i','p','t','>' };static const QChar styleEnd [] = { '<','/','s','t','y','l','e','>' };static const QChar listingEnd [] = { '<','/','l','i','s','t','i','n','g','>' };static const QChar textareaEnd [] = { '<','/','t','e','x','t','a','r','e','a','>' };#define QT_ALLOC_QCHAR_VEC( N ) (QChar*) new char[ sizeof(QChar)*( N ) ]#define QT_DELETE_QCHAR_VEC( P ) delete[] ((char*)( P ))// ----------------------------------------------------------------------------HTMLTokenizer::HTMLTokenizer(KHTMLParser *p, MGHTMLView *_view){    view = _view;    buffer = 0;    scriptCode = 0;	// TODO	/*    charsets = KGlobal::charsets();	*/    parser = p;    currToken = 0;    cachedScript = 0;    executingScript = false;    reset();}void HTMLTokenizer::reset(){#ifdef JAVASCRIPT_ENABLE    assert(executingScript == false);    if (cachedScript)        cachedScript->deref((CachedObjectClient*)this);    cachedScript = 0;#endif	    if ( buffer )        QT_DELETE_QCHAR_VEC(buffer);    
buffer = 0;    size = 0;	#ifdef JAVASCRIPT_ENABLE    if ( scriptCode )        QT_DELETE_QCHAR_VEC(scriptCode);    scriptCode = 0;#endif	    delete currToken;    currToken = 0;    //parser->reset();}void HTMLTokenizer::begin(){    executingScript = false;    reset();    currToken = 0;    size = 4095;    buffer = QT_ALLOC_QCHAR_VEC( 4096 );    dest = buffer;    tag = NoTag;    pending = NonePending;    discard = NoneDiscard;    pre = false;    prePos = 0;    plaintext = 0;    listing = false;    processingInstruction = false;    script = false;    style = false;    skipLF = false;    select = false;    comment = false;    textarea = false;    startTag = false;    tquote = NoQuote;    searchCount = 0;    charEntity = false;    loadingExtScript = false;    scriptSrc = "";    pendingSrc = "";    scriptOutput = "";    noMoreData = false;	//save string between <script> and </script>	m_script="";}void HTMLTokenizer::addListing(DOMStringIt list){    bool old_pre = pre;    // This function adds the listing 'list' as    // preformatted text-tokens to the token-collection    // thereby converting TABs.    
if(!style) pre = true;    prePos = 0;    while ( list.length() )    {        checkBuffer();        if (skipLF && ( list[0] != '\n' ))        {            skipLF = false;        }        if (skipLF)        {            skipLF = false;            ++list;        }        else if (( list[0] == '\n' ) || ( list[0] == '\r' ))        {            if (discard == LFDiscard)            {                // Ignore this LF                discard = NoneDiscard; // We have discarded 1 LF            }            else            {                // Process this LF                if (pending)                    addPending();                pending = LFPending;            }            /* Check for MS-DOS CRLF sequence */            if (list[0] == '\r')            {                skipLF = true;            }            ++list;        }        else if (( list[0] == ' ' ) || ( list[0] == '\t'))        {            if (pending)                addPending();            if (list[0] == ' ')                pending = SpacePending;            else                pending = TabPending;            ++list;        }        else        {            discard = NoneDiscard;            if (pending)                addPending();            prePos++;            *dest++ = list[0];            ++list;        }    }    if ((pending == SpacePending) || (pending == TabPending))    {        addPending();    }    pending = NonePending;    currToken->text = DOMString( buffer, dest-buffer);    processToken();    prePos = 0;    pre = old_pre;}void HTMLTokenizer::parseListing( DOMStringIt &src){    // We are inside a <script>, <style>, <textarea> or comment. 
Look for the end tag    // which is either </script>, </style> , </textarea> or -->    // otherwise print out every received character    if (charEntity) {        QChar *scriptCodeDest = scriptCode+scriptCodeSize;        parseEntity(src,scriptCodeDest);        scriptCodeSize = scriptCodeDest-scriptCode;    }#ifdef TOKEN_DEBUG    kdDebug( 6036 ) << "HTMLTokenizer::parseListing()" << endl;#endif#ifdef JAVASCRIPT_ENABLE    bool doScriptExec = false;#endif    while ( src.length() )    {        // do we need to enlarge the buffer?        checkBuffer();        // Allocate memory to store the script. We will write maximal        // 10 characers.        if ( scriptCodeSize + 10 > scriptCodeMaxSize )        {            int newsize = QMAX(scriptCodeMaxSize*2, scriptCodeMaxSize+1024);            QChar *newbuf = QT_ALLOC_QCHAR_VEC( newsize );            memcpy( newbuf, scriptCode, scriptCodeSize*sizeof(QChar) );            QT_DELETE_QCHAR_VEC(scriptCode);            scriptCode = newbuf;            scriptCodeMaxSize = newsize;        }        char ch = src[0].latin1();        if ( ( ch == '>' ) && ( searchFor[ searchCount ] == '>'))        {            ++src;            scriptCode[ scriptCodeSize ] = 0;            scriptCode[ scriptCodeSize + 1 ] = 0;            if (comment) currToken->id = ID_COMMENT; /// ####ifdef JAVASCRIPT_ENABLE			if (script)            {                if (!scriptSrc.isEmpty()) {					// TODO                    // forget what we just got; load from src url instead                       //cachedScript = parser->doc()->docLoader()->requestScript(scriptSrc, parser->doc()->baseURL());					unsigned char u[256];					DOMString bu = parser->doc()->baseURL();					uncharURL((DOM::DOMString &)scriptSrc,(DOM::DOMString &)bu,u);					cachedScript=parser->doc()->docLoader()->requestScript(u);                }                else {#if DEBUG_BY_XHTANG						QString xqs(scriptCode, scriptCodeSize);					fprintf(stderr,"parseListing xqs:%s\n",xqs.latin1());					
m_script+=xqs;#endif					#ifdef TOKEN_DEBUG                    kdDebug( 6036 ) << "---START SCRIPT---" << endl;                    kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl;                    kdDebug( 6036 ) << "---END SCRIPT---" << endl;#endif                    // Parse scriptCode containing <script> info                    doScriptExec = true;                }            }#endif            else 			if (style)            {		//kdDebug( 6036 ) << "---START STYLE---" << endl;		//kdDebug( 6036 ) << QString(scriptCode, scriptCodeSize) << endl;		//kdDebug( 6036 ) << "---END STYLE---" << endl;                // just add it. The style element will get a DOM::TextImpl passed, which it will                // convert into a StyleSheet.                addListing(DOMStringIt(scriptCode, scriptCodeSize));            }            else            {                //                // Add scriptcode to the buffer                addListing(DOMStringIt(scriptCode, scriptCodeSize));            }            processToken();            if(script)                currToken->id = ID_SCRIPT + ID_CLOSE_TAG;            else if(style)                currToken->id = ID_STYLE + ID_CLOSE_TAG;            else if (comment)                currToken->id = ID_COMMENT + ID_CLOSE_TAG;		    else if (textarea)				currToken->id = ID_TEXTAREA + ID_CLOSE_TAG;            else                currToken->id = ID_LISTING + ID_CLOSE_TAG;            processToken();#ifdef JAVASCRIPT_ENABLE			if (cachedScript) {#if DEBUG_BY_XHTANG	fprintf(stderr,"htmltokenizer cachedScript->ref\n");#endif	                cachedScript->ref((CachedObjectClient*)this);                if (cachedScript) { // will be 0 if script was already loaded and ref() executed it                    loadingExtScript = true;                    pendingSrc = QString(src.current(), src.length());                    _src = "";                    src = DOMStringIt();#if DEBUG_BY_XHTANG	fprintf(stderr,"htmltokenizer 
pendingSrc:%s\n",pendingSrc.latin1());#endif				}			}            else if (doScriptExec && javascript) {#if DEBUG_BY_XHTANG				fprintf(stderr,"htmltokenizer doScriptExec && javascript:%s\n",QString(scriptCode, scriptCodeSize).latin1());#endif				executingScript = true;                view->part()->executeScript(QString(scriptCode, scriptCodeSize));                executingScript = false;			}            script = style = listing = comment = textarea = false;            if(scriptCode)                QT_DELETE_QCHAR_VEC(scriptCode);            scriptCode = 0;            addScriptOutput();#endif            return; // Finished parsing script/style/comment/listing        }        // Find out wether we see an end tag without looking at        // any other then the current character, since further characters        // may still be on their way thru the web!        else if ( searchCount > 0 )        {            const QChar& cmp = src[0];            // broken HTML: "--->"            if (comment && searchCount == 2 && cmp.latin1() == '-' && searchBuffer[0].latin1() != '<')            {                scriptCode[ scriptCodeSize++ ] = cmp;                ++src;            }            // broken HTML: "--!>"            else if (comment && searchCount == 2 && cmp.latin1() == '!' 
&& searchBuffer[0].latin1() != '<')            {                ++src;            }            // be tolerant: skip spaces before the ">", i.e "</script >"            else if (!comment && cmp.isSpace() && searchFor[searchCount].latin1() == '>')            {                ++src;            }            else if ( cmp.lower() == searchFor[ searchCount ] )            {                searchBuffer[ searchCount++ ] = cmp;                ++src;            }            // We were wrong => print all buffered characters and the current one;            else            {                searchBuffer[ searchCount ] = 0;		DOMStringIt pit(searchBuffer,searchCount);		while (pit.length()) {		    if (textarea && pit[0] == '&') {			QChar *scriptCodeDest = scriptCode+scriptCodeSize;			++pit;			parseEntity(pit,scriptCodeDest,true);			scriptCodeSize = scriptCodeDest-scriptCode;		    }		    else {			scriptCode[ scriptCodeSize++ ] = pit[0];			++pit;		    }		}                searchCount = 0;            }        }        // Is this perhaps the start of the </script> or </style> tag, or --> (end of comment)?        else if ( ch == '<' || ch == '-' )        {            searchCount = 1;            searchBuffer[ 0 ] = src[0];            ++src;        }	else
12 3 4 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -