⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmltokenizer.cpp

📁 konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版本源码包.
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/*    This file is part of the KDE libraries    Copyright (C) 1997 Martin Jones (mjones@kde.org)              (C) 1997 Torben Weis (weis@kde.org)              (C) 1998 Waldo Bastian (bastian@kde.org)              (C) 1999 Lars Knoll (knoll@kde.org)              (C) 1999 Antti Koivisto (koivisto@kde.org)              (C) 2001-2003 Dirk Mueller (mueller@kde.org)              (C) 2002 Apple Computer, Inc.    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Library General Public    License as published by the Free Software Foundation; either    version 2 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Library General Public License for more details.    You should have received a copy of the GNU Library General Public License    along with this library; see the file COPYING.LIB.  If not, write to    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,    Boston, MA 02110-1301, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget - Tokenizers//#define TOKEN_DEBUG 1//#define TOKEN_DEBUG 2#ifdef HAVE_CONFIG_H#include "config.h"#endif#include "html/htmltokenizer.h"#include "html/html_documentimpl.h"#include "html/htmlparser.h"#include "html/dtd.h"#include "misc/loader.h"#include "misc/htmlhashes.h"#include "khtmlview.h"#include "khtml_part.h"#include "xml/dom_docimpl.h"#include "css/csshelper.h"#include "ecma/kjs_proxy.h"#include <kcharsets.h>#include <kglobal.h>#include <ctype.h>#include <assert.h>#include <qvariant.h>#include <kdebug.h>#include <stdlib.h>#include "kentities.c"using namespace khtml;static const QChar commentStart [] = { '<','!','-','-', QChar::null };static const char scriptEnd [] = "</script";static const char xmpEnd [] = "</xmp";static const char styleEnd [] =  "</style";static const char textareaEnd [] = "</textarea";static const char titleEnd [] = "</title";#define KHTML_ALLOC_QCHAR_VEC( N ) (QChar*) malloc( sizeof(QChar)*( N ) )#define KHTML_REALLOC_QCHAR_VEC(P, N ) (QChar*) realloc(P, sizeof(QChar)*( N ))#define KHTML_DELETE_QCHAR_VEC( P ) free((char*)( P ))// Full support for MS Windows extensions to Latin-1.// Technically these extensions should only be activated for pages// marked "windows-1252" or "cp1252", but// in the standard Microsoft way, these extensions infect hundreds of thousands// of web pages.  Note that people with non-latin-1 Microsoft extensions// are SOL.//// See: http://www.microsoft.com/globaldev/reference/WinCP.asp//      http://www.bbsinc.com/iso8859.html//      http://www.obviously.com///// There may be better equivalents#if 0#define fixUpChar(x)#else#define fixUpChar(x) \            switch ((x).unicode()) \            { \            /* ALL of these should be changed to Unicode SOON */ \            case 0x80: (x) = 0x20ac; break; \            case 0x82: (x) = ',';    break; \            case 0x83: (x) = 0x0192; break; \            case 0x84: (x) = '"';    break; \            case 0x85: (x) = 0x2026; break; \            case 0x86: (x) = 0x2020; break; \            case 0x87: (x) = 0x2021; break; \            case 0x88: (x) = 0x02C6; break; \            case 0x89: (x) = 0x2030; break; \            case 0x8A: (x) = 0x0160; break; \            case 0x8b: (x) = '<';    break; \            case 0x8C: (x) = 0x0152; break; \            case 0x8E: (x) = 0x017D; break; \            case 0x91: (x) = '\'';   break; \            case 0x92: (x) = '\'';   break; \            case 0x93: (x) = '"';    break; \            case 0x94: (x) = '"';    break; \            case 0x95: (x) = '*';    break; \            case 0x96: (x) = '-';    break; \            case 0x97: (x) = '-';    break; \            case 0x98: (x) = '~';    break; \            case 0x99: (x) = 0x2122; break; \            case 0x9A: (x) = 0x0161; break; \            case 0x9b: (x) = '>';    break; \            case 0x9C: (x) = 0x0153; break; \            case 0x9E: (x) = 0x017E; break; \            case 0x9F: (x) = 0x0178; break; \            default: break; \            }#endif// ----------------------------------------------------------------------------HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view){    view = _view;    buffer = 0;    scriptCode = 0;    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;    charsets = KGlobal::charsets();    parser = new KHTMLParser(_view, _doc);    m_executingScript = 0;    m_autoCloseTimer = 0;    onHold = false;    reset();}HTMLTokenizer::HTMLTokenizer(DOM::DocumentPtr *_doc, DOM::DocumentFragmentImpl *i){    view = 0;    buffer = 0;    scriptCode = 0;    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;    charsets = KGlobal::charsets();    parser = new KHTMLParser( i, _doc );    m_executingScript = 0;    m_autoCloseTimer = 0;    onHold = false;    reset();}void HTMLTokenizer::reset(){    assert(m_executingScript == 0);    Q_ASSERT(onHold == false);    m_abort = false;    while (!cachedScript.isEmpty())        cachedScript.dequeue()->deref(this);    if ( buffer )        KHTML_DELETE_QCHAR_VEC(buffer);    buffer = dest = 0;    size = 0;    if ( scriptCode )        KHTML_DELETE_QCHAR_VEC(scriptCode);    scriptCode = 0;    scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;    if (m_autoCloseTimer) {        killTimer(m_autoCloseTimer);        m_autoCloseTimer = 0;    }    currToken.reset();}void HTMLTokenizer::begin(){    m_executingScript = 0;    onHold = false;    reset();    size = 254;    buffer = KHTML_ALLOC_QCHAR_VEC( 255 );    dest = buffer;    tag = NoTag;    pending = NonePending;    discard = NoneDiscard;    pre = false;    prePos = 0;    plaintext = false;    xmp = false;    processingInstruction = false;    script = false;    escaped = false;    style = false;    skipLF = false;    select = false;    comment = false;    server = false;    textarea = false;    title = false;    startTag = false;    tquote = NoQuote;    searchCount = 0;    Entity = NoEntity;    noMoreData = false;    brokenComments = false;    brokenServer = false;    brokenScript = false;    lineno = 0;    scriptStartLineno = 0;    tagStartLineno = 0;}void HTMLTokenizer::processListing(TokenizerString list){    bool old_pre = pre;    // This function adds the listing 'list' as    // preformatted text-tokens to the token-collection    // thereby converting TABs.    if(!style) pre = true;    prePos = 0;    while ( !list.isEmpty() )    {        checkBuffer(3*TAB_SIZE);        if (skipLF && ( *list != '\n' ))        {            skipLF = false;        }        if (skipLF)        {            skipLF = false;            ++list;        }        else if (( *list == '\n' ) || ( *list == '\r' ))        {            if (discard == LFDiscard)            {                // Ignore this LF                discard = NoneDiscard; // We have discarded 1 LF            }            else            {                // Process this LF                if (pending)                    addPending();                // we used to do it not at all and we want to have                // it fixed for textarea. So here we are                if ( textarea ) {                    prePos++;                    *dest++ = *list;                } else                    pending = LFPending;            }            /* Check for MS-DOS CRLF sequence */            if (*list == '\r')            {                skipLF = true;            }            ++list;        }        else if (( *list == ' ' ) || ( *list == '\t'))        {            if (pending)                addPending();            if (*list == ' ')                pending = SpacePending;            else                pending = TabPending;            ++list;        }        else        {            discard = NoneDiscard;            if (pending)                addPending();            prePos++;            *dest++ = *list;            ++list;        }    }    if ((pending == SpacePending) || (pending == TabPending))        addPending();    else        pending = NonePending;    prePos = 0;    pre = old_pre;}void HTMLTokenizer::parseSpecial(TokenizerString &src){    assert( textarea || title || !Entity );    assert( !tag );    assert( xmp+textarea+title+style+script == 1 );    if (script)        scriptStartLineno = lineno+src.lineCount();    if ( comment ) parseComment( src );    while ( !src.isEmpty() ) {        checkScriptBuffer();        unsigned char ch = src->latin1();        if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && !title && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && QConstString( scriptCode+scriptCodeSize-3, 3 ).string() == "<!-" ) {            comment = true;            scriptCode[ scriptCodeSize++ ] = ch;            ++src;            parseComment( src );            continue;        }        if ( scriptCodeResync && !tquote && ( ch == '>' ) ) {            ++src;            scriptCodeSize = scriptCodeResync-1;            scriptCodeResync = 0;            scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;            if ( script )                scriptHandler();            else {                processListing(TokenizerString(scriptCode, scriptCodeSize));                processToken();                if ( style )         { currToken.tid = ID_STYLE + ID_CLOSE_TAG; }                else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; }                else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; }                else if ( xmp )  { currToken.tid = ID_XMP + ID_CLOSE_TAG; }                processToken();                script = style = textarea = title = xmp = false;                tquote = NoQuote;                scriptCodeSize = scriptCodeResync = 0;            }            return;        }        // possible end of tagname, lets check.        if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&             scriptCodeSize >= searchStopperLen &&             !QConstString( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).string().find( searchStopper, 0, false )) {            scriptCodeResync = scriptCodeSize-searchStopperLen+1;            tquote = NoQuote;            continue;        }        if ( scriptCodeResync && !escaped ) {            if(ch == '\"')                tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);            else if(ch == '\'')                tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;            else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))                tquote = NoQuote;        }        escaped = ( !escaped && ch == '\\' );        if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') {            QChar *scriptCodeDest = scriptCode+scriptCodeSize;            ++src;            parseEntity(src,scriptCodeDest,true);            scriptCodeSize = scriptCodeDest-scriptCode;        }        else {            scriptCode[ scriptCodeSize++ ] = *src;            ++src;        }    }}void HTMLTokenizer::scriptHandler(){    QString currentScriptSrc = scriptSrc;    scriptSrc = QString::null;    processListing(TokenizerString(scriptCode, scriptCodeSize));    QString exScript( buffer, dest-buffer );    processToken();    currToken.tid = ID_SCRIPT + ID_CLOSE_TAG;    processToken();    TokenizerString prependingSrc;    if ( !parser->skipMode() ) {        CachedScript* cs = 0;        // forget what we just got, load from src url instead        if ( !currentScriptSrc.isEmpty() &&             (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) ))            cachedScript.enqueue(cs);        if (cs) {            pendingSrc.prepend(src);            setSrc(TokenizerString());            scriptCodeSize = scriptCodeResync = 0;            cs->ref(this);        }        else if (currentScriptSrc.isEmpty() && view && javascript ) {            if ( !m_executingScript )                pendingSrc.prepend(src);            else                prependingSrc = src;            setSrc(TokenizerString());            scriptCodeSize = scriptCodeResync = 0;            scriptExecution( exScript, QString::null, tagStartLineno /*scriptStartLineno*/ );        }    }    script = false;    scriptCodeSize = scriptCodeResync = 0;    if ( !m_executingScript && cachedScript.isEmpty() ) {        // kdDebug( 6036 ) << "adding pending Output to parsed string" << endl;        src.append(pendingSrc);        pendingSrc.clear();    } else if ( !prependingSrc.isEmpty() )        write( prependingSrc, false );}void HTMLTokenizer::scriptExecution( const QString& str, const QString& scriptURL,                                     int baseLine){    bool oldscript = script;    m_executingScript++;    script = false;    QString url;    if (scriptURL.isNull() && view)      url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL().url();    else      url = scriptURL;    if (view)	view->part()->executeScript(url,baseLine+1,Node(),str);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -