htmltokenizer.cpp

来自「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」· C++ 代码 · 共 1,638 行 · 第 1/5 页
CPP
1,638 行
/*
    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1998 Waldo Bastian (bastian@kde.org)
              (C) 1999 Lars Knoll (knoll@kde.org)
              (C) 1999 Antti Koivisto (koivisto@kde.org)
              (C) 2001 Dirk Mueller (mueller@kde.org)
    Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
    Copyright (C) 2005, 2006 Alexey Proskuryakov (ap@nypop.com)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

#include "config.h"
#include "HTMLTokenizer.h"

#include "CSSHelper.h"
#include "Cache.h"
#include "CachedScript.h"
#include "DocLoader.h"
#include "DocumentFragment.h"
#include "EventNames.h"
#include "Frame.h"
#include "FrameLoader.h"
#include "FrameView.h"
#include "HTMLElement.h"
#include "HTMLNames.h"
#include "HTMLParser.h"
#include "HTMLScriptElement.h"
#include "HTMLViewSourceDocument.h"
#include "Page.h"
#include "PreloadScanner.h"
#include "ScriptController.h"
#include "ScriptSourceCode.h"
#include "ScriptValue.h"
#include <wtf/ASCIICType.h>
#include <wtf/CurrentTime.h>

#include "HTMLEntityNames.c"

#define PRELOAD_SCANNER_ENABLED 1
// #define INSTRUMENT_LAYOUT_SCHEDULING 1

using namespace WTF;
using namespace std;

namespace WebCore {

using namespace HTMLNames;

#if MOBILE
// The mobile device needs to be responsive, as such the tokenizer chunk size is reduced.
// This value is used to define how many characters the tokenizer will process before
// yielding control.
static const int defaultTokenizerChunkSize = 256;
#else
static const int defaultTokenizerChunkSize = 4096;
#endif

#if MOBILE
// As the chunks are smaller (above), the tokenizer should not yield for as long a period, otherwise
// it will take way too long to load a page.
static const double defaultTokenizerTimeDelay = 0.300;
#else
// FIXME: We would like this constant to be 200ms.
// Yielding more aggressively results in increased responsiveness and better incremental rendering.
// It slows down overall page-load on slower machines, though, so for now we set a value of 500.
static const double defaultTokenizerTimeDelay = 0.500;
#endif

// Lower-case ASCII markers the tokenizer searches for (see tagMatch() for the
// case-insensitive comparison used against them).
static const char commentStart [] = "<!--";
static const char doctypeStart [] = "<!doctype";
static const char publicStart [] = "public";
static const char systemStart [] = "system";
static const char scriptEnd [] = "</script";
static const char xmpEnd [] = "</xmp";
static const char styleEnd [] = "</style";
static const char textareaEnd [] = "</textarea";
static const char titleEnd [] = "</title";
static const char iframeEnd [] = "</iframe";

// Full support for MS Windows extensions to Latin-1.
// Technically these extensions should only be activated for pages
// marked "windows-1252" or "cp1252", but
// in the standard Microsoft way, these extensions infect hundreds of thousands
// of web pages.  Note that people with non-latin-1 Microsoft extensions
// are SOL.
//
// See: http://www.microsoft.com/globaldev/reference/WinCP.asp
//      http://www.bbsinc.com/iso8859.html
//      http://www.obviously.com/
//
// There may be better equivalents

// We only need this for entities. For non-entity text, we handle this in the text encoding.
static const UChar windowsLatin1ExtensionArray[32] = {
    0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
    0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
};

// Maps a code unit in the range 0x80-0x9F through windowsLatin1ExtensionArray;
// every other value is returned unchanged.
static inline UChar fixUpChar(UChar c)
{
    // The mask test is true exactly when c lies outside 0x0080..0x009F.
    if ((c & ~0x1F) != 0x0080)
        return c;
    return windowsLatin1ExtensionArray[c - 0x80];
}

// Compares the first 'length' characters of s1 against s2.
// A position matches when s2[i] equals s1[i] or the ASCII upper-case form of
// s1[i]; callers in this file pass lower-case patterns such as scriptEnd as s1.
// Returns true when every position matches (trivially true for length == 0).
static inline bool tagMatch(const char* s1, const UChar* s2, unsigned length)
{
    for (unsigned i = 0; i != length; ++i) {
        unsigned char c1 = s1[i];
        unsigned char uc1 = toASCIIUpper(static_cast<char>(c1));
        UChar c2 = s2[i];
        if (c1 != c2 && uc1 != c2)
            return false;
    }
    return true;
}

// Appends an attribute (attrName -> attributeValue) to this token.
// Nothing is added when attrName is empty. The attribute map is created on
// first use. attrName is always reset to emptyAtom before returning, so the
// caller's name buffer is consumed by this call.
inline void Token::addAttribute(AtomicString& attrName, const AtomicString& attributeValue, bool viewSourceMode)
{
    if (!attrName.isEmpty()) {
        ASSERT(!attrName.contains('/'));
        RefPtr<MappedAttribute> a = MappedAttribute::create(attrName, attributeValue);
        if (!attrs) {
            attrs = NamedMappedAttrMap::create();
            attrs->reserveInitialCapacity(10);
        }
        attrs->insertAttribute(a.release(), viewSourceMode);
    }

    attrName = emptyAtom;
}

// ----------------------------------------------------------------------------

// Tokenizer for a regular HTML document: owns a new HTMLParser for 'doc'.
HTMLTokenizer::HTMLTokenizer(HTMLDocument* doc, bool reportErrors)
    : Tokenizer()
    , m_buffer(0)
    , m_scriptCode(0)
    , m_scriptCodeSize(0)
    , m_scriptCodeCapacity(0)
    , m_scriptCodeResync(0)
    , m_executingScript(0)
    , m_requestingScript(false)
    , m_hasScriptsWaitingForStylesheets(false)
    , m_timer(this, &HTMLTokenizer::timerFired)
    , m_doc(doc)
    , m_parser(new HTMLParser(doc, reportErrors))
    , m_inWrite(false)
    , m_fragment(false)
{
    begin();
}

// Tokenizer for a view-source document: no HTMLParser is created (m_parser is 0).
// NOTE(review): Tokenizer(true) presumably selects view-source mode -- confirm in Tokenizer.h.
HTMLTokenizer::HTMLTokenizer(HTMLViewSourceDocument* doc)
    : Tokenizer(true)
    , m_buffer(0)
    , m_scriptCode(0)
    , m_scriptCodeSize(0)
    , m_scriptCodeCapacity(0)
    , m_scriptCodeResync(0)
    , m_executingScript(0)
    , m_requestingScript(false)
    , m_hasScriptsWaitingForStylesheets(false)
    , m_timer(this, &HTMLTokenizer::timerFired)
    , m_doc(doc)
    , m_parser(0)
    , m_inWrite(false)
    , m_fragment(false)
{
    begin();
}

// Tokenizer for a document fragment: parses into 'frag', uses the fragment's
// owning document as m_doc, and sets m_fragment.
HTMLTokenizer::HTMLTokenizer(DocumentFragment* frag)
    : m_buffer(0)
    , m_scriptCode(0)
    , m_scriptCodeSize(0)
    , m_scriptCodeCapacity(0)
    , m_scriptCodeResync(0)
    , m_executingScript(0)
    , m_requestingScript(false)
    , m_hasScriptsWaitingForStylesheets(false)
    , m_timer(this, &HTMLTokenizer::timerFired)
    , m_doc(frag->document())
    , m_parser(new HTMLParser(frag))
    , m_inWrite(false)
    , m_fragment(true)
{
    begin();
}

// Releases everything the tokenizer is holding: unregisters from all pending
// scripts, frees the text and script buffers, stops the timer and clears the
// current/doctype tokens and related state flags.
// Must not be called while a script is executing (asserted).
void HTMLTokenizer::reset()
{
    ASSERT(m_executingScript == 0);

    // Detach from every script that is still pending.
    while (!m_pendingScripts.isEmpty()) {
        CachedScript* cs = m_pendingScripts.first().get();
        m_pendingScripts.removeFirst();
        ASSERT(cache()->disabled() || cs->accessCount() > 0);
        cs->removeClient(this);
    }

    fastFree(m_buffer);
    m_buffer = m_dest = 0;
    m_bufferSize = 0;

    fastFree(m_scriptCode);
    m_scriptCode = 0;
    m_scriptCodeSize = m_scriptCodeCapacity = m_scriptCodeResync = 0;

    m_timer.stop();
    m_state.setAllowYield(false);
    m_state.setForceSynchronous(false);

    m_currentToken.reset();
    m_doctypeToken.reset();
    m_doctypeSearchCount = 0;
    m_doctypeSecondarySearchCount = 0;
    m_hasScriptsWaitingForStylesheets = false;
}

// (Re)initializes the tokenizer for a new parse: clears script bookkeeping,
// calls reset(), allocates a fresh output buffer and picks the yield delay and
// chunk size from the page's custom settings when present, otherwise from the
// file-level defaults.
void HTMLTokenizer::begin()
{
    m_executingScript = 0;
    m_requestingScript = false;
    m_hasScriptsWaitingForStylesheets = false;
    m_state.setLoadingExtScript(false);
    reset();

    // Initial output buffer; the allocation is sized from m_bufferSize so the
    // two can never disagree.
    m_bufferSize = 254;
    m_buffer = static_cast<UChar*>(fastMalloc(sizeof(UChar) * m_bufferSize));
    m_dest = m_buffer;

    tquote = NoQuote;
    searchCount = 0;
    m_state.setEntityState(NoEntity);
    m_scriptTagSrcAttrValue = String();
    m_pendingSrc.clear();
    m_currentPrependingSrc = 0;
    m_noMoreData = false;
    m_brokenComments = false;
    m_brokenServer = false;
    m_lineNumber = 0;
    m_currentScriptTagStartLineNumber = 0;
    m_currentTagStartLineNumber = 0;
    m_state.setForceSynchronous(false);

    // m_doc->page() may be null; fall back to the defaults in that case.
    Page* page = m_doc->page();
    if (page && page->hasCustomHTMLTokenizerTimeDelay())
        m_tokenizerTimeDelay = page->customHTMLTokenizerTimeDelay();
    else
        m_tokenizerTimeDelay = defaultTokenizerTimeDelay;

    if (page && page->hasCustomHTMLTokenizerChunkSize())
        m_tokenizerChunkSize = page->customHTMLTokenizerChunkSize();
    else
        m_tokenizerChunkSize = defaultTokenizerChunkSize;
}

// Forwards the "force synchronous" flag to the tokenizer state.
void HTMLTokenizer::setForceSynchronous(bool force)
{
    m_state.setForceSynchronous(force);
}

// Copies 'list' into the output buffer as preformatted text and returns the
// updated state. Both '\r' and '\n' are written as '\n'; the '\n' directly
// following a '\r' is skipped (CRLF collapses to one '\n'); and one line break
// is dropped when state.discardLF() is set on entry.
HTMLTokenizer::State HTMLTokenizer::processListing(SegmentedString list, State state)
{
    // This function adds the listing 'list' as
    // preformatted text-tokens to the token-collection
    while (!list.isEmpty()) {
        if (state.skipLF()) {
            state.setSkipLF(false);
            if (*list == '\n') {
                list.advance();
                continue;
            }
        }

        checkBuffer();

        if (*list == '\n' || *list == '\r') {
            if (state.discardLF())
                // Ignore this LF
                state.setDiscardLF(false); // We have discarded 1 LF
            else
                *m_dest++ = '\n';

            /* Check for MS-DOS CRLF sequence */
            if (*list == '\r')
                state.setSkipLF(true);

            list.advance();
        } else {
            state.setDiscardLF(false);
            *m_dest++ = *list;
            list.advance();
        }
    }

    return state;
}

HTMLTokenizer::State HTMLTokenizer::parseSpecial(SegmentedString& src, State state)
{
    ASSERT(state.inTextArea() || state.inTitle() || state.inIFrame() || !state.hasEntityState());
    ASSERT(!state.hasTagState());
    ASSERT(state.inXmp() + state.inTextArea() + state.inTitle() + state.inStyle() + state.inScript() + state.inIFrame() == 1 );
    if (state.inScript() && !m_currentScriptTagStartLineNumber)
        m_currentScriptTagStartLineNumber = m_lineNumber;

    if (state.inComment())
        state = parseComment(src, state);

    int lastDecodedEntityPosition = -1;
htmltokenizer.cpp - 源码说明

本页面展示了「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」中的 htmltokenizer.cpp 源码文件，采用 C++ 编程语言编写，共 1,638 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与WebKit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?