preloadscanner.cpp

来自「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自Web」· C++ 代码 · 共 859 行 · 第 1/2 页
CPP
859 行
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "PreloadScanner.h"

#include "AtomicString.h"
#include "CachedCSSStyleSheet.h"
#include "CachedImage.h"
#include "CachedResource.h"
#include "CachedResourceClient.h"
#include "CachedScript.h"
#include "CSSHelper.h"
#include "CString.h"
#include "DocLoader.h"
#include "Document.h"
#include "Frame.h"
#include "FrameLoader.h"
#include "HTMLLinkElement.h"
#include "HTMLNames.h"
#include <wtf/CurrentTime.h>
#include <wtf/unicode/Unicode.h>

#if COMPILER(GCC)
// The main tokenizer includes this too so we are getting two copies of the data. However, this way the code gets inlined.
#include "HTMLEntityNames.c"
#else
// Not inlined for non-GCC compilers
struct Entity {
    const char* name;
    int code;
};
const struct Entity* findEntity(register const char* str, register unsigned int len);
#endif

// Set to 1 to print scanner lifetime messages and accumulate the time spent in write().
#define PRELOAD_DEBUG 0

using namespace WTF;

namespace WebCore {

using namespace HTMLNames;

// Creates an idle scanner for |doc|. No session is active until begin() is called.
PreloadScanner::PreloadScanner(Document* doc)
    : m_inProgress(false)
    , m_timeUsed(0)
    , m_bodySeen(false)
    , m_document(doc)
{
#if PRELOAD_DEBUG
    printf("CREATING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
#endif
}

// Only produces output when PRELOAD_DEBUG is enabled.
PreloadScanner::~PreloadScanner()
{
#if PRELOAD_DEBUG
    printf("DELETING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
    printf("TOTAL TIME USED %.4fs\n", m_timeUsed);
#endif
}

// Starts a scanning session: resets all per-session state and marks the scanner
// as in progress. Asserts that no session is already active.
void PreloadScanner::begin()
{
    ASSERT(!m_inProgress);
    reset();
    m_inProgress = true;
}

// Ends the current scanning session. Asserts that one is active.
void PreloadScanner::end()
{
    ASSERT(m_inProgress);
    m_inProgress = false;
}

// Restores the buffered source, the tokenizer state, the pending tag/attribute
// data, the recent-character ring buffer and the CSS scanning state to their
// initial values. m_bodySeen and m_timeUsed are deliberately not touched here.
void PreloadScanner::reset()
{
    m_source.clear();

    m_state = Data;
    m_escape = false;
    m_contentModel = PCDATA;
    m_commentPos = 0;
    m_closeTag = false;
    m_tagName.clear();
    m_attributeName.clear();
    m_attributeValue.clear();
    m_lastStartTag = AtomicString();

    m_urlToLoad = String();
    m_charset = String();
    m_linkIsStyleSheet = false;
    m_lastCharacterIndex = 0;
    clearLastCharacters();

    m_cssState = CSSInitial;
    m_cssRule.clear();
    m_cssRuleValue.clear();
}

// Returns true when the document already has a body or m_bodySeen has been set.
bool PreloadScanner::scanningBody() const
{
    return m_document->body() || m_bodySeen;
}

// Feeds |source| to the tokenizer. With PRELOAD_DEBUG enabled, the wall-clock
// time spent tokenizing is added to m_timeUsed.
void PreloadScanner::write(const SegmentedString& source)
{
#if PRELOAD_DEBUG
    double startTime = currentTime();
#endif
    tokenize(source);
#if PRELOAD_DEBUG
    m_timeUsed += currentTime() - startTime;
#endif
}

// Whitespace as far as this scanner is concerned: space, LF, CR and tab.
static inline bool isWhitespace(UChar c)
{
    return c == ' ' || c == '\n' || c == '\r' || c == '\t';
}

// Zero-fills the ring buffer of recently seen characters.
inline void PreloadScanner::clearLastCharacters()
{
    memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(UChar));
}

// Stores |c| in the next ring-buffer slot; the index wraps at lastCharactersBufferSize.
inline void PreloadScanner::rememberCharacter(UChar c)
{
    m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize;
    m_lastCharacters[m_lastCharacterIndex] = c;
}

// Returns true if the |count| most recently remembered characters equal
// chars[0..count), compared from the newest character backwards.
inline bool PreloadScanner::lastCharactersMatch(const char* chars, unsigned count) const
{
    unsigned pos = m_lastCharacterIndex;
    while (count) {
        if (chars[count - 1] != m_lastCharacters[pos])
            return false;
        --count;
        // Step one slot back in the ring buffer, wrapping from 0 to the last slot.
        if (!pos)
            pos = lastCharactersBufferSize;
        --pos;
    }
    return true;
}

// Maps code points that may not be produced by a numeric character reference
// (zero, anything above 0x10FFFF, and the surrogate range) to U+FFFD.
static inline unsigned legalEntityFor(unsigned value)
{
    // FIXME There is a table for more exceptions in the HTML5 specification.
    if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
        return 0xFFFD;
    return value;
}

// Saturates a partially accumulated numeric reference just above the Unicode
// range. Without this, a long digit run wraps around modulo 2^32 and can land
// back on a legal code point (e.g. "&#x100000041;" would decode as 'A').
// Once saturated, multiplying by 16 (or 10) and adding a digit still fits in
// 32 bits and stays above 0x10FFFF, so legalEntityFor() yields U+FFFD.
static inline unsigned clampEntityValue(unsigned value)
{
    return value > 0x10FFFF ? 0x110000 : value;
}

// Tries to consume a character reference from the front of |source|.
//
// Returns the decoded code point, or 0 when the input does not form a
// reference this function recognizes. On several failure paths the characters
// already taken from |source| are handed back through push()/prepend()
// (presumably re-inserting them at the front -- see SegmentedString).
//
// |notEnoughCharacters| is set to true when |source| runs dry in the middle of
// a candidate reference; in that case everything seen is prepended back and 0
// is returned. It is never set to false here, so the caller must initialize it.
unsigned PreloadScanner::consumeEntity(SegmentedString& source, bool& notEnoughCharacters)
{
    enum EntityState {
        Initial,
        NumberType,
        MaybeHex,
        Hex,
        Decimal,
        Named
    };
    EntityState entityState = Initial;
    unsigned result = 0;
    Vector<UChar, 10> seenChars;
    Vector<char, 10> entityName;

    while (!source.isEmpty()) {
        UChar cc = *source;
        seenChars.append(cc);
        switch (entityState) {
        case Initial:
            if (isWhitespace(cc) || cc == '<' || cc == '&')
                return 0;
            else if (cc == '#')
                entityState = NumberType;
            else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
                entityName.append(cc);
                entityState = Named;
            } else
                return 0;
            break;
        case NumberType:
            if (cc == 'x' || cc == 'X')
                entityState = MaybeHex;
            else if (cc >= '0' && cc <= '9') {
                entityState = Decimal;
                result = cc - '0';
            } else {
                source.push('#');
                return 0;
            }
            break;
        case MaybeHex:
            if (cc >= '0' && cc <= '9')
                result = cc - '0';
            else if (cc >= 'a' && cc <= 'f')
                result = 10 + cc - 'a';
            else if (cc >= 'A' && cc <= 'F')
                result = 10 + cc - 'A';
            else {
                // seenChars[1] is the 'x' / 'X' that followed the '#'.
                source.push(seenChars[1]);
                source.push('#');
                return 0;
            }
            entityState = Hex;
            break;
        case Hex:
            if (cc >= '0' && cc <= '9')
                result = clampEntityValue(result * 16 + cc - '0');
            else if (cc >= 'a' && cc <= 'f')
                result = clampEntityValue(result * 16 + 10 + cc - 'a');
            else if (cc >= 'A' && cc <= 'F')
                result = clampEntityValue(result * 16 + 10 + cc - 'A');
            else if (cc == ';') {
                source.advance();
                return legalEntityFor(result);
            } else
                return legalEntityFor(result);
            break;
        case Decimal:
            if (cc >= '0' && cc <= '9')
                result = clampEntityValue(result * 10 + cc - '0');
            else if (cc == ';') {
                source.advance();
                return legalEntityFor(result);
            } else
                return legalEntityFor(result);
            break;
        case Named:
            // This is the attribute only version, generic version matches somewhat differently
            while (entityName.size() <= 8) {
                if (cc == ';') {
                    const Entity* entity = findEntity(entityName.data(), entityName.size());
                    if (entity) {
                        source.advance();
                        return entity->code;
                    }
                    break;
                }
                if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
                    const Entity* entity = findEntity(entityName.data(), entityName.size());
                    if (entity)
                        return entity->code;
                    break;
                }
                entityName.append(cc);
                source.advance();
                if (source.isEmpty())
                    goto outOfCharacters;
                cc = *source;
                seenChars.append(cc);
            }
            // No named entity matched: hand back every seen character except the
            // last one, which has not been advanced past.
            if (seenChars.size() == 2)
                source.push(seenChars[0]);
            else if (seenChars.size() == 3) {
                source.push(seenChars[1]);
                source.push(seenChars[0]);
            } else
                source.prepend(SegmentedString(String(seenChars.data(), seenChars.size() - 1)));
            return 0;
        }
        source.advance();
    }
outOfCharacters:
    notEnoughCharacters = true;
    source.prepend(SegmentedString(String(seenChars.data(), seenChars.size())));
    return 0;
}

// Appends |source| to the buffered input and runs the tokenizer state machine
// over everything buffered. Asserts that a session is in progress.
// NOTE(review): this definition continues beyond the portion of the file
// visible here; only the visible states are reproduced below.
void PreloadScanner::tokenize(const SegmentedString& source)
{
    ASSERT(m_inProgress);

    m_source.append(source);

    // This is a simplified HTML5 Tokenizer
    // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    while (!m_source.isEmpty()) {
        UChar cc = *m_source;
        switch (m_state) {
        case Data:
            while (1) {
                rememberCharacter(cc);
                if (cc == '&') {
                    if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
                        m_state = EntityData;
                        break;
                    }
                } else if (cc == '-') {
                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
                        if (lastCharactersMatch("<!--", 4))
                            m_escape = true;
                    }
                } else if (cc == '<') {
                    if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
                        m_state = TagOpen;
                        break;
                    }
                } else if (cc == '>') {
                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
                        if (lastCharactersMatch("-->", 3))
                            m_escape = false;
                    }
                }
                emitCharacter(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = *m_source;
            }
            break;
        case EntityData:
            // should try to consume the entity but we only care about entities in attributes
            m_state = Data;
            break;
        case TagOpen:
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                if (cc == '/')
                    m_state = CloseTagOpen;
                else {
                    m_state = Data;
                    continue;
                }
            } else if (m_contentModel == PCDATA) {
                if (cc == '!')
                    m_state = MarkupDeclarationOpen;
                else if (cc == '/')
                    m_state = CloseTagOpen;
                else if (cc >= 'A' && cc <= 'Z') {
                    m_tagName.clear();
                    m_charset = String();
                    // Adding 0x20 lowercases an ASCII capital letter.
                    m_tagName.append(cc + 0x20);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc >= 'a' && cc <= 'z') {
                    m_tagName.clear();
                    m_charset = String();
                    m_tagName.append(cc);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc == '>') {
                    m_state = Data;
                } else if (cc == '?') {
                    m_state = BogusComment;
                } else {
                    m_state = Data;
                    continue;
                }
            }
            break;
        case CloseTagOpen:
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                if (!m_lastStartTag.length()) {
                    m_state = Data;
                    continue;
                }
                // Wait for more input until the tag name plus one delimiter is buffered.
                if (m_source.length() < m_lastStartTag.length() + 1)
                    return;
                // Look ahead at the next length + 1 characters, lowercased, and
                // put them back afterwards. Note that the lowercased copies are
                // what gets prepended.
                Vector<UChar> tmpString;
                UChar tmpChar = 0;
                bool match = true;
                for (unsigned n = 0; n < m_lastStartTag.length() + 1; n++) {
                    tmpChar = Unicode::toLower(*m_source);
                    if (n < m_lastStartTag.length() && tmpChar != m_lastStartTag[n])
                        match = false;
                    tmpString.append(tmpChar);
                    m_source.advance();
                }
                m_source.prepend(SegmentedString(String(tmpString.data(), tmpString.size())));
                if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
                    m_state = Data;
                    continue;
                }
            }
            if (cc >= 'A' && cc <= 'Z') {
                m_tagName.clear();
                m_charset = String();
                m_tagName.append(cc + 0x20);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc >= 'a' && cc <= 'z') {
                m_tagName.clear();
                m_charset = String();
                m_tagName.append(cc);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc == '>') {
                m_state = Data;
            } else
                m_state = BogusComment;
            break;
        case TagName:
            while (1) {
                if (isWhitespace(cc)) {
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '>') {
                    emitTag();
                    m_state = Data;
                    break;
                }
                if (cc == '/') {
preloadscanner.cpp - 源码说明

本页面展示了「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」中的 preloadscanner.cpp 源码文件，采用 C++ 编程语言编写，共 859 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与WebKit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?