📄 preloadscanner.cpp
字号:
/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "PreloadScanner.h"

#include "AtomicString.h"
#include "CachedCSSStyleSheet.h"
#include "CachedImage.h"
#include "CachedResource.h"
#include "CachedResourceClient.h"
#include "CachedScript.h"
#include "CSSHelper.h"
#include "CString.h"
#include "DocLoader.h"
#include "Document.h"
#include "Frame.h"
#include "FrameLoader.h"
#include "HTMLLinkElement.h"
#include "HTMLNames.h"
#include <wtf/CurrentTime.h>
#include <wtf/unicode/Unicode.h>

#if COMPILER(GCC)
// The main tokenizer includes this too so we are getting two copies of the data. However, this way the code gets inlined.
#include "HTMLEntityNames.c"
#else
// Not inlined for non-GCC compilers
struct Entity {
    const char* name;
    int code;
};
const struct Entity* findEntity(register const char* str, register unsigned int len);
#endif

// Set to 1 to print scanner lifetime and the accumulated time spent in write().
#define PRELOAD_DEBUG 0

using namespace WTF;

namespace WebCore {

using namespace HTMLNames;

// Creates a scanner bound to |doc|. Only the progress flag, the timing
// accumulator and the body-seen flag are initialised here; the tokenizer
// state is established by reset(), which begin() calls.
PreloadScanner::PreloadScanner(Document* doc)
    : m_inProgress(false)
    , m_timeUsed(0)
    , m_bodySeen(false)
    , m_document(doc)
{
#if PRELOAD_DEBUG
    printf("CREATING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
#endif
}

PreloadScanner::~PreloadScanner()
{
#if PRELOAD_DEBUG
    printf("DELETING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data());
    printf("TOTAL TIME USED %.4fs\n", m_timeUsed);
#endif
}

// Starts a scanning session: resets all state and marks the scanner as in
// progress. Must not be called while a session is already in progress.
void PreloadScanner::begin()
{
    ASSERT(!m_inProgress);
    reset();
    m_inProgress = true;
}

// Ends the current scanning session. Must only be called after begin().
void PreloadScanner::end()
{
    ASSERT(m_inProgress);
    m_inProgress = false;
}

// Discards any buffered source and returns the tokenizer, the per-tag
// attribute buffers, the preload bookkeeping and the CSS scanning state to
// their starting values.
void PreloadScanner::reset()
{
    m_source.clear();

    m_state = Data;
    m_escape = false;
    m_contentModel = PCDATA;
    m_commentPos = 0;

    m_closeTag = false;
    m_tagName.clear();
    m_attributeName.clear();
    m_attributeValue.clear();
    m_lastStartTag = AtomicString();

    m_urlToLoad = String();
    m_charset = String();
    m_linkIsStyleSheet = false;
    m_lastCharacterIndex = 0;
    clearLastCharacters();

    m_cssState = CSSInitial;
    m_cssRule.clear();
    m_cssRuleValue.clear();
}

// Returns true when the document already has a body element or when
// m_bodySeen has been set.
bool PreloadScanner::scanningBody() const
{
    return m_document->body() || m_bodySeen;
}

// Feeds |source| to the tokenizer. With PRELOAD_DEBUG enabled the wall-clock
// time spent tokenizing is added to m_timeUsed.
void PreloadScanner::write(const SegmentedString& source)
{
#if PRELOAD_DEBUG
    double startTime = currentTime();
#endif
    tokenize(source);
#if PRELOAD_DEBUG
    m_timeUsed += currentTime() - startTime;
#endif
}

// Whitespace as understood by this scanner: space, LF, CR and tab.
// NOTE(review): form feed is not in this set - confirm whether that is intended.
static inline bool isWhitespace(UChar c)
{
    return c == ' ' || c == '\n' || c == '\r' || c == '\t';
}

// Zeroes the whole buffer of recently seen characters.
inline void PreloadScanner::clearLastCharacters()
{
    memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(UChar));
}

// Stores |c| in the next slot of m_lastCharacters; the write index wraps
// around at lastCharactersBufferSize, so the oldest entry is overwritten.
inline void PreloadScanner::rememberCharacter(UChar c)
{
    m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize;
    m_lastCharacters[m_lastCharacterIndex] = c;
}

// Returns true when the |count| most recently remembered characters equal
// chars[0..count-1]. The comparison walks backwards from the newest
// character, wrapping around the buffer; a |count| larger than
// lastCharactersBufferSize would therefore revisit slots already compared.
inline bool PreloadScanner::lastCharactersMatch(const char* chars, unsigned count) const
{
    unsigned pos = m_lastCharacterIndex;
    while (count) {
        if (chars[count - 1] != m_lastCharacters[pos])
            return false;
        --count;
        if (!pos)
            pos = lastCharactersBufferSize;
        --pos;
    }
    return true;
}

// Maps a numeric character reference value to the code point to use for it:
// zero, values above U+10FFFF and surrogates become U+FFFD.
static inline unsigned legalEntityFor(unsigned value)
{
    // FIXME There is a table for more exceptions in the HTML5 specification.
    if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
        return 0xFFFD;
    return value;
}

// Appends one digit (in the given base) to a numeric character reference
// that is being accumulated in |value|.
//
// Once |value| has passed U+10FFFF it is left untouched: it can never become
// a legal code point again, and continuing to multiply would eventually wrap
// the unsigned accumulator back into the legal range (e.g. "&#x100000041;"
// would come out as U+0041 instead of U+FFFD).
static inline unsigned appendEntityDigit(unsigned value, unsigned base, unsigned digit)
{
    if (value > 0x10FFFF)
        return value;
    return value * base + digit;
}

// Tries to consume a character reference from |source|, which is expected to
// be positioned just after the '&'.
//
// Returns the code point of the reference on success. Returns 0 when the
// input is not a character reference, and also when |source| runs out before
// a decision can be made; in the latter case |notEnoughCharacters| is set to
// true and every character consumed so far is put back in front of |source|.
// |notEnoughCharacters| is never set to false here, so the caller has to
// initialise it.
unsigned PreloadScanner::consumeEntity(SegmentedString& source, bool& notEnoughCharacters)
{
    enum EntityState {
        Initial,
        NumberType,
        MaybeHex,
        Hex,
        Decimal,
        Named
    };
    EntityState entityState = Initial;
    unsigned result = 0;
    // Every character looked at so far, kept so that it can be pushed back.
    Vector<UChar, 10> seenChars;
    Vector<char, 10> entityName;

    while (!source.isEmpty()) {
        UChar cc = *source;
        seenChars.append(cc);
        switch (entityState) {
        case Initial:
            if (isWhitespace(cc) || cc == '<' || cc == '&')
                return 0;
            else if (cc == '#')
                entityState = NumberType;
            else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
                entityName.append(cc);
                entityState = Named;
            } else
                return 0;
            break;
        case NumberType:
            if (cc == 'x' || cc == 'X')
                entityState = MaybeHex;
            else if (cc >= '0' && cc <= '9') {
                entityState = Decimal;
                result = cc - '0';
            } else {
                source.push('#');
                return 0;
            }
            break;
        case MaybeHex:
            if (cc >= '0' && cc <= '9')
                result = cc - '0';
            else if (cc >= 'a' && cc <= 'f')
                result = 10 + cc - 'a';
            else if (cc >= 'A' && cc <= 'F')
                result = 10 + cc - 'A';
            else {
                // Not a hex reference after all: put back the 'x'/'X' and the '#'.
                source.push(seenChars[1]);
                source.push('#');
                return 0;
            }
            entityState = Hex;
            break;
        case Hex:
            if (cc >= '0' && cc <= '9')
                result = appendEntityDigit(result, 16, cc - '0');
            else if (cc >= 'a' && cc <= 'f')
                result = appendEntityDigit(result, 16, 10 + cc - 'a');
            else if (cc >= 'A' && cc <= 'F')
                result = appendEntityDigit(result, 16, 10 + cc - 'A');
            else if (cc == ';') {
                source.advance();
                return legalEntityFor(result);
            } else
                return legalEntityFor(result);
            break;
        case Decimal:
            if (cc >= '0' && cc <= '9')
                result = appendEntityDigit(result, 10, cc - '0');
            else if (cc == ';') {
                source.advance();
                return legalEntityFor(result);
            } else
                return legalEntityFor(result);
            break;
        case Named:
            // This is the attribute only version, generic version matches somewhat differently
            while (entityName.size() <= 8) {
                if (cc == ';') {
                    const Entity* entity = findEntity(entityName.data(), entityName.size());
                    if (entity) {
                        source.advance();
                        return entity->code;
                    }
                    break;
                }
                if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
                    const Entity* entity = findEntity(entityName.data(), entityName.size());
                    if (entity)
                        return entity->code;
                    break;
                }
                entityName.append(cc);
                source.advance();
                if (source.isEmpty())
                    goto outOfCharacters;
                cc = *source;
                seenChars.append(cc);
            }
            // No entity matched: put back everything consumed. The last entry of
            // seenChars is the current character, which was never consumed.
            if (seenChars.size() == 2)
                source.push(seenChars[0]);
            else if (seenChars.size() == 3) {
                source.push(seenChars[1]);
                source.push(seenChars[0]);
            } else
                source.prepend(SegmentedString(String(seenChars.data(), seenChars.size() - 1)));
            return 0;
        }
        source.advance();
    }
outOfCharacters:
    notEnoughCharacters = true;
    source.prepend(SegmentedString(String(seenChars.data(), seenChars.size())));
    return 0;
}

void PreloadScanner::tokenize(const SegmentedString& source)
{
    ASSERT(m_inProgress);

    m_source.append(source);

    // This is a simplified HTML5 Tokenizer
    // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
    while (!m_source.isEmpty()) {
        UChar cc = *m_source;
        switch (m_state) {
        case Data:
            while (1) {
                rememberCharacter(cc);
                if (cc == '&') {
                    if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
                        m_state = EntityData;
                        break;
                    }
                } else if (cc == '-') {
                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
                        if (lastCharactersMatch("<!--", 4))
                            m_escape = true;
                    }
                } else if (cc == '<') {
                    if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
                        m_state = TagOpen;
                        break;
                    }
                } else if (cc == '>') {
                    if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
                        if (lastCharactersMatch("-->", 3))
                            m_escape = false;
                    }
                }
                emitCharacter(cc);
                m_source.advance();
                if (m_source.isEmpty())
                    return;
                cc = *m_source;
            }
            break;
        case EntityData:
            // should try to consume the entity but we only care about entities in attributes
            m_state = Data;
            break;
        case TagOpen:
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                if (cc == '/')
                    m_state = CloseTagOpen;
                else {
                    m_state = Data;
                    continue;
                }
            } else if (m_contentModel == PCDATA) {
                if (cc == '!')
                    m_state = MarkupDeclarationOpen;
                else if (cc == '/')
                    m_state = CloseTagOpen;
                else if (cc >= 'A' && cc <= 'Z') {
                    m_tagName.clear();
                    m_charset = String();
                    m_tagName.append(cc + 0x20);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc >= 'a' && cc <= 'z') {
                    m_tagName.clear();
                    m_charset = String();
                    m_tagName.append(cc);
                    m_closeTag = false;
                    m_state = TagName;
                } else if (cc == '>') {
                    m_state = Data;
                } else if (cc == '?') {
                    m_state = BogusComment;
                } else {
                    m_state = Data;
                    continue;
                }
            }
            break;
        case CloseTagOpen:
            if (m_contentModel == RCDATA || m_contentModel == CDATA) {
                if (!m_lastStartTag.length()) {
                    m_state = Data;
                    continue;
                }
                if (m_source.length() < m_lastStartTag.length() + 1)
                    return;
                Vector<UChar> tmpString;
                UChar tmpChar = 0;
                bool match = true;
                for (unsigned n = 0; n < m_lastStartTag.length() + 1; n++) {
                    tmpChar = Unicode::toLower(*m_source);
                    if (n < m_lastStartTag.length() && tmpChar != m_lastStartTag[n])
                        match = false;
                    tmpString.append(tmpChar);
                    m_source.advance();
                }
                m_source.prepend(SegmentedString(String(tmpString.data(), tmpString.size())));
                if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
                    m_state = Data;
                    continue;
                }
            }
            if (cc >= 'A' && cc <= 'Z') {
                m_tagName.clear();
                m_charset = String();
                m_tagName.append(cc + 0x20);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc >= 'a' && cc <= 'z') {
                m_tagName.clear();
                m_charset = String();
                m_tagName.append(cc);
                m_closeTag = true;
                m_state = TagName;
            } else if (cc == '>') {
                m_state = Data;
            } else
                m_state = BogusComment;
            break;
        case TagName:
            while (1) {
                if (isWhitespace(cc)) {
                    m_state = BeforeAttributeName;
                    break;
                }
                if (cc == '>') {
                    emitTag();
                    m_state = Data;
                    break;
                }
                if (cc == '/') {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -