📄 htmltokenizer.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    while (!src.isEmpty()) {        checkScriptBuffer();        UChar ch = *src;        if (!m_scriptCodeResync && !m_brokenComments &&            !state.inXmp() && ch == '-' && m_scriptCodeSize >= 3 && !src.escaped() &&            m_scriptCode[m_scriptCodeSize - 3] == '<' && m_scriptCode[m_scriptCodeSize - 2] == '!' && m_scriptCode[m_scriptCodeSize - 1] == '-' &&            (lastDecodedEntityPosition < m_scriptCodeSize - 3)) {            state.setInComment(true);            state = parseComment(src, state);            continue;        }        if (m_scriptCodeResync && !tquote && ch == '>') {            src.advancePastNonNewline();            m_scriptCodeSize = m_scriptCodeResync - 1;            m_scriptCodeResync = 0;            m_scriptCode[m_scriptCodeSize] = m_scriptCode[m_scriptCodeSize + 1] = 0;            if (state.inScript())                state = scriptHandler(state);            else {                state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);                processToken();                if (state.inStyle()) {                     m_currentToken.tagName = styleTag.localName();                    m_currentToken.beginTag = false;                } else if (state.inTextArea()) {                     m_currentToken.tagName = textareaTag.localName();                    m_currentToken.beginTag = false;                } else if (state.inTitle()) {                     m_currentToken.tagName = titleTag.localName();                    m_currentToken.beginTag = false;                } else if (state.inXmp()) {                    m_currentToken.tagName = xmpTag.localName();                    m_currentToken.beginTag = false;                } else if (state.inIFrame()) {                    m_currentToken.tagName = iframeTag.localName();                    m_currentToken.beginTag = false;                }                processToken();                state.setInStyle(false);                state.setInScript(false);                state.setInTextArea(false);                state.setInTitle(false);                state.setInXmp(false);                state.setInIFrame(false);                tquote = NoQuote;                m_scriptCodeSize = m_scriptCodeResync = 0;            }            return state;        }        // possible end of tagname, lets check.        if (!m_scriptCodeResync && !state.escaped() && !src.escaped() && (ch == '>' || ch == '/' || isASCIISpace(ch)) &&             m_scriptCodeSize >= m_searchStopperLength &&             tagMatch(m_searchStopper, m_scriptCode + m_scriptCodeSize - m_searchStopperLength, m_searchStopperLength) &&             (lastDecodedEntityPosition < m_scriptCodeSize - m_searchStopperLength)) {            m_scriptCodeResync = m_scriptCodeSize-m_searchStopperLength+1;            tquote = NoQuote;            continue;        }        if (m_scriptCodeResync && !state.escaped()) {            if (ch == '\"')                tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);            else if (ch == '\'')                tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;            else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))                tquote = NoQuote;        }        state.setEscaped(!state.escaped() && ch == '\\');        if (!m_scriptCodeResync && (state.inTextArea() || state.inTitle() || state.inIFrame()) && !src.escaped() && ch == '&') {            UChar* scriptCodeDest = m_scriptCode + m_scriptCodeSize;            src.advancePastNonNewline();            state = parseEntity(src, scriptCodeDest, state, m_cBufferPos, true, false);            if (scriptCodeDest == m_scriptCode + m_scriptCodeSize)                lastDecodedEntityPosition = m_scriptCodeSize;            else                m_scriptCodeSize = scriptCodeDest - m_scriptCode;        } else {            m_scriptCode[m_scriptCodeSize++] = ch;            src.advance(m_lineNumber);        }    }    return state;}HTMLTokenizer::State HTMLTokenizer::scriptHandler(State state){    // We are inside a <script>    bool doScriptExec = false;    int startLine = m_currentScriptTagStartLineNumber + 1; // Script line numbers are 1 based, HTMLTokenzier line numbers are 0 based    // Reset m_currentScriptTagStartLineNumber to indicate that we've finished parsing the current script element    m_currentScriptTagStartLineNumber = 0;    // (Bugzilla 3837) Scripts following a frameset element should not execute or,     // in the case of extern scripts, even load.    bool followingFrameset = (m_doc->body() && m_doc->body()->hasTagName(framesetTag));      CachedScript* cs = 0;    // don't load external scripts for standalone documents (for now)    if (!inViewSourceMode()) {        if (!m_scriptTagSrcAttrValue.isEmpty() && m_doc->frame()) {            // forget what we just got; load from src url instead            if (!m_parser->skipMode() && !followingFrameset) {#ifdef INSTRUMENT_LAYOUT_SCHEDULING                if (!m_doc->ownerElement())                    printf("Requesting script at time %d\n", m_doc->elapsedTime());#endif                // The parser might have been stopped by for example a window.close call in an earlier script.                // If so, we don't want to load scripts.                if (!m_parserStopped && (cs = m_doc->docLoader()->requestScript(m_scriptTagSrcAttrValue, m_scriptTagCharsetAttrValue)))                    m_pendingScripts.append(cs);                else                    m_scriptNode = 0;            } else                m_scriptNode = 0;            m_scriptTagSrcAttrValue = String();        } else {            // Parse m_scriptCode containing <script> info            doScriptExec = m_scriptNode->shouldExecuteAsJavaScript();            m_scriptNode = 0;        }    }    state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize), state);    RefPtr<Node> node = processToken();    String scriptString = node ? node->textContent() : "";    m_currentToken.tagName = scriptTag.localName();    m_currentToken.beginTag = false;    processToken();    state.setInScript(false);    m_scriptCodeSize = m_scriptCodeResync = 0;        // FIXME: The script should be syntax highlighted.    if (inViewSourceMode())        return state;    SegmentedString* savedPrependingSrc = m_currentPrependingSrc;    SegmentedString prependingSrc;    m_currentPrependingSrc = &prependingSrc;    if (!m_parser->skipMode() && !followingFrameset) {        if (cs) {            if (savedPrependingSrc)                savedPrependingSrc->append(m_src);            else                m_pendingSrc.prepend(m_src);            setSrc(SegmentedString());            // the ref() call below may call notifyFinished if the script is already in cache,            // and that mucks with the state directly, so we must write it back to the object.            m_state = state;            bool savedRequestingScript = m_requestingScript;            m_requestingScript = true;            cs->addClient(this);            m_requestingScript = savedRequestingScript;            state = m_state;            // will be 0 if script was already loaded and ref() executed it            if (!m_pendingScripts.isEmpty())                state.setLoadingExtScript(true);        } else if (!m_fragment && doScriptExec) {            if (!m_executingScript)                m_pendingSrc.prepend(m_src);            else                prependingSrc = m_src;            setSrc(SegmentedString());            state = scriptExecution(ScriptSourceCode(scriptString, m_doc->frame() ? m_doc->frame()->document()->url() : KURL(), startLine), state);        }    }    if (!m_executingScript && !state.loadingExtScript()) {        m_src.append(m_pendingSrc);        m_pendingSrc.clear();    } else if (!prependingSrc.isEmpty()) {        // restore first so that the write appends in the right place        // (does not hurt to do it again below)        m_currentPrependingSrc = savedPrependingSrc;        // we need to do this slightly modified bit of one of the write() cases        // because we want to prepend to m_pendingSrc rather than appending        // if there's no previous prependingSrc        if (!m_pendingScripts.isEmpty()) {            if (m_currentPrependingSrc)                m_currentPrependingSrc->append(prependingSrc);            else                m_pendingSrc.prepend(prependingSrc);        } else {            m_state = state;            write(prependingSrc, false);            state = m_state;        }    }    #if PRELOAD_SCANNER_ENABLED    if (!m_pendingScripts.isEmpty() && !m_executingScript) {        if (!m_preloadScanner)            m_preloadScanner.set(new PreloadScanner(m_doc));        if (!m_preloadScanner->inProgress()) {            m_preloadScanner->begin();            m_preloadScanner->write(m_pendingSrc);        }    }#endif    m_currentPrependingSrc = savedPrependingSrc;    return state;}HTMLTokenizer::State HTMLTokenizer::scriptExecution(const ScriptSourceCode& sourceCode, State state){    if (m_fragment || !m_doc->frame())        return state;    m_executingScript++;    SegmentedString* savedPrependingSrc = m_currentPrependingSrc;    SegmentedString prependingSrc;    m_currentPrependingSrc = &prependingSrc;#ifdef INSTRUMENT_LAYOUT_SCHEDULING    if (!m_doc->ownerElement())        printf("beginning script execution at %d\n", m_doc->elapsedTime());#endif    m_state = state;    m_doc->frame()->loader()->executeScript(sourceCode);    state = m_state;    state.setAllowYield(true);#ifdef INSTRUMENT_LAYOUT_SCHEDULING    if (!m_doc->ownerElement())        printf("ending script execution at %d\n", m_doc->elapsedTime());#endif        m_executingScript--;    if (!m_executingScript && !state.loadingExtScript()) {        m_pendingSrc.prepend(prependingSrc);                m_src.append(m_pendingSrc);        m_pendingSrc.clear();    } else if (!prependingSrc.isEmpty()) {        // restore first so that the write appends in the right place        // (does not hurt to do it again below)        m_currentPrependingSrc = savedPrependingSrc;        // we need to do this slightly modified bit of one of the write() cases        // because we want to prepend to m_pendingSrc rather than appending        // if there's no previous prependingSrc        if (!m_pendingScripts.isEmpty()) {            if (m_currentPrependingSrc)                m_currentPrependingSrc->append(prependingSrc);            else                m_pendingSrc.prepend(prependingSrc);            #if PRELOAD_SCANNER_ENABLED            // We are stuck waiting for another script. Lets check the source that            // was just document.write()n for anything to load.            PreloadScanner documentWritePreloadScanner(m_doc);            documentWritePreloadScanner.begin();            documentWritePreloadScanner.write(prependingSrc);            documentWritePreloadScanner.end();#endif        } else {            m_state = state;            write(prependingSrc, false);            state = m_state;        }    }    m_currentPrependingSrc = savedPrependingSrc;    return state;}HTMLTokenizer::State HTMLTokenizer::parseComment(SegmentedString& src, State state){    // FIXME: Why does this code even run for comments inside <script> and <style>? This seems bogus.    checkScriptBuffer(src.length());    while (!src.isEmpty()) {        UChar ch = *src;        m_scriptCode[m_scriptCodeSize++] = ch;        if (ch == '>') {            bool handleBrokenComments = m_brokenComments && !(state.inScript() || state.inStyle());            int endCharsCount = 1; // start off with one for the '>' character            if (m_scriptCodeSize > 2 && m_scriptCode[m_scriptCodeSize-3] == '-' && m_scriptCode[m_scriptCodeSize-2] == '-') {                endCharsCount = 3;            } else if (m_scriptCodeSize > 3 && m_scriptCode[m_scriptCodeSize-4] == '-' && m_scriptCode[m_scriptCodeSize-3] == '-' &&                 m_scriptCode[m_scriptCodeSize-2] == '!') {                // Other browsers will accept --!> as a close comment, even though it's                // not technically valid.                endCharsCount = 4;            }            if (handleBrokenComments || endCharsCount > 1) {                src.advancePastNonNewline();                if (!(state.inTitle() || state.inScript() || state.inXmp() || state.inTextArea() || state.inStyle() || state.inIFrame())) {                    checkScriptBuffer();                    m_scriptCode[m_scriptCodeSize] = 0;                    m_scriptCode[m_scriptCodeSize + 1] = 0;                    m_currentToken.tagName = commentAtom;                    m_currentToken.beginTag = true;                    state = processListing(SegmentedString(m_scriptCode, m_scriptCodeSize - endCharsCount), state);                    processToken();                    m_currentToken.tagName = commentAtom;                    m_currentToken.beginTag = false;                    processToken();                    m_scriptCodeSize = 0;                }                state.setInComment(false);                return state; // Finished parsing comment            }        }        src.advance(m_lineNumber);    }    return state;}HTMLTokenizer::State HTMLTokenizer::parseServer(SegmentedString& src, State state){    checkScriptBuffer(src.length());    while (!src.isEmpty()) {        UChar ch = *src;        m_scriptCode[m_scriptCodeSize++] = ch;        if (ch == '>' && m_scriptCodeSize > 1 && m_scriptCode[m_scriptCodeSize - 2] == '%') {            src.advancePastNonNewline();            state.setInServer(false);            m_scriptCodeSize = 0;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -