📄 htmltokenizer.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 4 5
                lastIsSlash = curchar == '/';                src.advance(m_lineNumber);            }            break;        case SearchValue:            while (!src.isEmpty()) {                UChar curchar = *src;                if (!isASCIISpace(curchar)) {                    if (curchar == '\'' || curchar == '\"') {                        tquote = curchar == '\"' ? DoubleQuote : SingleQuote;                        state.setTagState(QuotedValue);                        if (inViewSourceMode())                            m_currentToken.addViewSourceChar(curchar);                        src.advancePastNonNewline();                    } else                        state.setTagState(Value);                    break;                }                if (inViewSourceMode())                    m_currentToken.addViewSourceChar(curchar);                src.advance(m_lineNumber);            }            break;        case QuotedValue:            while (!src.isEmpty()) {                checkBuffer();                UChar curchar = *src;                if (curchar <= '>' && !src.escaped()) {                    if (curchar == '>' && m_attrName.isEmpty()) {                        // Handle a case like <img '>.  Just go ahead and be willing                        // to close the whole tag.  Don't consume the character and                        // just go back into SearchEnd while ignoring the whole                        // value.                        // FIXME: Note that this is actually not a very good solution.                        // It doesn't handle the general case of                        // unmatched quotes among attributes that have names. 
-dwh                        while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))                            m_dest--; // remove trailing newlines                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);                        if (!attributeValue.contains('/'))                            m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?)                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());                        if (inViewSourceMode())                            m_currentToken.addViewSourceChar('x');                        state.setTagState(SearchAttribute);                        m_dest = m_buffer;                        tquote = NoQuote;                        break;                    }                                        if (curchar == '&') {                        src.advancePastNonNewline();                        state = parseEntity(src, m_dest, state, cBufferPos, true, true);                        break;                    }                    if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) {                        // some <input type=hidden> rely on trailing spaces. argh                        while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r'))                            m_dest--; // remove trailing newlines                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);                        if (m_attrName.isEmpty() && !attributeValue.contains('/')) {                            m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?)                            
if (inViewSourceMode())                                m_currentToken.addViewSourceChar('x');                        } else if (inViewSourceMode())                            m_currentToken.addViewSourceChar('v');                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());                        m_dest = m_buffer;                        state.setTagState(SearchAttribute);                        tquote = NoQuote;                        if (inViewSourceMode())                            m_currentToken.addViewSourceChar(curchar);                        src.advancePastNonNewline();                        break;                    }                }                *m_dest++ = curchar;                src.advance(m_lineNumber);            }            break;        case Value:            while(!src.isEmpty()) {                checkBuffer();                UChar curchar = *src;                if (curchar <= '>' && !src.escaped()) {                    // parse Entities                    if (curchar == '&') {                        src.advancePastNonNewline();                        state = parseEntity(src, m_dest, state, cBufferPos, true, true);                        break;                    }                    // no quotes. Every space means end of value                    // '/' does not delimit in IE!                    
if (isASCIISpace(curchar) || curchar == '>') {                        AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1);                        m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode());                        if (inViewSourceMode())                            m_currentToken.addViewSourceChar('v');                        m_dest = m_buffer;                        state.setTagState(SearchAttribute);                        break;                    }                }                *m_dest++ = curchar;                src.advance(m_lineNumber);            }            break;        case SearchEnd:        {            while (!src.isEmpty()) {                UChar ch = *src;                if (ch == '>' || ch == '<')                    break;                if (ch == '/')                    m_currentToken.selfClosingTag = true;                if (inViewSourceMode())                    m_currentToken.addViewSourceChar(ch);                src.advance(m_lineNumber);            }            if (src.isEmpty())                break;            searchCount = 0; // Stop looking for '<!--' sequence            state.setTagState(NoTag);            tquote = NoQuote;            if (*src != '<')                src.advance(m_lineNumber);            if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown                m_cBufferPos = cBufferPos;                return state;            }            AtomicString tagName = m_currentToken.tagName;            // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard            // compatibility.            
bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag;            bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag;            if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) {                Attribute* a = 0;                m_scriptTagSrcAttrValue = String();                m_scriptTagCharsetAttrValue = String();                if (m_currentToken.attrs && !m_fragment) {                    if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) {                        if ((a = m_currentToken.attrs->getAttributeItem(srcAttr)))                            m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string();                    }                }            }            RefPtr<Node> n = processToken();            m_cBufferPos = cBufferPos;            if (n || inViewSourceMode()) {                if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) {                    if (beginTag)                        state.setDiscardLF(true); // Discard the first LF after we open a pre.                
} else if (tagName == scriptTag) {                    ASSERT(!m_scriptNode);                    m_scriptNode = static_pointer_cast<HTMLScriptElement>(n);                    if (m_scriptNode)                        m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset();                    if (beginTag) {                        m_searchStopper = scriptEnd;                        m_searchStopperLength = 8;                        state.setInScript(true);                        state = parseSpecial(src, state);                    } else if (isSelfClosingScript) { // Handle <script src="foo"/>                        state.setInScript(true);                        state = scriptHandler(state);                    }                } else if (tagName == styleTag) {                    if (beginTag) {                        m_searchStopper = styleEnd;                        m_searchStopperLength = 7;                        state.setInStyle(true);                        state = parseSpecial(src, state);                    }                } else if (tagName == textareaTag) {                    if (beginTag) {                        m_searchStopper = textareaEnd;                        m_searchStopperLength = 10;                        state.setInTextArea(true);                        state = parseSpecial(src, state);                    }                } else if (tagName == titleTag) {                    if (beginTag) {                        m_searchStopper = titleEnd;                        m_searchStopperLength = 7;                        State savedState = state;                        SegmentedString savedSrc = src;                        long savedLineno = m_lineNumber;                        state.setInTitle(true);                        state = parseSpecial(src, state);                        if (state.inTitle() && src.isEmpty()) {                            // We just ate the rest of the document as the title #text node!                            
// Reset the state then retokenize without special title handling.                            // Let the parser clean up the missing </title> tag.                            // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're                            // at the end of the document unless m_noMoreData is also true. We need                            // to detect this case elsewhere, and save the state somewhere other                            // than a local variable.                            state = savedState;                            src = savedSrc;                            m_lineNumber = savedLineno;                            m_scriptCodeSize = 0;                        }                    }                } else if (tagName == xmpTag) {                    if (beginTag) {                        m_searchStopper = xmpEnd;                        m_searchStopperLength = 5;                        state.setInXmp(true);                        state = parseSpecial(src, state);                    }                } else if (tagName == iframeTag) {                    if (beginTag) {                        m_searchStopper = iframeEnd;                        m_searchStopperLength = 8;                        state.setInIFrame(true);                        state = parseSpecial(src, state);                    }                }            }            if (tagName == plaintextTag)                state.setInPlainText(beginTag);            return state; // Finished parsing tag!        }        } // end switch    }    m_cBufferPos = cBufferPos;    return state;}inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state){    // We don't want to be checking elapsed time with every character, so we only check after we've    // processed a certain number of characters.    
bool allowedYield = state.allowYield();    state.setAllowYield(false);    if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) {        processedCount = 0;        if (currentTime() - startTime > m_tokenizerTimeDelay) {            /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to               load, but this hurts overall performance on slower machines.  For now turn this               off.            || (!m_doc->haveStylesheetsLoaded() &&                 (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/            // Schedule the timer to keep processing as soon as possible.            m_timer.startOneShot(0);#ifdef INSTRUMENT_LAYOUT_SCHEDULING            if (currentTime() - startTime > m_tokenizerTimeDelay)                printf("Deferring processing of data because 500ms elapsed away from event loop.\n");#endif            return false;        }    }        processedCount++;    return true;}void HTMLTokenizer::write(const SegmentedString& str, bool appendData){    if (!m_buffer)        return;        if (m_parserStopped)        return;    SegmentedString source(str);    if (m_executingScript)        source.setExcludeLineNumbers();    if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) {        // don't parse; we will do this later        if (m_currentPrependingSrc)            m_currentPrependingSrc->append(source);        else {            m_pendingSrc.append(source);#if PRELOAD_SCANNER_ENABLED            if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)                m_preloadScanner->write(source);#endif        }        return;    }    #if PRELOAD_SCANNER_ENABLED    if (m_preloadScanner && m_preloadScanner->inProgress() && appendData)        m_preloadScanner->end();#endif    if (!m_src.isEmpty())        m_src.append(source);    else        setSrc(source);    // Once a timer is set, it has 
control of when the tokenizer continues.    if (m_timer.isActive())        return;    bool wasInWrite = m_inWrite;    m_inWrite = true;    #ifdef INSTRUMENT_LAYOUT_SCHEDULING    if (!m_doc->ownerElement())        printf("Beginning write at time %d\n", m_doc->elapsedTime());#endif        int processedCount = 0;    double startTime = currentTime();    Frame* frame = m_doc->frame();    State state = m_state;    while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
上一页 1 2 3 4 5
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -