📄 htmltokenizer.cpp
字号:
lastIsSlash = curchar == '/'; src.advance(m_lineNumber); } break; case SearchValue: while (!src.isEmpty()) { UChar curchar = *src; if (!isASCIISpace(curchar)) { if (curchar == '\'' || curchar == '\"') { tquote = curchar == '\"' ? DoubleQuote : SingleQuote; state.setTagState(QuotedValue); if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar); src.advancePastNonNewline(); } else state.setTagState(Value); break; } if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar); src.advance(m_lineNumber); } break; case QuotedValue: while (!src.isEmpty()) { checkBuffer(); UChar curchar = *src; if (curchar <= '>' && !src.escaped()) { if (curchar == '>' && m_attrName.isEmpty()) { // Handle a case like <img '>. Just go ahead and be willing // to close the whole tag. Don't consume the character and // just go back into SearchEnd while ignoring the whole // value. // FIXME: Note that this is actually not a very good solution. // It doesn't handle the general case of // unmatched quotes among attributes that have names. -dwh while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) m_dest--; // remove trailing newlines AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); if (!attributeValue.contains('/')) m_attrName = attributeValue; // Just make the name/value match. (FIXME: Is this some WinIE quirk?) m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); if (inViewSourceMode()) m_currentToken.addViewSourceChar('x'); state.setTagState(SearchAttribute); m_dest = m_buffer; tquote = NoQuote; break; } if (curchar == '&') { src.advancePastNonNewline(); state = parseEntity(src, m_dest, state, cBufferPos, true, true); break; } if ((tquote == SingleQuote && curchar == '\'') || (tquote == DoubleQuote && curchar == '\"')) { // some <input type=hidden> rely on trailing spaces. 
argh while (m_dest > m_buffer + 1 && (m_dest[-1] == '\n' || m_dest[-1] == '\r')) m_dest--; // remove trailing newlines AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); if (m_attrName.isEmpty() && !attributeValue.contains('/')) { m_attrName = attributeValue; // Make the name match the value. (FIXME: Is this a WinIE quirk?) if (inViewSourceMode()) m_currentToken.addViewSourceChar('x'); } else if (inViewSourceMode()) m_currentToken.addViewSourceChar('v'); m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); m_dest = m_buffer; state.setTagState(SearchAttribute); tquote = NoQuote; if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar); src.advancePastNonNewline(); break; } } *m_dest++ = curchar; src.advance(m_lineNumber); } break; case Value: while(!src.isEmpty()) { checkBuffer(); UChar curchar = *src; if (curchar <= '>' && !src.escaped()) { // parse Entities if (curchar == '&') { src.advancePastNonNewline(); state = parseEntity(src, m_dest, state, cBufferPos, true, true); break; } // no quotes. Every space means end of value // '/' does not delimit in IE! 
if (isASCIISpace(curchar) || curchar == '>') { AtomicString attributeValue(m_buffer + 1, m_dest - m_buffer - 1); m_currentToken.addAttribute(m_attrName, attributeValue, inViewSourceMode()); if (inViewSourceMode()) m_currentToken.addViewSourceChar('v'); m_dest = m_buffer; state.setTagState(SearchAttribute); break; } } *m_dest++ = curchar; src.advance(m_lineNumber); } break; case SearchEnd: { while (!src.isEmpty()) { UChar ch = *src; if (ch == '>' || ch == '<') break; if (ch == '/') m_currentToken.selfClosingTag = true; if (inViewSourceMode()) m_currentToken.addViewSourceChar(ch); src.advance(m_lineNumber); } if (src.isEmpty()) break; searchCount = 0; // Stop looking for '<!--' sequence state.setTagState(NoTag); tquote = NoQuote; if (*src != '<') src.advance(m_lineNumber); if (m_currentToken.tagName == nullAtom) { //stop if tag is unknown m_cBufferPos = cBufferPos; return state; } AtomicString tagName = m_currentToken.tagName; // Handle <script src="foo"/> like Mozilla/Opera. We have to do this now for Dashboard // compatibility. bool isSelfClosingScript = m_currentToken.selfClosingTag && m_currentToken.beginTag && m_currentToken.tagName == scriptTag; bool beginTag = !m_currentToken.selfClosingTag && m_currentToken.beginTag; if (m_currentToken.beginTag && m_currentToken.tagName == scriptTag && !inViewSourceMode() && !m_parser->skipMode()) { Attribute* a = 0; m_scriptTagSrcAttrValue = String(); m_scriptTagCharsetAttrValue = String(); if (m_currentToken.attrs && !m_fragment) { if (m_doc->frame() && m_doc->frame()->script()->isEnabled()) { if ((a = m_currentToken.attrs->getAttributeItem(srcAttr))) m_scriptTagSrcAttrValue = m_doc->completeURL(parseURL(a->value())).string(); } } } RefPtr<Node> n = processToken(); m_cBufferPos = cBufferPos; if (n || inViewSourceMode()) { if ((tagName == preTag || tagName == listingTag) && !inViewSourceMode()) { if (beginTag) state.setDiscardLF(true); // Discard the first LF after we open a pre. 
} else if (tagName == scriptTag) { ASSERT(!m_scriptNode); m_scriptNode = static_pointer_cast<HTMLScriptElement>(n); if (m_scriptNode) m_scriptTagCharsetAttrValue = m_scriptNode->scriptCharset(); if (beginTag) { m_searchStopper = scriptEnd; m_searchStopperLength = 8; state.setInScript(true); state = parseSpecial(src, state); } else if (isSelfClosingScript) { // Handle <script src="foo"/> state.setInScript(true); state = scriptHandler(state); } } else if (tagName == styleTag) { if (beginTag) { m_searchStopper = styleEnd; m_searchStopperLength = 7; state.setInStyle(true); state = parseSpecial(src, state); } } else if (tagName == textareaTag) { if (beginTag) { m_searchStopper = textareaEnd; m_searchStopperLength = 10; state.setInTextArea(true); state = parseSpecial(src, state); } } else if (tagName == titleTag) { if (beginTag) { m_searchStopper = titleEnd; m_searchStopperLength = 7; State savedState = state; SegmentedString savedSrc = src; long savedLineno = m_lineNumber; state.setInTitle(true); state = parseSpecial(src, state); if (state.inTitle() && src.isEmpty()) { // We just ate the rest of the document as the title #text node! // Reset the state then retokenize without special title handling. // Let the parser clean up the missing </title> tag. // FIXME: This is incorrect, because src.isEmpty() doesn't mean we're // at the end of the document unless m_noMoreData is also true. We need // to detect this case elsewhere, and save the state somewhere other // than a local variable. 
state = savedState; src = savedSrc; m_lineNumber = savedLineno; m_scriptCodeSize = 0; } } } else if (tagName == xmpTag) { if (beginTag) { m_searchStopper = xmpEnd; m_searchStopperLength = 5; state.setInXmp(true); state = parseSpecial(src, state); } } else if (tagName == iframeTag) { if (beginTag) { m_searchStopper = iframeEnd; m_searchStopperLength = 8; state.setInIFrame(true); state = parseSpecial(src, state); } } } if (tagName == plaintextTag) state.setInPlainText(beginTag); return state; // Finished parsing tag! } } // end switch } m_cBufferPos = cBufferPos; return state;} /* continueProcessing: decides whether the tokenizer keeps processing or yields. Returns true to keep going; returns false after arming m_timer with startOneShot(0). The allow-yield flag on state is read once and then always cleared via setAllowYield(false). Elapsed time (currentTime() minus startTime) is compared against m_tokenizerTimeDelay only when state.loadingExtScript() and state.forceSynchronous() are both false, m_executingScript is false, and either processedCount exceeds m_tokenizerChunkSize or the allow-yield flag was set; in that case processedCount is reset to 0 before the comparison. Parameters: processedCount is an in-out counter passed by reference; startTime is the value subtracted from currentTime(); state is the tokenizer state whose allow-yield flag is consumed here. NOTE(review): the INSTRUMENT_LAYOUT_SCHEDULING message hard-codes 500ms, while the threshold actually compared is m_tokenizerTimeDelay. */ inline bool HTMLTokenizer::continueProcessing(int& processedCount, double startTime, State &state){ // We don't want to be checking elapsed time with every character, so we only check after we've // processed a certain number of characters. bool allowedYield = state.allowYield(); state.setAllowYield(false); if (!state.loadingExtScript() && !state.forceSynchronous() && !m_executingScript && (processedCount > m_tokenizerChunkSize || allowedYield)) { processedCount = 0; if (currentTime() - startTime > m_tokenizerTimeDelay) { /* FIXME: We'd like to yield aggressively to give stylesheets the opportunity to load, but this hurts overall performance on slower machines. For now turn this off. || (!m_doc->haveStylesheetsLoaded() && (m_doc->documentElement()->id() != ID_HTML || m_doc->body()))) {*/ // Schedule the timer to keep processing as soon as possible. 
m_timer.startOneShot(0);#ifdef INSTRUMENT_LAYOUT_SCHEDULING if (currentTime() - startTime > m_tokenizerTimeDelay) printf("Deferring processing of data because 500ms elapsed away from event loop.\n");#endif return false; } } /* Not yielding: count this call toward the next m_tokenizerChunkSize check. */ processedCount++; return true;}void HTMLTokenizer::write(const SegmentedString& str, bool appendData){ if (!m_buffer) return; if (m_parserStopped) return; SegmentedString source(str); if (m_executingScript) source.setExcludeLineNumbers(); if ((m_executingScript && appendData) || !m_pendingScripts.isEmpty()) { // don't parse; we will do this later if (m_currentPrependingSrc) m_currentPrependingSrc->append(source); else { m_pendingSrc.append(source);#if PRELOAD_SCANNER_ENABLED if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) m_preloadScanner->write(source);#endif } return; } #if PRELOAD_SCANNER_ENABLED if (m_preloadScanner && m_preloadScanner->inProgress() && appendData) m_preloadScanner->end();#endif if (!m_src.isEmpty()) m_src.append(source); else setSrc(source); // Once a timer is set, it has control of when the tokenizer continues. if (m_timer.isActive()) return; bool wasInWrite = m_inWrite; m_inWrite = true; #ifdef INSTRUMENT_LAYOUT_SCHEDULING if (!m_doc->ownerElement()) printf("Beginning write at time %d\n", m_doc->elapsedTime());#endif int processedCount = 0; double startTime = currentTime(); Frame* frame = m_doc->frame(); State state = m_state; while (!m_src.isEmpty() && (!frame || !frame->loader()->isScheduledLocationChangePending())) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -