📄 htmltokenizer.cpp
字号:
if (inViewSourceMode()) processDoctypeToken(); } else if (isWhitespace) { src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else m_doctypeToken.setState(DoctypeBogus); break; } case DoctypePublicID: { if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { src.advancePastNonNewline(); m_doctypeToken.setState(DoctypeAfterPublicID); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else if (c == '>') { // Considered bogus. Don't process the doctype. src.advancePastNonNewline(); state.setInDoctype(false); if (inViewSourceMode()) processDoctypeToken(); } else { m_doctypeToken.m_publicID.append(c); src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } break; } case DoctypeAfterPublicID: if (c == '\"' || c == '\'') { tquote = c == '\"' ? DoubleQuote : SingleQuote; m_doctypeToken.setState(DoctypeSystemID); src.advancePastNonNewline(); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else if (c == '>') { // Valid doctype. Emit it now. src.advancePastNonNewline(); state.setInDoctype(false); processDoctypeToken(); } else if (isWhitespace) { src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else m_doctypeToken.setState(DoctypeBogus); break; case DoctypeBeforeSystemID: if (c == '\"' || c == '\'') { tquote = c == '\"' ? DoubleQuote : SingleQuote; m_doctypeToken.setState(DoctypeSystemID); src.advancePastNonNewline(); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else if (c == '>') { // Considered bogus. Don't process the doctype. src.advancePastNonNewline(); state.setInDoctype(false); } else if (isWhitespace) { src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else m_doctypeToken.setState(DoctypeBogus); break; case DoctypeSystemID: if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) { src.advancePastNonNewline(); m_doctypeToken.setState(DoctypeAfterSystemID); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else if (c == '>') { // Considered bogus. Don't process the doctype. src.advancePastNonNewline(); state.setInDoctype(false); if (inViewSourceMode()) processDoctypeToken(); } else { m_doctypeToken.m_systemID.append(c); src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } break; case DoctypeAfterSystemID: if (c == '>') { // Valid doctype. Emit it now. src.advancePastNonNewline(); state.setInDoctype(false); processDoctypeToken(); } else if (isWhitespace) { src.advance(m_lineNumber); if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } else m_doctypeToken.setState(DoctypeBogus); break; case DoctypeBogus: if (c == '>') { // Done with the bogus doctype. src.advancePastNonNewline(); state.setInDoctype(false); if (inViewSourceMode()) processDoctypeToken(); } else { src.advance(m_lineNumber); // Just keep scanning for '>' if (inViewSourceMode()) m_doctypeToken.m_source.append(c); } break; default: break; } } return state;}HTMLTokenizer::State HTMLTokenizer::parseTag(SegmentedString& src, State state){ ASSERT(!state.hasEntityState()); unsigned cBufferPos = m_cBufferPos; bool lastIsSlash = false; while (!src.isEmpty()) { checkBuffer(); switch(state.tagState()) { case NoTag: { m_cBufferPos = cBufferPos; return state; } case TagName: { if (searchCount > 0) { if (*src == commentStart[searchCount]) { searchCount++; if (searchCount == 2) m_doctypeSearchCount++; // A '!' is also part of a doctype, so we are moving through that still as well. else m_doctypeSearchCount = 0; if (searchCount == 4) { // Found '<!--' sequence src.advancePastNonNewline(); m_dest = m_buffer; // ignore the previous part of this tag state.setInComment(true); state.setTagState(NoTag); // Fix bug 34302 at kde.bugs.org. Go ahead and treat // <!--> as a valid comment, since both mozilla and IE on windows // can handle this case. Only do this in quirks mode. -dwh if (!src.isEmpty() && *src == '>' && m_doc->inCompatMode()) { state.setInComment(false); src.advancePastNonNewline(); if (!src.isEmpty()) m_cBuffer[cBufferPos++] = *src; } else state = parseComment(src, state); m_cBufferPos = cBufferPos; return state; // Finished parsing tag! } m_cBuffer[cBufferPos++] = *src; src.advancePastNonNewline(); break; } else searchCount = 0; // Stop looking for '<!--' sequence } if (m_doctypeSearchCount > 0) { if (toASCIILower(*src) == doctypeStart[m_doctypeSearchCount]) { m_doctypeSearchCount++; m_cBuffer[cBufferPos++] = *src; src.advancePastNonNewline(); if (m_doctypeSearchCount == 9) { // Found '<!DOCTYPE' sequence state.setInDoctype(true); state.setTagState(NoTag); m_doctypeToken.reset(); if (inViewSourceMode()) m_doctypeToken.m_source.append(m_cBuffer, cBufferPos); state = parseDoctype(src, state); m_cBufferPos = cBufferPos; return state; } break; } else m_doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence } bool finish = false; unsigned int ll = min(src.length(), CBUFLEN - cBufferPos); while (ll--) { UChar curchar = *src; if (isASCIISpace(curchar) || curchar == '>' || curchar == '<') { finish = true; break; } // tolower() shows up on profiles. This is faster! if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); else m_cBuffer[cBufferPos++] = curchar; src.advancePastNonNewline(); } // Disadvantage: we add the possible rest of the tag // as attribute names. ### judge if this causes problems if (finish || CBUFLEN == cBufferPos) { bool beginTag; UChar* ptr = m_cBuffer; unsigned int len = cBufferPos; m_cBuffer[cBufferPos] = '\0'; if ((cBufferPos > 0) && (*ptr == '/')) { // End Tag beginTag = false; ptr++; len--; } else // Start Tag beginTag = true; // Ignore the / in fake xml tags like <br/>. We trim off the "/" so that we'll get "br" as the tag name and not "br/". if (len > 1 && ptr[len-1] == '/' && !inViewSourceMode()) ptr[--len] = '\0'; // Now that we've shaved off any invalid / that might have followed the name), make the tag. // FIXME: FireFox and WinIE turn !foo nodes into comments, we ignore comments. (fast/parser/tag-with-exclamation-point.html) if (ptr[0] != '!' || inViewSourceMode()) { m_currentToken.tagName = AtomicString(ptr); m_currentToken.beginTag = beginTag; } m_dest = m_buffer; state.setTagState(SearchAttribute); cBufferPos = 0; } break; } case SearchAttribute: while(!src.isEmpty()) { UChar curchar = *src; // In this mode just ignore any quotes we encounter and treat them like spaces. if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"') { if (curchar == '<' || curchar == '>') state.setTagState(SearchEnd); else state.setTagState(AttributeName); cBufferPos = 0; break; } if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar); src.advance(m_lineNumber); } break; case AttributeName: { int ll = min(src.length(), CBUFLEN - cBufferPos); while (ll--) { UChar curchar = *src; // If we encounter a "/" when scanning an attribute name, treat it as a delimiter. This allows the // cases like <input type=checkbox checked/> to work (and accommodates XML-style syntax as per HTML5). if (curchar <= '>' && (curchar >= '<' || isASCIISpace(curchar) || curchar == '/')) { m_cBuffer[cBufferPos] = '\0'; m_attrName = AtomicString(m_cBuffer); m_dest = m_buffer; *m_dest++ = 0; state.setTagState(SearchEqual); if (inViewSourceMode()) m_currentToken.addViewSourceChar('a'); break; } // tolower() shows up on profiles. This is faster! if (curchar >= 'A' && curchar <= 'Z' && !inViewSourceMode()) m_cBuffer[cBufferPos++] = curchar + ('a' - 'A'); else m_cBuffer[cBufferPos++] = curchar; src.advance(m_lineNumber); } if (cBufferPos == CBUFLEN) { m_cBuffer[cBufferPos] = '\0'; m_attrName = AtomicString(m_cBuffer); m_dest = m_buffer; *m_dest++ = 0; state.setTagState(SearchEqual); if (inViewSourceMode()) m_currentToken.addViewSourceChar('a'); } break; } case SearchEqual: while (!src.isEmpty()) { UChar curchar = *src; if (lastIsSlash && curchar == '>') { // This is a quirk (with a long sad history). We have to do this // since widgets do <script src="foo.js"/> and expect the tag to close. if (m_currentToken.tagName == scriptTag) m_currentToken.selfClosingTag = true; m_currentToken.brokenXMLStyle = true; } // In this mode just ignore any quotes or slashes we encounter and treat them like spaces. if (!isASCIISpace(curchar) && curchar != '\'' && curchar != '"' && curchar != '/') { if (curchar == '=') { state.setTagState(SearchValue); if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar); src.advancePastNonNewline(); } else { m_currentToken.addAttribute(m_attrName, emptyAtom, inViewSourceMode()); m_dest = m_buffer; state.setTagState(SearchAttribute); lastIsSlash = false; } break; } if (inViewSourceMode()) m_currentToken.addViewSourceChar(curchar);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -