htmlparser.cpp

来自「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」· C++ 代码 · 共 1,607 行 · 第 1/5 页
CPP
1,607 行
/*
    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1999,2001 Lars Knoll (knoll@kde.org)
              (C) 2000,2001 Dirk Mueller (mueller@kde.org)
    Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/

#include "config.h"
#include "HTMLParser.h"

#include "CharacterNames.h"
#include "CSSPropertyNames.h"
#include "CSSValueKeywords.h"
#include "Comment.h"
#include "Console.h"
#include "DOMWindow.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Frame.h"
#include "HTMLBodyElement.h"
#include "HTMLDocument.h"
#include "HTMLDivElement.h"
#include "HTMLDListElement.h"
#include "HTMLElementFactory.h"
#include "HTMLFormElement.h"
#include "HTMLHeadElement.h"
#include "HTMLHRElement.h"
#include "HTMLHtmlElement.h"
#include "HTMLIsIndexElement.h"
#include "HTMLMapElement.h"
#include "HTMLNames.h"
#include "HTMLTableCellElement.h"
#include "HTMLTableRowElement.h"
#include "HTMLTableSectionElement.h"
#include "HTMLTokenizer.h"
#include "LocalizedStrings.h"
#include "Settings.h"
#include "Text.h"
#include <wtf/StdLibExtras.h>

namespace WebCore {

using namespace HTMLNames;

static const unsigned cMaxRedundantTagDepth = 20;
static const unsigned cResidualStyleMaxDepth = 200;

// A singly linked record (chained through |next|) that pairs a tag name and a
// nesting level with the DOM node it refers to.
// NOTE(review): presumably the element type of HTMLParser::m_blockStack --
// confirm against HTMLParser.h.
struct HTMLStackElem : Noncopyable {
    // t:   tag name stored in this entry
    // lvl: level value stored in this entry
    // n:   node this entry refers to
    // r:   true if the creator took a reference on |n| that derefNode() must drop
    // nx:  next entry in the chain (may be 0)
    // strayTableContent always starts out false.
    HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
        : tagName(t)
        , level(lvl)
        , strayTableContent(false)
        , node(n)
        , didRefNode(r)
        , next(nx)
    {
    }

    // Drops the reference on |node|, but only when this entry recorded that one
    // was taken (didRefNode); otherwise does nothing.
    void derefNode()
    {
        if (didRefNode)
            node->deref();
    }

    AtomicString tagName;
    int level;
    bool strayTableContent;
    Node* node;
    bool didRefNode;
    HTMLStackElem* next;
};

/**
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is straightforward.
 *
 * Unfortunately, we have to handle many HTML documents that are not well-formed,
 * so the parser has to be tolerant about errors.
 *
 * We have to take care of at least the following error conditions:
 *
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one, which forbids
 *    the element, and add it afterwards.
 *
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
 *
 * 3. We want to add a block element inside to an inline element. Close all
 *    inline elements up to the next higher block element.
 *
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element or ignore the tag.
* */HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)    : m_document(doc)    , m_current(doc)    , m_didRefCurrent(false)    , m_blockStack(0)    , m_hasPElementInScope(NotInScope)    , m_head(0)    , m_inBody(false)    , m_haveContent(false)    , m_haveFrameSet(false)    , m_isParsingFragment(false)    , m_reportErrors(reportErrors)    , m_handlingResidualStyleAcrossBlocks(false)    , m_inStrayTableContent(0){}HTMLParser::HTMLParser(DocumentFragment* frag)    : m_document(frag->document())    , m_current(frag)    , m_didRefCurrent(true)    , m_blockStack(0)    , m_hasPElementInScope(NotInScope)    , m_head(0)    , m_inBody(true)    , m_haveContent(false)    , m_haveFrameSet(false)    , m_isParsingFragment(true)    , m_reportErrors(false)    , m_handlingResidualStyleAcrossBlocks(false)    , m_inStrayTableContent(0){    if (frag)        frag->ref();}HTMLParser::~HTMLParser(){    freeBlock();    if (m_didRefCurrent)        m_current->deref(); }void HTMLParser::reset(){    ASSERT(!m_isParsingFragment);    setCurrent(m_document);    freeBlock();    m_inBody = false;    m_haveFrameSet = false;    m_haveContent = false;    m_inStrayTableContent = 0;    m_currentFormElement = 0;    m_currentMapElement = 0;    m_head = 0;    m_isindexElement = 0;    m_skipModeTag = nullAtom;}void HTMLParser::setCurrent(Node* newCurrent) {    bool didRefNewCurrent = newCurrent && newCurrent != m_document;    if (didRefNewCurrent)         newCurrent->ref();     if (m_didRefCurrent)         m_current->deref();    m_current = newCurrent;    m_didRefCurrent = didRefNewCurrent;}PassRefPtr<Node> HTMLParser::parseToken(Token* t){    if (!m_skipModeTag.isNull()) {        if (!t->beginTag && t->tagName == m_skipModeTag)            // Found the end tag for the current skip mode, so we're done skipping.            m_skipModeTag = nullAtom;        else if (m_current->localName() == t->tagName)            // Do not skip </iframe>.            // FIXME: What does that comment mean? 
How can it be right to parse a token without clearing m_skipModeTag?            ;        else            return 0;    }    // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.    if (t->isCloseTag(brTag) && m_document->inCompatMode()) {        reportError(MalformedBRError);        t->beginTag = true;    }    if (!t->beginTag) {        processCloseTag(t);        return 0;    }    // Ignore spaces, if we're not inside a paragraph or other inline code.    // Do not alter the text if it is part of a scriptTag.    if (t->tagName == textAtom && t->text && m_current->localName() != scriptTag) {        if (m_inBody && !skipMode() && m_current->localName() != styleTag &&            m_current->localName() != titleTag && !t->text->containsOnlyWhitespace())            m_haveContent = true;                RefPtr<Node> n;        String text = t->text.get();        unsigned charsLeft = text.length();        while (charsLeft) {            // split large blocks of text to nodes of manageable size            n = Text::createWithLengthLimit(m_document, text, charsLeft);            if (!insertNode(n.get(), t->selfClosingTag))                return 0;        }        return n;    }    RefPtr<Node> n = getNode(t);    // just to be sure, and to catch currently unimplemented stuff    if (!n)        return 0;    // set attributes    if (n->isHTMLElement()) {        HTMLElement* e = static_cast<HTMLElement*>(n.get());        e->setAttributeMap(t->attrs.get());        // take care of optional close tags        if (e->endTagRequirement() == TagStatusOptional)            popBlock(t->tagName);                    // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing        // syntax was used, report an error.        
if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {            if (t->tagName == scriptTag)                reportError(IncorrectXMLCloseScriptWarning);            else                reportError(IncorrectXMLSelfCloseError, &t->tagName);        }    }    if (!insertNode(n.get(), t->selfClosingTag)) {        // we couldn't insert the node        if (n->isElementNode()) {            Element* e = static_cast<Element*>(n.get());            e->setAttributeMap(0);        }        if (m_currentMapElement == n)            m_currentMapElement = 0;        if (m_currentFormElement == n)            m_currentFormElement = 0;        if (m_head == n)            m_head = 0;        return 0;    }    return n;}void HTMLParser::parseDoctypeToken(DoctypeToken* t){    // Ignore any doctype after the first.  Ignore doctypes in fragments.    if (m_document->doctype() || m_isParsingFragment || m_current != m_document)        return;            // Make a new doctype node and set it as our doctype.    m_document->addChild(DocumentType::create(m_document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));}static bool isTableSection(const Node* n){    return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);}static bool isTablePart(const Node* n){    return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||           isTableSection(n);}static bool isTableRelated(const Node* n){    return n->hasTagName(tableTag) || isTablePart(n);}static bool isScopingTag(const AtomicString& tagName){    return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;}bool HTMLParser::insertNode(Node* n, bool flat){    RefPtr<Node> protectNode(n);    const AtomicString& localName = n->localName();    int tagPriority = n->isHTMLElement() ? 
static_cast<HTMLElement*>(n)->tagPriority() : 0;        // <table> is never allowed inside stray table content.  Always pop out of the stray table content    // and close up the first table, and then start the second table as a sibling.    if (m_inStrayTableContent && localName == tableTag)        popBlock(tableTag);
htmlparser.cpp - 源码说明

本页面展示了「linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit」中的 htmlparser.cpp 源码文件，采用 C++ 编程语言编写，共 1,607 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与WebKit相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?