⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 linux下开源浏览器WebKit的源码,市面上的很多商用浏览器都是移植自WebKit
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    if (node->isHTMLElement()) {        HTMLElement* e = static_cast<HTMLElement*>(node);        if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||            e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||            e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||            e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||            e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||            e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||            e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||            e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||            e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||            e->hasLocalName(noembedTag))            return true;        if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {            Settings* settings = m_document->settings();            if (settings && settings->isJavaScriptEnabled())                return true;        }    }        return false;}bool HTMLParser::isResidualStyleTag(const AtomicString& tagName){    DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());    if (residualStyleTags.isEmpty()) {        residualStyleTags.add(aTag.localName().impl());        residualStyleTags.add(fontTag.localName().impl());        residualStyleTags.add(ttTag.localName().impl());        residualStyleTags.add(uTag.localName().impl());        residualStyleTags.add(bTag.localName().impl());        residualStyleTags.add(iTag.localName().impl());        residualStyleTags.add(sTag.localName().impl());        residualStyleTags.add(strikeTag.localName().impl());        residualStyleTags.add(bigTag.localName().impl());        residualStyleTags.add(smallTag.localName().impl());        residualStyleTags.add(emTag.localName().impl());        residualStyleTags.add(strongTag.localName().impl());        residualStyleTags.add(dfnTag.localName().impl());        residualStyleTags.add(codeTag.localName().impl());        residualStyleTags.add(sampTag.localName().impl());        residualStyleTags.add(kbdTag.localName().impl());        residualStyleTags.add(varTag.localName().impl());        residualStyleTags.add(nobrTag.localName().impl());    }        return residualStyleTags.contains(tagName.impl());}bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName){    DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());    if (unaffectedTags.isEmpty()) {        unaffectedTags.add(bodyTag.localName().impl());        unaffectedTags.add(tableTag.localName().impl());        unaffectedTags.add(theadTag.localName().impl());        unaffectedTags.add(tbodyTag.localName().impl());        unaffectedTags.add(tfootTag.localName().impl());        unaffectedTags.add(trTag.localName().impl());        unaffectedTags.add(thTag.localName().impl());        unaffectedTags.add(tdTag.localName().impl());        unaffectedTags.add(captionTag.localName().impl());        unaffectedTags.add(colgroupTag.localName().impl());        unaffectedTags.add(colTag.localName().impl());        unaffectedTags.add(optionTag.localName().impl());        unaffectedTags.add(optgroupTag.localName().impl());        unaffectedTags.add(selectTag.localName().impl());        unaffectedTags.add(objectTag.localName().impl());    }        return !unaffectedTags.contains(tagName.impl());}void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem){    HTMLStackElem* maxElem = 0;    bool finished = false;    bool strayTableContent = elem->strayTableContent;    m_handlingResidualStyleAcrossBlocks = true;    while (!finished) {        // Find the outermost element that crosses over to a higher level. If there exists another higher-level        // element, we will do another pass, until we have corrected the innermost one.        ExceptionCode ec = 0;        HTMLStackElem* curr = m_blockStack;        HTMLStackElem* prev = 0;        HTMLStackElem* prevMaxElem = 0;        maxElem = 0;        finished = true;        while (curr && curr != elem) {            if (curr->level > elem->level) {                if (!isAffectedByResidualStyle(curr->tagName))                    return;                if (maxElem)                    // We will need another pass.                    finished = false;                maxElem = curr;                prevMaxElem = prev;            }            prev = curr;            curr = curr->next;        }        if (!curr || !maxElem)            return;        Node* residualElem = prev->node;        Node* blockElem = prevMaxElem ? prevMaxElem->node : m_current;        Node* parentElem = elem->node;        // Check to see if the reparenting that is going to occur is allowed according to the DOM.        // FIXME: We should either always allow it or perform an additional fixup instead of        // just bailing here.        // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.        if (!parentElem->childAllowed(blockElem))            return;        m_hasPElementInScope = Unknown;        if (maxElem->node->parentNode() != elem->node) {            // Walk the stack and remove any elements that aren't residual style tags.  These            // are basically just being closed up.  Example:            // <font><span>Moo<p>Goo</font></p>.            // In the above example, the <span> doesn't need to be reopened.  It can just close.            HTMLStackElem* currElem = maxElem->next;            HTMLStackElem* prevElem = maxElem;            while (currElem != elem) {                HTMLStackElem* nextElem = currElem->next;                if (!isResidualStyleTag(currElem->tagName)) {                    prevElem->next = nextElem;                    prevElem->derefNode();                    prevElem->node = currElem->node;                    prevElem->didRefNode = currElem->didRefNode;                    delete currElem;                }                else                    prevElem = currElem;                currElem = nextElem;            }            // We have to reopen residual tags in between maxElem and elem.  An example of this case is:            // <font><i>Moo<p>Foo</font>.            // In this case, we need to transform the part before the <p> into:            // <font><i>Moo</i></font><i>            // so that the <i> will remain open.  This involves the modification of elements            // in the block stack.            // This will also affect how we ultimately reparent the block, since we want it to end up            // under the reopened residual tags (e.g., the <i> in the above example.)            RefPtr<Node> prevNode = 0;            currElem = maxElem;            while (currElem->node != residualElem) {                if (isResidualStyleTag(currElem->node->localName())) {                    // Create a clone of this element.                    // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.                    Node* currNode = currElem->node->cloneNode(false).releaseRef();                    reportError(ResidualStyleError, &currNode->localName());                        // Change the stack element's node to point to the clone.                    // The stack element adopts the reference we obtained above by calling release().                    currElem->derefNode();                    currElem->node = currNode;                    currElem->didRefNode = true;                    // Attach the previous node as a child of this new node.                    if (prevNode)                        currNode->appendChild(prevNode, ec);                    else // The new parent for the block element is going to be the innermost clone.                        parentElem = currNode;  // FIXME: We shifted parentElem to be a residual inline.  We never checked to see if blockElem could be legally placed inside the inline though.                    prevNode = currNode;                }                currElem = currElem->next;            }            // Now append the chain of new residual style elements if one exists.            if (prevNode)                elem->node->appendChild(prevNode, ec);  // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.        }        // Check if the block is still in the tree. If it isn't, then we don't        // want to remove it from its parent (that would crash) or insert it into        // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778        bool isBlockStillInTree = blockElem->parentNode();        // We need to make a clone of |residualElem| and place it just inside |blockElem|.        // All content of |blockElem| is reparented to be under this clone.  We then        // reparent |blockElem| using real DOM calls so that attachment/detachment will        // be performed to fix up the rendering tree.        // So for this example: <b>...<p>Foo</b>Goo</p>        // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>        //        // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.        if (isBlockStillInTree)            blockElem->parentNode()->removeChild(blockElem, ec);        Node* newNodePtr = 0;        if (blockElem->firstChild()) {            // Step 2: Clone |residualElem|.            RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.            newNodePtr = newNode.get();            reportError(ResidualStyleError, &newNode->localName());            // Step 3: Place |blockElem|'s children under |newNode|.  Remove all of the children of |blockElem|            // before we've put |newElem| into the document.  That way we'll only do one attachment of all            // the new content (instead of a bunch of individual attachments).            Node* currNode = blockElem->firstChild();            while (currNode) {                Node* nextNode = currNode->nextSibling();                newNode->appendChild(currNode, ec);                currNode = nextNode;            }            // Step 4: Place |newNode| under |blockElem|.  |blockElem| is still out of the document, so no            // attachment can occur yet.            blockElem->appendChild(newNode.release(), ec);        } else            finished = true;        // Step 5: Reparent |blockElem|.  Now the full attachment of the fixed up tree takes place.        if (isBlockStillInTree)            parentElem->appendChild(blockElem, ec);        // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us.  Also update        // the node associated with the previous stack element so that when it gets popped,        // it doesn't make the residual element the next current node.        HTMLStackElem* currElem = maxElem;        HTMLStackElem* prevElem = 0;        while (currElem != elem) {            prevElem = currElem;            currElem = currElem->next;        }        prevElem->next = elem->next;        prevElem->derefNode();        prevElem->node = elem->node;        prevElem->didRefNode = elem->didRefNode;        if (!finished) {            // Repurpose |elem| to represent |newNode| and insert it at the appropriate position            // in the stack. We do not do this for the innermost block, because in that case the new            // node is effectively no longer open.            elem->next = maxElem;            elem->node = prevMaxElem->node;            elem->didRefNode = prevMaxElem->didRefNode;            elem->strayTableContent = false;            prevMaxElem->next = elem;            ASSERT(newNodePtr);            prevMaxElem->node = newNodePtr;            prevMaxElem->didRefNode = false;        } else            delete elem;    }    // FIXME: If we ever make a case like this work:    // <table><b><i><form></b></form></i></table>    // Then this check will be too simplistic.  Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.    if (strayTableContent)        m_inStrayTableContent--;    // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.    // In the above example, Goo should stay italic.    // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.        HTMLStackElem* curr = m_blockStack;    HTMLStackElem* residualStyleStack = 0;    unsigned stackDepth = 1;    unsigned redundantStyleCount = 0;    while (curr && curr != maxElem) {        // We will actually schedule this tag for reopening        // after we complete the close of this entire block.        if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {            // We've overloaded the use of stack elements and are just reusing the            // struct with a slightly different meaning to the variables.  Instead of chaining            // from innermost to outermost, we build up a list of all the tags we need to reopen            // from the outermost to the innermost, i.e., residualStyleStack will end up pointing            // to the outermost tag we need to reopen.            // We also set curr->node to be the actual element that corresponds to the ID stored in            // curr->id rather than the node that you should pop to when the element gets pulled off            // the stack.            if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))                redundantStyleCount++;            else                redundantStyleCount = 0;            if (redundantStyleCount < cMaxRedundantTagDepth)                moveOneBlockToStack(residualStyleStack);            else                popOneBlock();        } else            popOneBlock();        curr = m_blockStack;    }    reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.    m_handlingResidualStyleAcrossBlocks = false;}void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent){    // Loop for each tag that needs to be reopened.    while (elem) {        // Create a shallow clone of the DOM node for this element.        RefPtr<Node> newNode = elem->node->cloneNode(false);         reportError(ResidualStyleError, &newNode->localName());        // Append the new node. In the malformed table case, we need to insert before the table,        // which will be the last child.        ExceptionCode ec = 0;        if (malformedTableParent)            malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);        else            m_current->appendChild(newNode, ec);        // FIXME: Is it really OK to ignore the exceptions here?        // Now push a new stack element for this node we just created.        pushBlock(elem->tagName, elem->level);        newNode->beginParsingChildren();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -