⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 手机浏览器源码程序,功能强大
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/*
    This file is part of the KDE libraries

    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1999,2001 Lars Knoll (knoll@kde.org)
              (C) 2000,2001 Dirk Mueller (mueller@kde.org)
    Copyright (C) 2004 Apple Computer, Inc.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/
//----------------------------------------------------------------------------
//
// KDE HTML Widget -- HTML Parser
//#define PARSER_DEBUG

#include "html/htmlparser.h"

#include "dom/dom_exception.h"

#include "html/html_baseimpl.h"
#include "html/html_blockimpl.h"
#include "html/html_canvasimpl.h"
#include "html/html_documentimpl.h"
#include "html/html_elementimpl.h"
#include "html/html_formimpl.h"
#include "html/html_headimpl.h"
#include "html/html_imageimpl.h"
#include "html/html_inlineimpl.h"
#include "html/html_listimpl.h"
#include "html/html_miscimpl.h"
#include "html/html_tableimpl.h"
#include "html/html_objectimpl.h"
#include "xml/dom_textimpl.h"
#include "xml/dom_nodeimpl.h"
#include "misc/htmlhashes.h"
#include "html/htmltokenizer.h"
#include "khtmlview.h"
#include "khtml_part.h"
#include "css/cssproperties.h"
#include "css/cssvalues.h"

#include "rendering/render_object.h"

#include <kdebug.h>
#include <klocale.h>

using namespace DOM;
using namespace khtml;

//----------------------------------------------------------------------------

/**
 * @internal
 */
class HTMLStackElem
OOM_MODIFIED
{
public:
    HTMLStackElem( int _id,
                   int _level,
                   DOM::NodeImpl *_node,
                   HTMLStackElem * _next
        )
        :
        id(_id),
        level(_level),
        strayTableContent(false),
        node(_node),
        next(_next)
        { if(node) node->ref(); }

        ~HTMLStackElem() { if(node) node->deref(); }

        void setNode( DOM::NodeImpl *_node ) {  if(node) node->deref(); node=_node; if(node) node->ref(); }

    int       id;
    int       level;
    bool      strayTableContent;
    NodeImpl *node;
    HTMLStackElem *next;
};

/**
 * @internal
 *
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is wellformed, parsing it is
 * straightforward.
 * Unfortunately, people can't write wellformed HTML documents, so the parser
 * has to be tolerant about errors.
 *
 * We have to take care of the following error conditions:
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one, which forbids
 *    the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be, that
 *    the person writing the document forgot some tag inbetween (or that the
 *    tag inbetween is optional...) This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We wan't to add a block element inside to an inline element. Close all
 *    inline elements up to the next higher block element.
 * 4. If this doesn't help close elements, until we are allowed to add the
 *    element or ignore the tag.
 *
 */
KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentPtr *doc, bool includesComments)
    : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
{
    //kdDebug( 6035 ) << "parser constructor" << endl;
#if SPEED_DEBUG > 0
    qt.start();
#endif

    HTMLWidget    = _parent;
    document      = doc;
    document->ref();

    blockStack = 0;

    reset();
}

KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentPtr *doc, bool includesComments)
    : current(0), currentIsReferenced(false), includesCommentsInDOM(includesComments)
{
    HTMLWidget = 0;
    document = doc;
    document->ref();

    blockStack = 0;

    reset();
    setCurrent(i);
    inBody = true;
}

KHTMLParser::~KHTMLParser()
{
#if SPEED_DEBUG > 0
    kdDebug( ) << "TIME: parsing time was = " << qt.elapsed() << endl;
#endif

    freeBlock();

    setCurrent(0);

    document->deref();

    if (isindex)
        isindex->deref();
}

void KHTMLParser::reset()
{
    setCurrent(doc());

    freeBlock();

    // before parsing, no tags are forbidden
    memset(forbiddenTag, 0, sizeof(forbiddenTag));

    inBody = false;
    haveFrameSet = false;
    haveContent = false;
    inSelect = false;
    inStrayTableContent = 0;

    form = 0;
    map = 0;
    head = 0;
    end = false;
    isindex = 0;

    discard_until = 0;
}

void KHTMLParser::setCurrent(DOM::NodeImpl *newCurrent)
{
    bool newCurrentIsReferenced = newCurrent && newCurrent != doc();
    if (newCurrentIsReferenced)
        newCurrent->ref();
    if (current && currentIsReferenced)
        current->deref();
    current = newCurrent;
    currentIsReferenced = newCurrentIsReferenced;
}

void KHTMLParser::parseToken(Token *t)
{
    if(discard_until) {
        if(t->id == discard_until)
            discard_until = 0;

        // do not skip </iframe>
        if ( discard_until || current->id() + ID_CLOSE_TAG != t->id )
            return;
    }

#ifdef PARSER_DEBUG
    kdDebug( 6035 ) << "\n\n==> parser: processing token " << getTagName(t->id).string() << "(" << t->id << ")"
                    << " current = " << getTagName(current->id()).string() << "(" << current->id() << ")" << endl;
    kdDebug(6035) << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << endl;
#endif

    // holy shit. apparently some sites use </br> instead of <br>
    // be compatible with IE and NS
    if (t->id == ID_BR + ID_CLOSE_TAG && doc()->inCompatMode())
        t->id = ID_BR;

    if (t->id > ID_CLOSE_TAG)
    {
        processCloseTag(t);
        return;
    }

    // ignore spaces, if we're not inside a paragraph or other inline code
    if( t->id == ID_TEXT && t->text ) {
        if(inBody && !skipMode() && current->id() != ID_STYLE
            && current->id() != ID_TITLE && current->id() != ID_SCRIPT &&
            !t->text->containsOnlyWhitespace())
            haveContent = true;
#ifdef PARSER_DEBUG
        kdDebug(6035) << "length="<< t->text->l << " text='" << QConstString(t->text->s, t->text->l).string() << "'" << endl;
#endif
    }

    NodeImpl *n = getElement(t);
    // just to be sure, and to catch currently unimplemented stuff
    if(!n)
        return;

    Node protectNode(n);

    // set attributes
    if(n->isElementNode())
    {
        ElementImpl *e = static_cast<ElementImpl *>(n);
        e->setAttributeMap(t->attrs);

        // take care of optional close tags
        if(endTagRequirement(e->id()) == DOM::OPTIONAL)
            popBlock(t->id);

        if (isHeaderTag(t->id))
            // Do not allow two header tags to be nested if the intervening tags are inlines.
            popNestedHeaderTag();
    }

    // if this tag is forbidden inside the current context, pop
    // blocks until we are allowed to add it...
    while (blockStack && t->id <= ID_LAST_TAG && forbiddenTag[t->id]) {
#ifdef PARSER_DEBUG
        kdDebug( 6035 ) << "t->id: " << t->id << " is forbidden :-( " << endl;
#endif
        popOneBlock();
    }

    if (!insertNode(n, t->flat))
    {
        // we couldn't insert the node...

        if(n->isElementNode())
        {
            ElementImpl *e = static_cast<ElementImpl *>(n);
            e->setAttributeMap(0);
        }

#ifdef PARSER_DEBUG
        kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;
#endif
        if (map == n)
        {
#ifdef PARSER_DEBUG
            kdDebug( 6035 ) << "  --> resetting map!" << endl;
#endif
            map = 0;
        }
        if (form == n)
        {
#ifdef PARSER_DEBUG
            kdDebug( 6035 ) << "   --> resetting form!" << endl;
#endif
            form = 0;
        }
    }
}

static bool isTableRelatedTag(int id)
{
    return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
            id == ID_TH);
}

bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
{
    Node protectNode(n);

    int id = n->id();

    // let's be stupid and just try to insert it.
    // this should work if the document is wellformed
#ifdef PARSER_DEBUG
    NodeImpl *tmp = current;
#endif
    NodeImpl *newNode = current->addChild(n);
    if ( newNode ) {
#ifdef PARSER_DEBUG
        kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;
#endif
        // don't push elements without end tag on the stack
        if(tagPriority(id) != 0 && !flat)
        {
            pushBlock(id, tagPriority(id));
            if (newNode == current)
                popBlock(id);
            else
                setCurrent(newNode);
#if SPEED_DEBUG < 2
            if(!n->attached() && HTMLWidget)
                n->attach();
#endif
        }
        else {
#if SPEED_DEBUG < 2
            if(!n->attached() && HTMLWidget)
                n->attach();
            if (n->maintainsState()) {
                doc()->registerMaintainsState(n);
                QStringList &states = doc()->restoreState();
                if (!states.isEmpty())
                    n->restoreState(states);
            }
            n->closeRenderer();
#endif
        }

        return true;
    } else {
#ifdef PARSER_DEBUG
        kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;
#endif
        // error handling...
        HTMLElementImpl *e;
        bool handled = false;

        // switch according to the element to insert
        switch(id)
        {
        case ID_TR:
        case ID_TH:
        case ID_TD:
            if (inStrayTableContent && !isTableRelatedTag(current->id())) {
                // pop out to the nearest enclosing table-related tag.
                while (blockStack && !isTableRelatedTag(current->id()))
                    popOneBlock();
                return insertNode(n);
            }
            break;
        case ID_COMMENT:
            break;
        case ID_HEAD:
            // ### alllow not having <HTML> in at all, as per HTML spec
            if (!current->isDocumentNode() && current->id() != ID_HTML )
                return false;
            break;
            // We can deal with a base, meta and link element in the body, by just adding the element to head.
        case ID_META:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -