⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 konqueror3 embedded版本, KDE环境下的当家浏览器的嵌入式版本源码包.
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/*    This file is part of the KDE libraries    Copyright (C) 1997 Martin Jones (mjones@kde.org)              (C) 1997 Torben Weis (weis@kde.org)              (C) 1999,2001 Lars Knoll (knoll@kde.org)              (C) 2000,2001 Dirk Mueller (mueller@kde.org)              (C) 2003 Apple Computer, Inc.    This library is free software; you can redistribute it and/or    modify it under the terms of the GNU Library General Public    License as published by the Free Software Foundation; either    version 2 of the License, or (at your option) any later version.    This library is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU    Library General Public License for more details.    You should have received a copy of the GNU Library General Public License    along with this library; see the file COPYING.LIB.  If not, write to    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,    Boston, MA 02110-1301, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget -- HTML Parser// #define PARSER_DEBUG#include "dom/dom_exception.h"#include "html/html_baseimpl.h"#include "html/html_blockimpl.h"#include "html/html_documentimpl.h"#include "html/html_elementimpl.h"#include "html/html_formimpl.h"#include "html/html_headimpl.h"#include "html/html_imageimpl.h"#include "html/html_inlineimpl.h"#include "html/html_listimpl.h"#include "html/html_miscimpl.h"#include "html/html_tableimpl.h"#include "html/html_objectimpl.h"#include "xml/dom_textimpl.h"#include "xml/dom_nodeimpl.h"#include "misc/htmlhashes.h"#include "html/htmltokenizer.h"#include "khtmlview.h"#include "khtml_part.h"#include "khtml_factory.h"#include "css/cssproperties.h"#include "css/cssvalues.h"#include "css/csshelper.h"#include "rendering/render_object.h"#include "html/htmlparser.h"#include <kdebug.h>#include <klocale.h>using namespace DOM;using namespace khtml;//----------------------------------------------------------------------------/** * @internal */class HTMLStackElem{public:    HTMLStackElem( int _id,                   int _level,                   DOM::NodeImpl *_node,		   bool _inline,                   HTMLStackElem * _next )        :        id(_id),        level(_level),        strayTableContent(false),	m_inline(_inline),        node(_node),        next(_next)        { node->ref(); }    ~HTMLStackElem()        { node->deref(); }    void setNode(NodeImpl* newNode)    {        newNode->ref();        node->deref();        node = newNode;    }    int       id;    int       level;    bool      strayTableContent;    bool m_inline;    NodeImpl *node;    HTMLStackElem *next;};/** * @internal * * The parser parses tokenized input into the document, building up the * document tree. If the document is wellformed, parsing it is * straightforward. * Unfortunately, people can't write wellformed HTML documents, so the parser * has to be tolerant about errors. * * We have to take care of the following error conditions: * 1. The element being added is explicitly forbidden inside some outer tag. *    In this case we should close all tags up to the one, which forbids *    the element, and add it afterwards. * 2. We are not allowed to add the element directly. It could be, that *    the person writing the document forgot some tag inbetween (or that the *    tag inbetween is optional...) This could be the case with the following *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?) * 3. We wan't to add a block element inside to an inline element. Close all *    inline elements up to the next higher block element. * 4. If this doesn't help close elements, until we are allowed to add the *    element or ignore the tag. * */KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentPtr *doc){    //kdDebug( 6035 ) << "parser constructor" << endl;#if SPEED_DEBUG > 0    qt.start();#endif    HTMLWidget    = _parent;    document      = doc;    document->ref();    blockStack = 0;    current = 0;    // ID_CLOSE_TAG == Num of tags    forbiddenTag = new ushort[ID_CLOSE_TAG+1];    reset();}KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentPtr *doc ){    HTMLWidget = 0;    document = doc;    document->ref();    forbiddenTag = new ushort[ID_CLOSE_TAG+1];    blockStack = 0;    current = 0;    reset();    setCurrent(i);    inBody = true;}KHTMLParser::~KHTMLParser(){#if SPEED_DEBUG > 0    kdDebug( ) << "TIME: parsing time was = " << qt.elapsed() << endl;#endif    freeBlock();    if (current) current->deref();    document->deref();    delete [] forbiddenTag;    delete isindex;}void KHTMLParser::reset(){    setCurrent ( document->document() );    freeBlock();    // before parsing no tags are forbidden...    memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort));    inBody = false;    haveFrameSet = false;    haveContent = false;    haveBody = false;    haveTitle = false;    inSelect = false;    inStrayTableContent = 0;    m_inline = false;    form = 0;    map = 0;    head = 0;    end = false;    isindex = 0;    discard_until = 0;}void KHTMLParser::parseToken(Token *t){    if (t->tid > 2*ID_CLOSE_TAG)    {      kdDebug( 6035 ) << "Unknown tag!! tagID = " << t->tid << endl;      return;    }    if(discard_until) {        if(t->tid == discard_until)            discard_until = 0;        // do not skip </iframe>        if ( discard_until || current->id() + ID_CLOSE_TAG != t->tid )            return;    }#ifdef PARSER_DEBUG    kdDebug( 6035 ) << "\n\n==> parser: processing token " << getTagName(t->tid) << "(" << t->tid << ")"                    << " current = " << getTagName(current->id()) << "(" << current->id() << ")" << endl;    kdDebug(6035) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent << endl;#endif    // holy shit. apparently some sites use </br> instead of <br>    // be compatible with IE and NS    if(t->tid == ID_BR+ID_CLOSE_TAG && document->document()->inCompatMode())        t->tid -= ID_CLOSE_TAG;    if(t->tid > ID_CLOSE_TAG)    {        processCloseTag(t);        return;    }    // ignore spaces, if we're not inside a paragraph or other inline code    if( t->tid == ID_TEXT && t->text ) {        if(inBody && !skipMode() &&           current->id() != ID_STYLE && current->id() != ID_TITLE &&           current->id() != ID_SCRIPT &&           !t->text->containsOnlyWhitespace()) haveContent = true;#ifdef PARSER_DEBUG        kdDebug(6035) << "length="<< t->text->l << " text='" << QConstString(t->text->s, t->text->l).string() << "'" << endl;#endif    }    NodeImpl *n = getElement(t);    // just to be sure, and to catch currently unimplemented stuff    if(!n)        return;    // set attributes    if(n->isElementNode() && t->tid != ID_ISINDEX)    {        ElementImpl *e = static_cast<ElementImpl *>(n);        e->setAttributeMap(t->attrs);        // take care of optional close tags        if(endTag[e->id()] == DOM::OPTIONAL)            popBlock(t->tid);    }    // if this tag is forbidden inside the current context, pop    // blocks until we are allowed to add it...    while(forbiddenTag[t->tid]) {#ifdef PARSER_DEBUG        kdDebug( 6035 ) << "t->id: " << t->tid << " is forbidden :-( " << endl;#endif        popOneBlock();    }    // sometimes flat doesn't make sense    switch(t->tid) {    case ID_SELECT:    case ID_OPTION:        t->flat = false;    }    // the tokenizer needs the feedback for space discarding    if ( tagPriority[t->tid] == 0 )	t->flat = true;    if ( !insertNode(n, t->flat) ) {        // we couldn't insert the node...#ifdef PARSER_DEBUG        kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;#endif        if (map == n)        {#ifdef PARSER_DEBUG            kdDebug( 6035 ) << "  --> resetting map!" << endl;#endif            map = 0;        }        if (form == n)        {#ifdef PARSER_DEBUG            kdDebug( 6035 ) << "   --> resetting form!" << endl;#endif            form = 0;        }        delete n;    }}static bool isTableRelatedTag(int id){    return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||            id == ID_TH);}bool KHTMLParser::insertNode(NodeImpl *n, bool flat){    int id = n->id();    // let's be stupid and just try to insert it.    // this should work if the document is wellformed#ifdef PARSER_DEBUG    NodeImpl *tmp = current;#endif    NodeImpl *newNode = current->addChild(n);    if ( newNode ) {#ifdef PARSER_DEBUG        kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;#endif	// don't push elements without end tag on the stack        if(tagPriority[id] != 0 && !flat) {#if SPEED_DEBUG < 2            if(!n->attached() && HTMLWidget )                n->attach();#endif	    if(n->isInline()) m_inline = true;            pushBlock(id, tagPriority[id]);            setCurrent( newNode );        } else {#if SPEED_DEBUG < 2            if(!n->attached() && HTMLWidget)                n->attach();            if (n->maintainsState()) {                document->document()->registerMaintainsState(n);                QString state(document->document()->nextState());                if (!state.isNull()) n->restoreState(state);            }            n->close();#endif	    if(n->isInline()) m_inline = true;        }#if SPEED_DEBUG < 1        if(tagPriority[id] == 0 && n->renderer())            n->renderer()->calcMinMaxWidth();#endif        return true;    } else {#ifdef PARSER_DEBUG        kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;#endif        // error handling...        HTMLElementImpl *e;        bool handled = false;        // switch according to the element to insert        switch(id)        {        case ID_TR:        case ID_TH:        case ID_TD:            if (inStrayTableContent && !isTableRelatedTag(current->id())) {                // pop out to the nearest enclosing table-related tag.                while (blockStack && !isTableRelatedTag(current->id()))                    popOneBlock();                return insertNode(n);            }            break;        case ID_COMMENT:            break;        case ID_HEAD:            // ### allow not having <HTML> in at all, as per HTML spec            if (!current->isDocumentNode() && current->id() != ID_HTML )                return false;            break;        case ID_META:        case ID_LINK:        case ID_ISINDEX:        case ID_BASE:            if( !head )                createHead();            if( head ) {                if ( head->addChild(n) ) {#if SPEED_DEBUG < 2                    if(!n->attached() && HTMLWidget)                        n->attach();#endif                }                return true;            }            break;        case ID_HTML:            if (!current->isDocumentNode() ) {		if ( doc()->firstChild()->id() == ID_HTML) {		    // we have another <HTML> element.... apply attributes to existing one		    // make sure we don't overwrite already existing attributes		    NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);		    NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->firstChild())->attributes(false);		    bool changed = false;		    for (unsigned long l = 0; map && l < map->length(); ++l) {			NodeImpl::Id attrId = map->idAt(l);			DOMStringImpl *attrValue = map->valueAt(l);			changed = !bmap->getValue(attrId);			bmap->setValue(attrId,attrValue);		    }		    if ( changed )			doc()->recalcStyle( NodeImpl::Inherit );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -