⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 monqueror一个很具有参考价值的源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
    This file is part of the KDE libraries

    Copyright (C) 1997 Martin Jones (mjones@kde.org)
              (C) 1997 Torben Weis (weis@kde.org)
              (C) 1999 Lars Knoll (knoll@kde.org)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/
//----------------------------------------------------------------------------
//
// KDE HTML Widget -- HTML Parser
// $Id: htmlparser.cpp,v 1.2 2002/01/28 04:31:02 leon Exp $

//#define PARSER_DEBUG
//#define COMMENTS_IN_DOM

#include "render_interface.h"
#include "htmlparser.h"

#include "dom_nodeimpl.h"
#include "dom_exception.h"
#include "html_baseimpl.h"
#include "html_blockimpl.h"
#include "html_documentimpl.h"
#include "html_elementimpl.h"
#include "html_formimpl.h"
#include "html_headimpl.h"
#include "html_imageimpl.h"
#include "html_inlineimpl.h"
#include "html_listimpl.h"
#include "html_miscimpl.h"
#include "html_tableimpl.h"
#include "html_objectimpl.h"
#include "dom_textimpl.h"
#include "htmlhashes.h"
#include "htmltoken.h"
#include "mghtmlview.h"
#include "mghtml_part.h"
#include "cssproperties.h"
#include "render_object.h"

#include "kdebug.h"

using namespace DOM;
using namespace khtml;

//----------------------------------------------------------------------------

// ### FIXME: move this list to dtd.cpp
//
// Priority of tags, indexed by tag ID. Closing tags of higher priority close
// tags of lower priority.
// Update this list whenever you change htmltags.*
//
// 0 elements with forbidden close tag and text. They don't get pushed
//   to the stack.
// 1 inline elements
// 2 form elements
// 3 regular block level elements
// 4 lists (OL UL DIR MENU)
// 5 TD TH
// 6 TR
// 7 tbody thead tfoot caption
// 8 table
// 9 body frameset
// 10 html
//
// NOTE(review): the legend above and the table below disagree for some tags
// (e.g. the legend lists caption under 7, but ID_CAPTION is 1 in the table)
// -- confirm which one is intended.
// NOTE(review): the table ends at ID_TEXT and has no slot for ID_COMMENT,
// which insertNode() switches on; confirm that tagPriority is never indexed
// with ID_COMMENT.
const unsigned short tagPriority[] = {
    0, // 0
    1, // ID_A == 1
    1, // ID_ABBR
    1, // ID_ACRONYM
    3, // ID_ADDRESS
    1, // ID_APPLET
    0, // ID_AREA
    1, // ID_B
    0, // ID_BASE
    0, // ID_BASEFONT
    1, // ID_BDO
    1, // ID_BIG
    3, // ID_BLOCKQUOTE
    9, // ID_BODY
    0, // ID_BR
    1, // ID_BUTTON
    1, // ID_CAPTION
    3, // ID_CENTER
    1, // ID_CITE
    1, // ID_CODE
    0, // ID_COL
    1, // ID_COLGROUP
    3, // ID_DD
    1, // ID_DEL
    1, // ID_DFN
    4, // ID_DIR
    3, // ID_DIV
    4, // ID_DL
    3, // ID_DT
    1, // ID_EM
    0, // ID_EMBED
    3, // ID_FIELDSET
    1, // ID_FONT
    3, // ID_FORM
    0, // ID_FRAME
    9, // ID_FRAMESET
    3, // ID_H1
    3, // ID_H2
    3, // ID_H3
    3, // ID_H4
    3, // ID_H5
    3, // ID_H6
    9, // ID_HEAD
    0, // ID_HR
    10, // ID_HTML
    1, // ID_I
    1, // ID_IFRAME
    0, // ID_IMG
    0, // ID_INPUT
    1, // ID_INS
    0, // ID_ISINDEX
    1, // ID_KBD
    1, // ID_LABEL
    1, // ID_LEGEND
    3, // ID_LI
    0, // ID_LINK
    1, // ID_LISTING
    1, // ID_MAP
    4, // ID_MENU
    0, // ID_META
    9, // ID_NOEMBED
    9, // ID_NOFRAMES
    3, // ID_NOSCRIPT
    1, // ID_OBJECT
    4, // ID_OL
    1, // ID_OPTGROUP
    2, // ID_OPTION
    3, // ID_P
    0, // ID_PARAM
    1, // ID_PLAIN
    1, // ID_PRE
    1, // ID_Q
    1, // ID_S
    1, // ID_SAMP
    1, // ID_SCRIPT
    2, // ID_SELECT
    1, // ID_SMALL
    1, // ID_SPAN
    1, // ID_STRIKE
    1, // ID_STRONG
    1, // ID_STYLE
    1, // ID_SUB
    1, // ID_SUP
    8, // ID_TABLE
    7, // ID_TBODY
    5, // ID_TD
    1, // ID_TEXTAREA
    7, // ID_TFOOT
    5, // ID_TH
    7, // ID_THEAD
    1, // ID_TITLE
    6, // ID_TR
    1, // ID_TT
    1, // ID_U
    4, // ID_UL
    1, // ID_VAR
    0, // ID_TEXT
};

/**
 * @internal
 *
 * Plain record linking a tag id, a level and a DOM node to the next record
 * through @c next.
 * NOTE(review): presumably the element type of the parser's blockStack, with
 * @c level holding the tagPriority value passed to pushBlock() -- confirm
 * against htmlparser.h. The constructor only stores the pointers it is given.
 */
class HTMLStackElem
{
public:
    HTMLStackElem( int _id,
                   int _level,
                   DOM::NodeImpl *_node,
                   HTMLStackElem * _next
        )
        :
        id(_id),
        level(_level),
        node(_node),
        next(_next)
        { }

    int       id;     // tag id of the entry
    int       level;  // level value supplied by the creator
    NodeImpl *node;   // DOM node associated with the entry
    HTMLStackElem *next;  // following entry in the chain, or 0
};

/**
 * @internal
 *
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is
 * straightforward.
 * Unfortunately, people can't write well-formed HTML documents, so the parser
 * has to be tolerant about errors.
 *
 * We have to take care of the following error conditions:
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one which forbids
 *    the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional...) This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element or ignore the tag.
* */KHTMLParser::KHTMLParser( MGHTMLView *_parent,                          HTMLDocumentImpl *doc){    ////kdDebug( 6035 ) << "parser constructor" << endl;    HTMLWidget    = _parent;    document      = doc;    blockStack = 0;    // ID_CLOSE_TAG == Num of tags    forbiddenTag = new ushort[ID_CLOSE_TAG+1];    reset();}KHTMLParser::~KHTMLParser(){    freeBlock();    delete [] forbiddenTag;}void KHTMLParser::reset(){    current = document;    freeBlock();    // before parsing no tags are forbidden...    memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort));    inBody = false;    haveFrameSet = false;    _inline = false;    form = 0;    map = 0;    head = 0;    end = false;    discard_until = 0;}void KHTMLParser::parseToken(Token *t){    if (t->id > 2*ID_CLOSE_TAG)    {      //kdDebug( 6035 ) << "Unknown tag!! tagID = " << t->id << endl;      return;    }    if(discard_until)    {        if(t->id == discard_until)            discard_until = 0;        return;    }#ifdef PARSER_DEBUG    //kdDebug( 6035 ) << "\n\n==> parser: processing token " << t->id << " current = " << current->id() << endl;        //kdDebug(6035) << "inline=" << _inline << " inBody=" << inBody << endl;#endif    if(t->id > ID_CLOSE_TAG)    {        processCloseTag(t);        return;    }    // ignore spaces, if we're not inside a paragraph or other inline code    if( t->id == ID_TEXT && (!_inline  || !inBody) )    {#ifdef PARSER_DEBUG        //kdDebug(6035) << "length="<< t->text.length() << "text='" << t->text.string() << "'" << endl;#endif        if(t->text.length() == 1 && t->text[0].latin1() == ' ')            return;    }    NodeImpl *n = getElement(t);    // just to be sure, and to catch currently unimplemented stuff    if(!n)        return;    // set attributes    if(n->isElementNode())    {        ElementImpl *e = static_cast<ElementImpl *>(n);        e->setAttribute(t->attrs);        // take care of optional close tags        if(e->endTag() == DOM::OPTIONAL)            popBlock(t->id);    } 
   // if this tag is forbidden inside the current context, pop    // blocks until we are allowed to add it...    while(forbiddenTag[t->id]) popOneBlock();    try    {        insertNode(n);    }    catch(DOMException)    {									        // we couldn't insert the node...#ifdef PARSER_DEBUG        //kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;#endif        if (map == n)        {#ifdef PARSER_DEBUG            //kdDebug( 6035 ) << "  --> resetting map!" << endl;#endif            map = 0;        }        if (form == n)        {#ifdef PARSER_DEBUG            //kdDebug( 6035 ) << "   --> resetting form!" << endl;#endif            form = 0;        }        delete n;    }}void KHTMLParser::insertNode(NodeImpl *n){    int id = n->id();    // let's be stupid and just try to insert it.    // this should work if the document is wellformed    try    {#ifdef PARSER_DEBUG        NodeImpl *tmp = current;#endif        NodeImpl *newNode = current->addChild(n);        //kdDebug(0) << "hrere" << endl;#ifdef PARSER_DEBUG        //kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;#endif        // don't push elements without end tag on the stack        if(tagPriority[id] != 0)        {            pushBlock(id, tagPriority[id]);            current = newNode;            n->attach(HTMLWidget);            // ### HACK!!!            if(n->id() == ID_BODY)                document->createSelector();            if(current->isInline()) _inline = true;        }        else            n->attach(HTMLWidget);        if(tagPriority[id] == 0 && n->renderer())        {            n->renderer()->calcMinMaxWidth();            if (n->id() == ID_EMBED) n->renderer()->close();        }    }    catch(DOMException exception)    {#ifdef PARSER_DEBUG        //kdDebug( 6035 ) << "ADDING NODE FAILED!!!! 
current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;#endif        // error handling...        HTMLElementImpl *e;        bool handled = false;        // switch according to the element to insert        switch(id)        {        case ID_COMMENT:            break;        case ID_HEAD:            // ### alllow not having <HTML> in at all, as per HTML spec            if (!current->isDocumentNode() && !current->id() == ID_HTML )

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -