📄 htmlparser.cpp
字号:
/* This file is part of the KDE libraries Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1999 Lars Knoll (knoll@kde.org) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget -- HTML Parser// $Id: htmlparser.cpp,v 1.2 2002/01/28 04:31:02 leon Exp $//#define PARSER_DEBUG//#define COMMENTS_IN_DOM#include "render_interface.h"#include "htmlparser.h"#include "dom_nodeimpl.h"#include "dom_exception.h"#include "html_baseimpl.h"#include "html_blockimpl.h"#include "html_documentimpl.h"#include "html_elementimpl.h"#include "html_formimpl.h"#include "html_headimpl.h"#include "html_imageimpl.h"#include "html_inlineimpl.h"#include "html_listimpl.h"#include "html_miscimpl.h"#include "html_tableimpl.h"#include "html_objectimpl.h"#include "dom_textimpl.h"#include "htmlhashes.h"#include "htmltoken.h"#include "mghtmlview.h"#include "mghtml_part.h"#include "cssproperties.h"#include "render_object.h"#include "kdebug.h"using namespace DOM;using namespace khtml;//----------------------------------------------------------------------------// ### FIXME: move this list to dtd.cpp//// priority of tags. Closing tags of higher priority close tags of lower// priority.// Update this list, whenever you change htmltags.*//// 0 elements with forbidden close tag and text. They don't get pushed// to the stack.// 1 inline elements// 2 form elements// 3 regular block level elements// 4 lists (OL UL DIR MENU)// 5 TD TH// 6 TR// 7 tbody thead tfoot caption// 8 table// 9 body frameset// 10 htmlconst unsigned short tagPriority[] = { 0, // 0 1, // ID_A == 1 1, // ID_ABBR 1, // ID_ACRONYM 3, // ID_ADDRESS 1, // ID_APPLET 0, // ID_AREA 1, // ID_B 0, // ID_BASE 0, // ID_BASEFONT 1, // ID_BDO 1, // ID_BIG 3, // ID_BLOCKQUOTE 9, // ID_BODY 0, // ID_BR 1, // ID_BUTTON 1, // ID_CAPTION 3, // ID_CENTER 1, // ID_CITE 1, // ID_CODE 0, // ID_COL 1, // ID_COLGROUP 3, // ID_DD 1, // ID_DEL 1, // ID_DFN 4, // ID_DIR 3, // ID_DIV 4, // ID_DL 3, // ID_DT 1, // ID_EM 0, // ID_EMBED 3, // ID_FIELDSET 1, // ID_FONT 3, // ID_FORM 0, // ID_FRAME 9, // ID_FRAMESET 3, // ID_H1 3, // ID_H2 3, // ID_H3 3, // ID_H4 3, // ID_H5 3, // ID_H6 9, // ID_HEAD 0, // ID_HR 10, // ID_HTML 1, // ID_I 1, // ID_IFRAME 0, // ID_IMG 0, // ID_INPUT 1, // ID_INS 0, // ID_ISINDEX 1, // ID_KBD 1, // ID_LABEL 1, // ID_LEGEND 3, // ID_LI 0, // ID_LINK 1, // ID_LISTING 1, // ID_MAP 4, // ID_MENU 0, // ID_META 9, // ID_NOEMBED 9, // ID_NOFRAMES 3, // ID_NOSCRIPT 1, // ID_OBJECT 4, // ID_OL 1, // ID_OPTGROUP 2, // ID_OPTION 3, // ID_P 0, // ID_PARAM 1, // ID_PLAIN 1, // ID_PRE 1, // ID_Q 1, // ID_S 1, // ID_SAMP 1, // ID_SCRIPT 2, // ID_SELECT 1, // ID_SMALL 1, // ID_SPAN 1, // ID_STRIKE 1, // ID_STRONG 1, // ID_STYLE 1, // ID_SUB 1, // ID_SUP 8, // ID_TABLE 7, // ID_TBODY 5, // ID_TD 1, // ID_TEXTAREA 7, // ID_TFOOT 5, // ID_TH 7, // ID_THEAD 1, // ID_TITLE 6, // ID_TR 1, // ID_TT 1, // ID_U 4, // ID_UL 1, // ID_VAR 0, // ID_TEXT};/** * @internal */class HTMLStackElem{public: HTMLStackElem( int _id, int _level, DOM::NodeImpl *_node, HTMLStackElem * _next ) : id(_id), level(_level), node(_node), next(_next) { } int id; int level; NodeImpl *node; HTMLStackElem *next;};/** * @internal * * The parser parses tokenized input into the document, building up the * document tree. If the document is wellformed, parsing it is * straightforward. * Unfortunately, people can't write wellformed HTML documents, so the parser * has to be tolerant about errors. * * We have to take care of the following error conditions: * 1. The element being added is explicitly forbidden inside some outer tag. * In this case we should close all tags up to the one, which forbids * the element, and add it afterwards. * 2. We are not allowed to add the element directly. It could be, that * the person writing the document forgot some tag inbetween (or that the * tag inbetween is optional...) This could be the case with the following * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?) * 3. We wan't to add a block element inside to an inline element. Close all * inline elements up to the next higher block element. * 4. If this doesn't help close elements, until we are allowed to add the * element or ignore the tag. * */KHTMLParser::KHTMLParser( MGHTMLView *_parent, HTMLDocumentImpl *doc){ ////kdDebug( 6035 ) << "parser constructor" << endl; HTMLWidget = _parent; document = doc; blockStack = 0; // ID_CLOSE_TAG == Num of tags forbiddenTag = new ushort[ID_CLOSE_TAG+1]; reset();}KHTMLParser::~KHTMLParser(){ freeBlock(); delete [] forbiddenTag;}void KHTMLParser::reset(){ current = document; freeBlock(); // before parsing no tags are forbidden... memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort)); inBody = false; haveFrameSet = false; _inline = false; form = 0; map = 0; head = 0; end = false; discard_until = 0;}void KHTMLParser::parseToken(Token *t){ if (t->id > 2*ID_CLOSE_TAG) { //kdDebug( 6035 ) << "Unknown tag!! tagID = " << t->id << endl; return; } if(discard_until) { if(t->id == discard_until) discard_until = 0; return; }#ifdef PARSER_DEBUG //kdDebug( 6035 ) << "\n\n==> parser: processing token " << t->id << " current = " << current->id() << endl; //kdDebug(6035) << "inline=" << _inline << " inBody=" << inBody << endl;#endif if(t->id > ID_CLOSE_TAG) { processCloseTag(t); return; } // ignore spaces, if we're not inside a paragraph or other inline code if( t->id == ID_TEXT && (!_inline || !inBody) ) {#ifdef PARSER_DEBUG //kdDebug(6035) << "length="<< t->text.length() << "text='" << t->text.string() << "'" << endl;#endif if(t->text.length() == 1 && t->text[0].latin1() == ' ') return; } NodeImpl *n = getElement(t); // just to be sure, and to catch currently unimplemented stuff if(!n) return; // set attributes if(n->isElementNode()) { ElementImpl *e = static_cast<ElementImpl *>(n); e->setAttribute(t->attrs); // take care of optional close tags if(e->endTag() == DOM::OPTIONAL) popBlock(t->id); } // if this tag is forbidden inside the current context, pop // blocks until we are allowed to add it... while(forbiddenTag[t->id]) popOneBlock(); try { insertNode(n); } catch(DOMException) { // we couldn't insert the node...#ifdef PARSER_DEBUG //kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;#endif if (map == n) {#ifdef PARSER_DEBUG //kdDebug( 6035 ) << " --> resetting map!" << endl;#endif map = 0; } if (form == n) {#ifdef PARSER_DEBUG //kdDebug( 6035 ) << " --> resetting form!" << endl;#endif form = 0; } delete n; }}void KHTMLParser::insertNode(NodeImpl *n){ int id = n->id(); // let's be stupid and just try to insert it. // this should work if the document is wellformed try {#ifdef PARSER_DEBUG NodeImpl *tmp = current;#endif NodeImpl *newNode = current->addChild(n); //kdDebug(0) << "hrere" << endl;#ifdef PARSER_DEBUG //kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;#endif // don't push elements without end tag on the stack if(tagPriority[id] != 0) { pushBlock(id, tagPriority[id]); current = newNode; n->attach(HTMLWidget); // ### HACK!!! if(n->id() == ID_BODY) document->createSelector(); if(current->isInline()) _inline = true; } else n->attach(HTMLWidget); if(tagPriority[id] == 0 && n->renderer()) { n->renderer()->calcMinMaxWidth(); if (n->id() == ID_EMBED) n->renderer()->close(); } } catch(DOMException exception) {#ifdef PARSER_DEBUG //kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;#endif // error handling... HTMLElementImpl *e; bool handled = false; // switch according to the element to insert switch(id) { case ID_COMMENT: break; case ID_HEAD: // ### alllow not having <HTML> in at all, as per HTML spec if (!current->isDocumentNode() && !current->id() == ID_HTML )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -