📄 htmlparser.cpp
字号:
/* This file is part of the KDE libraries Copyright (C) 1997 Martin Jones (mjones@kde.org) (C) 1997 Torben Weis (weis@kde.org) (C) 1999,2001 Lars Knoll (knoll@kde.org) (C) 2000,2001 Dirk Mueller (mueller@kde.org) (C) 2003 Apple Computer, Inc. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*///----------------------------------------------------------------------------//// KDE HTML Widget -- HTML Parser// #define PARSER_DEBUG#include "dom/dom_exception.h"#include "html/html_baseimpl.h"#include "html/html_blockimpl.h"#include "html/html_documentimpl.h"#include "html/html_elementimpl.h"#include "html/html_formimpl.h"#include "html/html_headimpl.h"#include "html/html_imageimpl.h"#include "html/html_inlineimpl.h"#include "html/html_listimpl.h"#include "html/html_miscimpl.h"#include "html/html_tableimpl.h"#include "html/html_objectimpl.h"#include "xml/dom_textimpl.h"#include "xml/dom_nodeimpl.h"#include "misc/htmlhashes.h"#include "html/htmltokenizer.h"#include "khtmlview.h"#include "khtml_part.h"#include "khtml_factory.h"#include "css/cssproperties.h"#include "css/cssvalues.h"#include "css/csshelper.h"#include "rendering/render_object.h"#include "html/htmlparser.h"#include <kdebug.h>#include <klocale.h>using namespace DOM;using namespace khtml;//----------------------------------------------------------------------------/** * @internal */class HTMLStackElem{public: HTMLStackElem( int _id, int _level, DOM::NodeImpl *_node, bool _inline, HTMLStackElem * _next ) : id(_id), level(_level), strayTableContent(false), m_inline(_inline), node(_node), next(_next) { node->ref(); } ~HTMLStackElem() { node->deref(); } void setNode(NodeImpl* newNode) { newNode->ref(); node->deref(); node = newNode; } int id; int level; bool strayTableContent; bool m_inline; NodeImpl *node; HTMLStackElem *next;};/** * @internal * * The parser parses tokenized input into the document, building up the * document tree. If the document is wellformed, parsing it is * straightforward. * Unfortunately, people can't write wellformed HTML documents, so the parser * has to be tolerant about errors. * * We have to take care of the following error conditions: * 1. The element being added is explicitly forbidden inside some outer tag. * In this case we should close all tags up to the one, which forbids * the element, and add it afterwards. * 2. We are not allowed to add the element directly. It could be, that * the person writing the document forgot some tag inbetween (or that the * tag inbetween is optional...) This could be the case with the following * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?) * 3. We wan't to add a block element inside to an inline element. Close all * inline elements up to the next higher block element. * 4. If this doesn't help close elements, until we are allowed to add the * element or ignore the tag. * */KHTMLParser::KHTMLParser( KHTMLView *_parent, DocumentPtr *doc){ //kdDebug( 6035 ) << "parser constructor" << endl;#if SPEED_DEBUG > 0 qt.start();#endif HTMLWidget = _parent; document = doc; document->ref(); blockStack = 0; current = 0; // ID_CLOSE_TAG == Num of tags forbiddenTag = new ushort[ID_CLOSE_TAG+1]; reset();}KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentPtr *doc ){ HTMLWidget = 0; document = doc; document->ref(); forbiddenTag = new ushort[ID_CLOSE_TAG+1]; blockStack = 0; current = 0; reset(); setCurrent(i); inBody = true;}KHTMLParser::~KHTMLParser(){#if SPEED_DEBUG > 0 kdDebug( ) << "TIME: parsing time was = " << qt.elapsed() << endl;#endif freeBlock(); if (current) current->deref(); document->deref(); delete [] forbiddenTag; delete isindex;}void KHTMLParser::reset(){ setCurrent ( document->document() ); freeBlock(); // before parsing no tags are forbidden... memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort)); inBody = false; haveFrameSet = false; haveContent = false; haveBody = false; haveTitle = false; inSelect = false; inStrayTableContent = 0; m_inline = false; form = 0; map = 0; head = 0; end = false; isindex = 0; discard_until = 0;}void KHTMLParser::parseToken(Token *t){ if (t->tid > 2*ID_CLOSE_TAG) { kdDebug( 6035 ) << "Unknown tag!! tagID = " << t->tid << endl; return; } if(discard_until) { if(t->tid == discard_until) discard_until = 0; // do not skip </iframe> if ( discard_until || current->id() + ID_CLOSE_TAG != t->tid ) return; }#ifdef PARSER_DEBUG kdDebug( 6035 ) << "\n\n==> parser: processing token " << getTagName(t->tid) << "(" << t->tid << ")" << " current = " << getTagName(current->id()) << "(" << current->id() << ")" << endl; kdDebug(6035) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent << endl;#endif // holy shit. apparently some sites use </br> instead of <br> // be compatible with IE and NS if(t->tid == ID_BR+ID_CLOSE_TAG && document->document()->inCompatMode()) t->tid -= ID_CLOSE_TAG; if(t->tid > ID_CLOSE_TAG) { processCloseTag(t); return; } // ignore spaces, if we're not inside a paragraph or other inline code if( t->tid == ID_TEXT && t->text ) { if(inBody && !skipMode() && current->id() != ID_STYLE && current->id() != ID_TITLE && current->id() != ID_SCRIPT && !t->text->containsOnlyWhitespace()) haveContent = true;#ifdef PARSER_DEBUG kdDebug(6035) << "length="<< t->text->l << " text='" << QConstString(t->text->s, t->text->l).string() << "'" << endl;#endif } NodeImpl *n = getElement(t); // just to be sure, and to catch currently unimplemented stuff if(!n) return; // set attributes if(n->isElementNode() && t->tid != ID_ISINDEX) { ElementImpl *e = static_cast<ElementImpl *>(n); e->setAttributeMap(t->attrs); // take care of optional close tags if(endTag[e->id()] == DOM::OPTIONAL) popBlock(t->tid); } // if this tag is forbidden inside the current context, pop // blocks until we are allowed to add it... while(forbiddenTag[t->tid]) {#ifdef PARSER_DEBUG kdDebug( 6035 ) << "t->id: " << t->tid << " is forbidden :-( " << endl;#endif popOneBlock(); } // sometimes flat doesn't make sense switch(t->tid) { case ID_SELECT: case ID_OPTION: t->flat = false; } // the tokenizer needs the feedback for space discarding if ( tagPriority[t->tid] == 0 ) t->flat = true; if ( !insertNode(n, t->flat) ) { // we couldn't insert the node...#ifdef PARSER_DEBUG kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;#endif if (map == n) {#ifdef PARSER_DEBUG kdDebug( 6035 ) << " --> resetting map!" << endl;#endif map = 0; } if (form == n) {#ifdef PARSER_DEBUG kdDebug( 6035 ) << " --> resetting form!" << endl;#endif form = 0; } delete n; }}static bool isTableRelatedTag(int id){ return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD || id == ID_TH);}bool KHTMLParser::insertNode(NodeImpl *n, bool flat){ int id = n->id(); // let's be stupid and just try to insert it. // this should work if the document is wellformed#ifdef PARSER_DEBUG NodeImpl *tmp = current;#endif NodeImpl *newNode = current->addChild(n); if ( newNode ) {#ifdef PARSER_DEBUG kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;#endif // don't push elements without end tag on the stack if(tagPriority[id] != 0 && !flat) {#if SPEED_DEBUG < 2 if(!n->attached() && HTMLWidget ) n->attach();#endif if(n->isInline()) m_inline = true; pushBlock(id, tagPriority[id]); setCurrent( newNode ); } else {#if SPEED_DEBUG < 2 if(!n->attached() && HTMLWidget) n->attach(); if (n->maintainsState()) { document->document()->registerMaintainsState(n); QString state(document->document()->nextState()); if (!state.isNull()) n->restoreState(state); } n->close();#endif if(n->isInline()) m_inline = true; }#if SPEED_DEBUG < 1 if(tagPriority[id] == 0 && n->renderer()) n->renderer()->calcMinMaxWidth();#endif return true; } else {#ifdef PARSER_DEBUG kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;#endif // error handling... HTMLElementImpl *e; bool handled = false; // switch according to the element to insert switch(id) { case ID_TR: case ID_TH: case ID_TD: if (inStrayTableContent && !isTableRelatedTag(current->id())) { // pop out to the nearest enclosing table-related tag. while (blockStack && !isTableRelatedTag(current->id())) popOneBlock(); return insertNode(n); } break; case ID_COMMENT: break; case ID_HEAD: // ### allow not having <HTML> in at all, as per HTML spec if (!current->isDocumentNode() && current->id() != ID_HTML ) return false; break; case ID_META: case ID_LINK: case ID_ISINDEX: case ID_BASE: if( !head ) createHead(); if( head ) { if ( head->addChild(n) ) {#if SPEED_DEBUG < 2 if(!n->attached() && HTMLWidget) n->attach();#endif } return true; } break; case ID_HTML: if (!current->isDocumentNode() ) { if ( doc()->firstChild()->id() == ID_HTML) { // we have another <HTML> element.... apply attributes to existing one // make sure we don't overwrite already existing attributes NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true); NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->firstChild())->attributes(false); bool changed = false; for (unsigned long l = 0; map && l < map->length(); ++l) { NodeImpl::Id attrId = map->idAt(l); DOMStringImpl *attrValue = map->valueAt(l); changed = !bmap->getValue(attrId); bmap->setValue(attrId,attrValue); } if ( changed ) doc()->recalcStyle( NodeImpl::Inherit );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -