📄 compositetagscanner.java
字号:
// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2003 Somik Raha//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/scanners/CompositeTagScanner.java,v $// $Author: derrickoswald $// $Date: 2005/04/10 23:20:44 $// $Revision: 1.90 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.scanners;import java.util.Vector;import org.htmlparser.Attribute;import org.htmlparser.Node;import org.htmlparser.Tag;import org.htmlparser.lexer.Lexer;import org.htmlparser.lexer.Page;import org.htmlparser.scanners.Scanner;import org.htmlparser.util.NodeList;import org.htmlparser.util.ParserException;/** * The main scanning logic for nested tags. * When asked to scan, this class gathers nodes into a hierarchy of tags. */public class CompositeTagScanner extends TagScanner{ /** * Determine whether to use JVM or NodeList stack. * This can be set to true to get the original behaviour of * recursion into composite tags on the JVM stack. * This may lead to StackOverflowError problems in some cases * e.g. on Windows. */ private static final boolean mUseJVMStack = false; /** * Determine whether unexpected end tags should cause stack roll-up. 
* This can be set to true to get the original behaviour of gathering * end tags into whatever tag is open. * This can be expensive, but should only be needed in the presence of * bad HTML. */ private static final boolean mLeaveEnds = false; /** * Create a composite tag scanner. */ public CompositeTagScanner () { } /** * Collect the children. * <p>An initial test is performed for an empty XML tag, in which case * the start tag and end tag of the returned tag are the same and it has * no children.<p> * If it's not an empty XML tag, the lexer is repeatedly asked for * subsequent nodes until an end tag is found or a node is encountered * that matches the tag ender set or end tag ender set. * In the latter case, a virtual end tag is created. * Each node found that is not the end tag is added to * the list of children. The end tag is special and not a child.<p> * Nodes that also have a CompositeTagScanner as their scanner are * recursed into, which provides the nested structure of an HTML page. * This method operates in two possible modes, depending on a private boolean. * It can recurse on the JVM stack, which has caused some overflow problems * in the past, or it can use the supplied stack argument to nest scanning * of child tags within itself. The former is left as an option in the code, * mostly to help subsequent modifiers visualize what the internal nesting * is doing. * @param tag The tag this scanner is responsible for. * @param lexer The source of subsequent nodes. * @param stack The parse stack. May contain pending tags that enclose * this tag. * @return The resultant tag (may be unchanged). 
*/ public Tag scan (Tag tag, Lexer lexer, NodeList stack) throws ParserException { Node node; Tag next; String name; Scanner scanner; Tag ret; ret = tag; if (ret.isEmptyXmlTag ()) ret.setEndTag (ret); else do { node = lexer.nextNode (false); if (null != node) { if (node instanceof Tag) { next = (Tag)node; name = next.getTagName (); // check for normal end tag if (next.isEndTag () && name.equals (ret.getTagName ())) { ret.setEndTag (next); node = null; } else if (isTagToBeEndedFor (ret, next)) // check DTD { // backup one node. insert a virtual end tag later lexer.setPosition (next.getStartPosition ()); node = null; } else if (!next.isEndTag ()) { // now recurse if there is a scanner for this type of tag scanner = next.getThisScanner (); if (null != scanner) { if (mUseJVMStack) { // JVM stack recursion node = scanner.scan (next, lexer, stack); addChild (ret, node); } else { // fake recursion: if (scanner == this) { if (next.isEmptyXmlTag ()) { next.setEndTag (next); finishTag (next, lexer); addChild (ret, next); } else { stack.add (ret); ret = next; } } else { // normal recursion if switching scanners node = scanner.scan (next, lexer, stack); addChild (ret, node); } } } else addChild (ret, next); } else { if (!mUseJVMStack && !mLeaveEnds) { // Since all non-end tags are consumed by the // previous clause, we're here because we have an // end tag with no opening tag... this could be bad. // There are two cases... // 1) The tag hasn't been registered, in which case // we just add it as a simple child, like it's // opening tag // 2) There may be an opening tag further up the // parse stack that needs closing. // So, we ask the factory for a node like this one // (since end tags never have scanners) and see // if it's scanner is a composite tag scanner. // If it is we walk up the parse stack looking for // something that needs this end tag to finish it.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -