📄 htmltree.java
字号:
/* * HTML Parser * Copyright (C) 1997 David McNicol * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * file COPYING for more details. */package cvu.html;import java.util.Enumeration;/** * This class stores an HTML file in tree format. It can be constructed * from an HTMLTokenizer or a file name, in which case it will create its * own tokenizer. <p> * Once the HTML file has been parsed a number of search operations can * be performed. The nature of the searches are described below, but some * of their uses are highlighted here: <p> * <ul> * <li>Subtree - Finding all of the FORM elements within a BODY element. * <li>Sibling - Finding all the LI elements within the same UL element. * <li>All - Finding every occurence of the A element. * </ul> * There is also a context search, which performs a subtree search on the * specified element's parent. This can be thought of as a combination * between as sibling search and a subtree search. * @see HTMLTokenizer * @author <a href="http://www.strath.ac.uk/~ras97108/">David McNicol</a> */public class HTMLTree { private HTMLNode root; // The root of the HTML tree. /** * Constructs a new HTMLTree using the tokens from the * specified Enumeration. */ public HTMLTree (Enumeration e) { // Create the root element from the enumeration of tokens. root = new HTMLNode(null, null, e); } /** * Constructs a new HTMLTree using the tokens from the * specified HTMLTokenizer. * @param ht the source of the HTML tokens. */ public HTMLTree (HTMLTokenizer ht) { // Create the root element from the tokens. root = new HTMLNode(null, null, ht.getTokens()); } /** * Constructs a new HTMLTree from the specified HTML file. * @param filename the name of the HTML file. */ public HTMLTree (String filename) { // Tokenize the HTML file. HTMLTokenizer ht = new HTMLTokenizer(filename); // Create the root element from the tokens. root = new HTMLNode(null, null, ht.getTokens()); } /** * Finds the first element with the specified name in the specified * subtree. * @param name the name of the element to search for. * @param tree the subtree to search. */ public HTMLNode findInSubtree (String name, HTMLNode tree) { return find(name, tree, null, true, false); } /** * Finds the next element after the specified one in the subtree. * If the previous element is not in the subtree then nothing will * be found. * @param tree the subtree to search. * @param prev a previously found element. */ public HTMLNode findNextInSubtree (HTMLNode tree, HTMLNode prev) { // Return nothing if there is no previous element. if (prev == null) return null; // Search the subtree for the next element with the same name. return find(prev.getName(), tree, prev, true, false); } /** * Finds the first element with the specified name in the entire * tree. * @param name the name of the element to search for. */ public HTMLNode findInAll (String name) { return find(name, root, null, true, false); } /** * Finds the next element with the same name as the one specified * in the entire tree. * @param prev the previously found element. */ public HTMLNode findNextInAll (HTMLNode prev) { // Return nothing if there is no previous element. if (prev == null) return null; // Search for the next element in the entire tree. return find(prev.getName(), prev.getParent(), prev, true, true); } /** * Find the first element with the specified name in the specified * element's context (that is, the elements parent's subtree). * @param name the name of the element to search for. * @param el the element whose context is to be searched. */ public HTMLNode findInContext (String name, HTMLNode el) { // Return nothing if the arguments are invalid. if (el == null) return null; // Search the elements parent's subtree. return find(name, el.getParent(), null, true, false); } /** * Find the next element with the same name as the specified one * in the first element's context (that is, the first elements * parent's subtree). * If the previous element is not in the subtree then nothing * will be found. * @param el the element whose context is to be searched. * @param the previously found element. */ public HTMLNode findNextInContext (HTMLNode el, HTMLNode prev) { // Return nothing if the arguments are invalid. if (el == null) return null; // Search the elements parent's subtree. return find(el.getName(), el.getParent(), prev, true, false); } /** * Finds the next element with the same name as the specified * one amongst that elements siblings (that is, the elements * parent's children). * @param el the element whose siblings are to be searched. */ public HTMLNode findSibling (HTMLNode el) { // Return nothing if the element is invalid. if (el == null) return null; // Search for a sibling in the elements parent's subtree. return find(el.getName(), el.getParent(), el, false, false); } /** * Prints a string representation of the HTMLTree. */ public String toString () { return "HTMLTree[" + root + "]"; } /** * Generic find method which searches for a string in the given * tree's children. However, the search will not start until the * start element has been passed. The tree's grandchildren will * be searched recursively if the <code>recursive</code> argument * is true. The whole tree after the element will be searched * if the <code>searchParent</code> argument is true. In this * case the method will recurse back towards the root element. */ private HTMLNode find (String name, HTMLNode tree, HTMLNode start, boolean recursive, boolean searchParent) { Enumeration children; // The immediate children of the subtree. Object next; // The next object from the enumeration. boolean searching; // True if we are actively searching. HTMLNode child; // One of the subtree's children. HTMLNode found; // Result of a subtree search. // Return nothing if the arguments are invalid. if (name == null || tree == null) return null; // Check if we should delay the search until we find the // specified start element. searching = (start == null); // Get the subtree's children. children = tree.getChildren(); // Return nothing if the subtree has no children. if (children == null) return null; // Loop through the subtree's children. while (children.hasMoreElements()) { // Get the next child from the enumeration. next = children.nextElement(); // Check if this child is an HTMLNode. if (! (next instanceof HTMLNode)) continue; // Cast the child into type HTMLNode. child = (HTMLNode) next; if (searching) { // Check if we have found the element. if (name.equalsIgnoreCase(child.getName())) return child; // Check if we should search grandchildren. if (recursive) { // Search the child's subtree. found = find(name, child, null, true, false); // Return the element if we found one. if (found != null) return found; } } else { // Check if this element is the start element. if (child == start) searching = true; } } // Check if we should search the subtree's parent tree. if (searchParent) { HTMLNode parent = tree.getParent(); // Check if the subtree has a parent. if (parent == null) return null; // Otherwise search it, starting after the subtree. return find(name, parent, tree, true, true); } return null; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -