📄 element.java
字号:
/* * WebSphinx web-crawling toolkit * * Copyright (c) 1998-2002 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */package websphinx;import java.util.Enumeration;/** * Element in an HTML page. An element runs from a start tag * (like <ul>) to its matching end tag (</ul>), * inclusive. * An element may have an optional end tag (like <p>), * in which case the element runs up to (but not including) * the tag that implicitly closes it. For example: * <PRE><p>Paragraph 1<p>Paragraph 2</PRE> * contains two elements, <PRE><p>Paragraph 1</PRE> * and <PRE><p>Paragraph 2</PRE>. */public class Element extends Region { protected Tag startTag; protected Tag endTag; protected Element sibling; // next sibling protected Element parent; protected Element child; // first child /** * Make an Element from a start tag and end tag. The tags * must be on the same Page. * @param startTag Start tag of element * @param endTag End tag of element (may be null) */ public Element (Tag startTag, Tag endTag) { super (startTag.source, startTag.start, endTag != null ? endTag.end : startTag.end); this.startTag = startTag; this.endTag = endTag; } /** * Make an Element from a start tag and an end position. Used * when the end tag has been omitted (like <p>, frequently). * @param startTag Start tag of element * @param end Ending offset of element */ public Element (Tag startTag, int end) { super (startTag.source, startTag.start, end); this.startTag = startTag; this.endTag = null; } /** * Get tag name. * @return tag name (like "p"), in lower-case, String.intern()'ed form. * Thus you can compare tag names with ==, as in: * <CODE>getTagName() == Tag.IMG</CODE>. */ public String getTagName () { return startTag.getTagName(); } /** * Get start tag. * @return start tag of element */ public Tag getStartTag () { return startTag; } /** * Get end tag. * @return end tag of element, or null if element has no end tag. */ public Tag getEndTag () { return endTag; } /** * Get element's parent. * @return element that contains this element, or null if at top-level. */ public Element getParent () { return parent; } /** * Get element's next sibling. * @return element that follows this element, or null if at end of * parent's children. */ public Element getSibling () { return sibling; } /** * Get element's first child. * @return first element contained by this element, or null if no children. */ public Element getChild () { return child; } /** * Return next element in an inorder walk of the tree, * assuming this element and its children have been visited. * @return next element */ public Element getNext () { if (sibling != null) return sibling; else if (parent != null) return parent.getNext (); else return null; } /** * Test if tag has an HTML attribute. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return true if tag has the attribute, false if not */ public boolean hasHTMLAttribute (String name) { return startTag.hasHTMLAttribute (name); } /** * Get an HTML attribute's value. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or null if tag lacks the attribute. */ public String getHTMLAttribute (String name) { return startTag.getHTMLAttribute (name); } /** * Get an HTML attribute's value, with a default value if it doesn't exist. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @param defaultValue default value to return if the attribute * doesn't exist * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or defaultValue if tag lacks the attribute. */ public String getHTMLAttribute (String name, String defaultValue) { return startTag.getHTMLAttribute (name, defaultValue); } /** * Enumerate the HTML attributes found on this tag. * @return enumeration of the attribute names found on this tag. */ public Enumeration enumerateHTMLAttributes () { return startTag.enumerateHTMLAttributes (); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -