📄 tag.java
字号:
/* * WebSphinx web-crawling toolkit * * Copyright (c) 1998-2002 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */package websphinx;import java.util.Enumeration;import rcm.enum.ArrayEnumeration;/** * Tag in an HTML page. */public class Tag extends Region { String tagName; boolean startTag; String[] htmlAttributes;// HTML attributes on this tag (lower case and interned) Element element; /** * Make a Tag. * @param page Page containing tag * @param start Starting offset of tag in page * @param end Ending offset of tag * @param tagName Name of tag (like "p") * @param startTag true for start tags (like "<p>"), false for end tags ("</p>") */ public Tag (Page page, int start, int end, String tagName, boolean startTag) { super (page, start, end); this.tagName = tagName.toLowerCase ().intern (); this.startTag = startTag; this.htmlAttributes = null; } /** * Get tag name. * @return tag name (like "p"), in lower-case, String.intern()'ed form. */ public String getTagName () { return tagName; } /** * Get element to which this tag is the start or end tag. * @return element, or null if tag has no element. */ public Element getElement () { return element; } /** * Convert a String to a tag name. Tag names are lower-case, intern()'ed * Strings. Thus you can compare tag names with ==, as in: * <CODE>getTagName() == Tag.IMG</CODE>. * @param name Name to convert (e.g., "P") * @return tag name (e.g. "p"), in lower-case, String.intern()'ed form. */ public static String toTagName (String name) { return name.toLowerCase().intern (); } /** * Test if tag is a start tag. Equivalent to !isEndTag(). * @return true if and only if tag is a start tag (like "<P>") */ public boolean isStartTag () { return startTag; } /** * Test if tag is an end tag. Equivalent to !isStartTag(). * @return true if and only if tag is a start tag (like "</P>") */ public boolean isEndTag () { return !startTag; } /** * Test if tag is a block-level tag. Equivalent to !isFlowTag(). * @return true if and only if tag is a block-level tag (like "<P>") */ public boolean isBlockTag () { return HTMLParser.blocktag.containsKey (tagName); } /** * Test if tag is a flow-level tag. Equivalent to !isBlockTag(). * @return true if and only if tag is a block-level tag (like "<A>") */ public boolean isFlowTag () { return !isBlockTag (); } /** * Test if tag belongs in the <HEAD> element. * @return true if and only if tag is a HEAD-level tag (like "<TITLE>") */ public boolean isHeadTag () { return HTMLParser.headtag.containsKey (tagName); } /** * Test if tag belongs in the <BODY> element. * @return true if and only if tag is a BODY-level tag (like "<A>") */ public boolean isBodyTag () { return !isHeadTag() && tagName != HTML && tagName != HEAD && tagName != BODY; } /** * Convert a String to an HTML attribute name. Attribute names are * lower-case, intern()'ed * Strings. Thus you can compare attribute names with ==. * @param name Name to convert (e.g., "HREF") * @return tag name (e.g. "href"), in lower-case, String.intern()'ed form. */ public static String toHTMLAttributeName (String name) { return name.toLowerCase ().intern (); } /** * Test if tag has an HTML attribute. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return true if tag has the attribute, false if not */ public boolean hasHTMLAttribute (String name) { if (htmlAttributes == null) return false; name = toHTMLAttributeName (name); for (int i=0; i<htmlAttributes.length; ++i) if (htmlAttributes[i] == name) return true; return false; } /** * Get an HTML attribute's value. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or null if tag lacks the attribute. */ public String getHTMLAttribute (String name) { if (htmlAttributes == null) return null; name = toHTMLAttributeName (name); for (int i=0; i<htmlAttributes.length; ++i) if (htmlAttributes[i] == name) return getLabel (name); return null; } /** * Get an HTML attribute's value, with a default value if it doesn't exist. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @param defaultValue default value to return if the attribute * doesn't exist * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or defaultValue if tag lacks the attribute. */ public String getHTMLAttribute (String name, String defaultValue) { String val = getHTMLAttribute (name); return val != null ? val : defaultValue; } /** * Get number of HTML attributes on this tag. * @return number of HTML attributes */ public int countHTMLAttributes () { return htmlAttributes != null ? htmlAttributes.length : 0; } /** * Get all the HTML attributes found on this tag. * @return array of name-value pairs, alternating between * names and values. Thus array[0] is a name, array[1] is a value, * array[2] is a name, etc. */ public String[] getHTMLAttributes () { if (htmlAttributes == null) return new String[0]; String[] result = new String[htmlAttributes.length * 2]; for (int i=0, j=0; i<htmlAttributes.length; ++i) { String name = htmlAttributes[i]; result[j++] = name; result[j++] = getLabel (name); } return result; } /**
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -