📄 tag.java
字号:
/* * WebSPHINX web crawling toolkit * Copyright (C) 1998,1999 Carnegie Mellon University * * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU Library * General Public License as published by the Free Software * Foundation, version 2. * * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/ */package websphinx;import java.util.Enumeration;import websphinx.util.ArrayEnumeration;/** * Tag in an HTML page. */public class Tag extends Region { String tagName; boolean startTag; String[] htmlAttributes;// HTML attributes on this tag (lower case and interned) Element element; /** * Make a Tag. * @param page Page containing tag * @param start Starting offset of tag in page * @param end Ending offset of tag * @param tagName Name of tag (like "p") * @param startTag true for start tags (like "<p>"), false for end tags ("</p>") */ public Tag (Page page, int start, int end, String tagName, boolean startTag) { super (page, start, end); this.tagName = tagName.toLowerCase ().intern (); this.startTag = startTag; this.htmlAttributes = null; } /** * Get tag name. * @return tag name (like "p"), in lower-case, String.intern()'ed form. */ public String getTagName () { return tagName; } /** * Get element to which this tag is the start or end tag. * @return element, or null if tag has no element. */ public Element getElement () { return element; } /** * Convert a String to a tag name. Tag names are lower-case, intern()'ed * Strings. Thus you can compare tag names with ==, as in: * <CODE>getTagName() == Tag.IMG</CODE>. * @param name Name to convert (e.g., "P") * @return tag name (e.g. "p"), in lower-case, String.intern()'ed form. */ public static String toTagName (String name) { return name.toLowerCase().intern (); } /** * Test if tag is a start tag. Equivalent to !isEndTag(). * @return true if and only if tag is a start tag (like "<P>") */ public boolean isStartTag () { return startTag; } /** * Test if tag is an end tag. Equivalent to !isStartTag(). * @return true if and only if tag is a start tag (like "</P>") */ public boolean isEndTag () { return !startTag; } /** * Test if tag is a block-level tag. Equivalent to !isFlowTag(). * @return true if and only if tag is a block-level tag (like "<P>") */ public boolean isBlockTag () { return HTMLParser.blocktag.containsKey (tagName); } /** * Test if tag is a flow-level tag. Equivalent to !isBlockTag(). * @return true if and only if tag is a block-level tag (like "<A>") */ public boolean isFlowTag () { return !isBlockTag (); } /** * Test if tag belongs in the <HEAD> element. * @return true if and only if tag is a HEAD-level tag (like "<TITLE>") */ public boolean isHeadTag () { return HTMLParser.headtag.containsKey (tagName); } /** * Test if tag belongs in the <BODY> element. * @return true if and only if tag is a BODY-level tag (like "<A>") */ public boolean isBodyTag () { return !isHeadTag() && tagName != HTML && tagName != HEAD && tagName != BODY; } /** * Convert a String to an HTML attribute name. Attribute names are * lower-case, intern()'ed * Strings. Thus you can compare attribute names with ==. * @param name Name to convert (e.g., "HREF") * @return tag name (e.g. "href"), in lower-case, String.intern()'ed form. */ public static String toHTMLAttributeName (String name) { return name.toLowerCase ().intern (); } /** * Test if tag has an HTML attribute. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return true if tag has the attribute, false if not */ public boolean hasHTMLAttribute (String name) { if (htmlAttributes == null) return false; name = toHTMLAttributeName (name); for (int i=0; i<htmlAttributes.length; ++i) if (htmlAttributes[i] == name) return true; return false; } /** * Get an HTML attribute's value. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or null if tag lacks the attribute. */ public String getHTMLAttribute (String name) { if (htmlAttributes == null) return null; name = toHTMLAttributeName (name); for (int i=0; i<htmlAttributes.length; ++i) if (htmlAttributes[i] == name) return getLabel (name); return null; } /** * Get an HTML attribute's value, with a default value if it doesn't exist. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be * converted with toHTMLAttributeName(). * @param defaultValue default value to return if the attribute * doesn't exist * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or defaultValue if tag lacks the attribute. */ public String getHTMLAttribute (String name, String defaultValue) { String val = getHTMLAttribute (name); return val != null ? val : defaultValue; } /** * Get number of HTML attributes on this tag. * @return number of HTML attributes */ public int countHTMLAttributes () { return htmlAttributes != null ? htmlAttributes.length : 0; } /** * Get all the HTML attributes found on this tag. * @return array of name-value pairs, alternating between * names and values. Thus array[0] is a name, array[1] is a value, * array[2] is a name, etc. */ public String[] getHTMLAttributes () { if (htmlAttributes == null) return new String[0]; String[] result = new String[htmlAttributes.length * 2]; for (int i=0, j=0; i<htmlAttributes.length; ++i) { String name = htmlAttributes[i]; result[j++] = name; result[j++] = getLabel (name); } return result; } /** * Enumerate the HTML attributes found on this tag. * @return enumeration of the attribute names found on this tag. */ public Enumeration enumerateHTMLAttributes () { return new ArrayEnumeration (htmlAttributes); } /** * Copy this tag, removing an HTML attribute. * @param name Name of HTML attribute (e.g. "HREF"). Doesn't have to be
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -