⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tag.java

📁 一个用java语言编写的网络爬虫程序
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * WebSPHINX web crawling toolkit * Copyright (C) 1998,1999 Carnegie Mellon University  *  * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU Library * General Public License as published by the Free Software  * Foundation, version 2. * * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/ */package websphinx;import java.util.Enumeration;import websphinx.util.ArrayEnumeration;/** * Tag in an HTML page. */public class Tag extends Region {    String tagName;    boolean startTag;    String[] htmlAttributes;// HTML attributes on this tag (lower case and interned)    Element element;    /**     * Make a Tag.     * @param page Page containing tag     * @param start Starting offset of tag in page     * @param end Ending offset of tag     * @param tagName Name of tag (like "p")     * @param startTag true for start tags (like "&lt;p&gt;"), false for end tags ("&lt;/p&gt;")     */    public Tag (Page page, int start, int end, String tagName, boolean startTag) {        super (page, start, end);        this.tagName = tagName.toLowerCase ().intern ();        this.startTag = startTag;        this.htmlAttributes = null;    }    /**     * Get tag name.     * @return tag name (like "p"), in lower-case, String.intern()'ed form.     */    public String getTagName () {        return tagName;    }    /**     * Get element to which this tag is the start or end tag.     * @return element, or null if tag has no element.     */    public Element getElement () {        return element;    }    /**     * Convert a String to a tag name.  Tag names are lower-case, intern()'ed     * Strings.  Thus you can compare tag names with ==, as in:      * <CODE>getTagName() == Tag.IMG</CODE>.     * @param name Name to convert (e.g., "P")     * @return tag name (e.g. "p"), in lower-case, String.intern()'ed form.     
*/
    public static String toTagName (String name) {
        // Fixed locale: default-locale lower-casing maps 'I' to a dotless i
        // under Turkish/Azeri locales, so "IMG" would not yield "img" and
        // identity comparison against interned tag names would fail.
        return name.toLowerCase (java.util.Locale.ENGLISH).intern ();
    }

    /**
     * Test if tag is a start tag.  Equivalent to !isEndTag().
     * @return true if and only if tag is a start tag (like "&lt;P&gt;")
     */
    public boolean isStartTag () {
        return startTag;
    }

    /**
     * Test if tag is an end tag.  Equivalent to !isStartTag().
     * @return true if and only if tag is an end tag (like "&lt;/P&gt;")
     */
    public boolean isEndTag () {
        return !startTag;
    }

    /**
     * Test if tag is a block-level tag.  Equivalent to !isFlowTag().
     * A tag is block-level when its name is a key of HTMLParser.blocktag.
     * @return true if and only if tag is a block-level tag (like "&lt;P&gt;")
     */
    public boolean isBlockTag () {
        return HTMLParser.blocktag.containsKey (tagName);
    }

    /**
     * Test if tag is a flow-level tag.  Equivalent to !isBlockTag().
     * @return true if and only if tag is a flow-level tag (like "&lt;A&gt;")
     */
    public boolean isFlowTag () {
        return !isBlockTag ();
    }

    /**
     * Test if tag belongs in the &lt;HEAD&gt; element.
     * A tag is HEAD-level when its name is a key of HTMLParser.headtag.
     * @return true if and only if tag is a HEAD-level tag (like "&lt;TITLE&gt;")
     */
    public boolean isHeadTag () {
        return HTMLParser.headtag.containsKey (tagName);
    }

    /**
     * Test if tag belongs in the &lt;BODY&gt; element.  A tag is BODY-level
     * when it is not a HEAD-level tag and is not one of the HTML, HEAD or
     * BODY tags themselves.
     * @return true if and only if tag is a BODY-level tag (like "&lt;A&gt;")
     */
    public boolean isBodyTag () {
        // Identity comparison is deliberate: tagName is interned, and
        // HTML/HEAD/BODY are presumably the interned tag-name constants
        // declared elsewhere in this class -- confirm against the rest of the file.
        return !isHeadTag()
                && tagName != HTML
                && tagName != HEAD
                && tagName != BODY;
    }

    /**
     * Convert a String to an HTML attribute name.  Attribute names are
     * lower-case, intern()'ed
     * Strings.  Thus you can compare attribute names with ==.
     * @param name Name to convert (e.g., "HREF")
     * @return attribute name (e.g. "href"), in lower-case, String.intern()'ed form.
*/    public static String toHTMLAttributeName (String name) {        return name.toLowerCase ().intern ();    }    /**     * Test if tag has an HTML attribute.     * @param name Name of HTML attribute (e.g. "HREF").  Doesn't have to be     * converted with toHTMLAttributeName().      * @return true if tag has the attribute, false if not     */    public boolean hasHTMLAttribute (String name) {        if (htmlAttributes == null)            return false;        name = toHTMLAttributeName (name);        for (int i=0; i<htmlAttributes.length; ++i)            if (htmlAttributes[i] == name)                return true;        return false;    }    /**     * Get an HTML attribute's value.     * @param name Name of HTML attribute (e.g. "HREF").  Doesn't have to be     * converted with toHTMLAttributeName().      * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or null if tag lacks the attribute.     */    public String getHTMLAttribute (String name) {        if (htmlAttributes == null)            return null;        name = toHTMLAttributeName (name);        for (int i=0; i<htmlAttributes.length; ++i)            if (htmlAttributes[i] == name)                return getLabel (name);        return null;    }    /**     * Get an HTML attribute's value, with a default value if it doesn't exist.     * @param name Name of HTML attribute (e.g. "HREF").  Doesn't have to be     * converted with toHTMLAttributeName().      * @param defaultValue default value to return if the attribute      * doesn't exist     * @return value of attribute if it exists, TRUE if the attribute exists but has no value, or defaultValue if tag lacks the attribute.     */    public String getHTMLAttribute (String name, String defaultValue) {        String val = getHTMLAttribute (name);        return val != null ? val : defaultValue;    }        /**     * Get number of HTML attributes on this tag.     
* @return number of HTML attributes (0 if the tag has none)
     */
    public int countHTMLAttributes () {
        return htmlAttributes != null ? htmlAttributes.length : 0;
    }

    /**
     * Get all the HTML attributes found on this tag.
     * @return array of name-value pairs, alternating between
     * names and values.  Thus array[0] is a name, array[1] is a value,
     * array[2] is a name, etc.  Empty array if the tag has no attributes.
     */
    public String[] getHTMLAttributes () {
        if (htmlAttributes == null) {
            return new String[0];
        }

        String[] result = new String[htmlAttributes.length * 2];
        for (int i = 0; i < htmlAttributes.length; ++i) {
            String name = htmlAttributes[i];
            // Pair i occupies slots 2i (name) and 2i+1 (value from getLabel()).
            result[2 * i] = name;
            result[2 * i + 1] = getLabel (name);
        }
        return result;
    }

    /**
     * Enumerate the HTML attributes found on this tag.
     * @return enumeration of the attribute names found on this tag
     * (an empty enumeration if the tag has no attributes).
     */
    public Enumeration enumerateHTMLAttributes () {
        // htmlAttributes stays null until attributes are attached; hand the
        // enumeration an empty array instead of null, matching the null
        // handling in countHTMLAttributes() and getHTMLAttributes().
        String[] names = htmlAttributes != null ? htmlAttributes : new String[0];
        return new ArrayEnumeration (names);
    }

    /**
     * Copy this tag, removing an HTML attribute.
     * @param name Name of HTML attribute (e.g. "HREF").  Doesn't have to be

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -