⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 linktag.java

📁 本程序用于对页面信息进行提取并分析
💻 JAVA
字号:
// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML// http://sourceforge.org/projects/htmlparser// Copyright (C) 2004 Somik Raha//// Revision Control Information//// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v $// $Author: derrickoswald $// $Date: 2005/04/10 23:20:45 $// $Revision: 1.54 $//// This library is free software; you can redistribute it and/or// modify it under the terms of the GNU Lesser General Public// License as published by the Free Software Foundation; either// version 2.1 of the License, or (at your option) any later version.//// This library is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// Lesser General Public License for more details.//// You should have received a copy of the GNU Lesser General Public// License along with this library; if not, write to the Free Software// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA//package org.htmlparser.tags;import org.htmlparser.Node;import org.htmlparser.util.ParserUtils;import org.htmlparser.util.SimpleNodeIterator;/** * Identifies a link tag. */public class LinkTag extends CompositeTag{    /**     * The set of names handled by this tag.     */    private static final String[] mIds = new String[] {"A"};    /**     * The set of tag names that indicate the end of this tag.     */    private static final String[] mEnders = new String[] {"A", "P", "DIV", "TD", "TR", "FORM", "LI"};    /**     * The set of end tag names that indicate the end of this tag.     */    private static final String[] mEndTagEnders = new String[] {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"};    /**     * The URL where the link points to     */    protected String mLink;    /**     * Set to true when the link was a mailto: URL.     */    private boolean mailLink;    /**     * Set to true when the link was a javascript: URL.     */    private boolean javascriptLink;    /**     * Constructor creates an LinkTag object, which basically stores the location     * where the link points to, and the text it contains.     * <p>     * In order to get the contents of the link tag, use the method linkData(),     * which returns an enumeration of nodes encapsulated within the link.     * <p>     * The following code will get all the images inside a link tag.     * <pre>     * Node node ;     * ImageTag imageTag;     * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) {     *      node = (Node)e.nextElement();     *      if (node instanceof ImageTag) {     *          imageTag = (ImageTag)node;     *          // Process imageTag     *      }     * }     * </pre>     */    public LinkTag ()    {    }    /**     * Return the set of names handled by this tag.     * @return The names to be matched that create tags of this type.     */    public String[] getIds ()    {        return (mIds);    }    /**     * Return the set of tag names that cause this tag to finish.     * @return The names of following tags that stop further scanning.     */    public String[] getEnders ()    {        return (mEnders);    }    /**     * Return the set of end tag names that cause this tag to finish.     * @return The names of following end tags that stop further scanning.     */    public String[] getEndTagEnders ()    {        return (mEndTagEnders);    }    /**     * Get the <code>ACCESSKEY</code> attribute, if any.     * @return The value of the <code>ACCESSKEY</code> attribute,     * or <code>null</code> if the attribute doesn't exist.     */    public String getAccessKey()    {        return (getAttribute("ACCESSKEY"));    }    /**     * Returns the url as a string, to which this link points.     * This string has had the "mailto:" and "javascript:" protocol stripped     * off the front (if those predicates return <code>true</code>) but not     * for other protocols. Don't ask me why, it's a legacy thing.     * @return The URL for this <code>A</code> tag.     */    public String getLink()    {        if (null == mLink)        {            mailLink=false;            javascriptLink = false;            mLink = extractLink ();            int mailto = mLink.indexOf("mailto");            if (mailto==0)            {                // yes it is                mailto = mLink.indexOf(":");                mLink = mLink.substring(mailto+1);                mailLink = true;            }            int javascript = mLink.indexOf("javascript:");            if (javascript == 0)            {                mLink = mLink.substring(11); // this magic number is "javascript:".length()                javascriptLink = true;            }        }        return (mLink);    }    /**     * Returns the text contained inside this link tag.     * @return The textual contents between the {@.html <A></A>} pair.     */    public String getLinkText()    {        String ret;        if (null != getChildren ())            ret = getChildren ().asString ();        else            ret = "";        return (ret);    }    /**     * Is this a mail address     * @return boolean true/false     */    public boolean isMailLink()    {        getLink (); // force an evaluation of the booleans        return (mailLink);    }    /**     * Tests if the link is javascript     * @return flag indicating if the link is a javascript code     */    public boolean isJavascriptLink()    {        getLink (); // force an evaluation of the booleans        return (javascriptLink);    }    /**     * Tests if the link is an FTP link.     *     * @return flag indicating if this link is an FTP link     */    public boolean isFTPLink() {        return getLink ().indexOf("ftp://")==0;    }    /**     * Tests if the link is an IRC link.     * @return flag indicating if this link is an IRC link     */    public boolean isIRCLink() {        return getLink ().indexOf("irc://")==0;    }    /**     * Tests if the link is an HTTP link.     *     * @return flag indicating if this link is an HTTP link     */    public boolean isHTTPLink()    {        return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink() && !isIRCLink());    }    /**     * Tests if the link is an HTTPS link.     *     * @return flag indicating if this link is an HTTPS link     */    public boolean isHTTPSLink() {            return getLink ().indexOf("https://")==0;    }        /**     * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.).     *     * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.)     */    public boolean isHTTPLikeLink() {            return isHTTPLink() || isHTTPSLink();    }    /**     * Insert the method's description here.     * Creation date: (8/3/2001 1:49:31 AM)     * @param newMailLink boolean     */    public void setMailLink(boolean newMailLink)    {        mailLink = newMailLink;    }    /**     * Set the link as a javascript link.     *     * @param newJavascriptLink flag indicating if the link is a javascript code     */    public void setJavascriptLink(boolean newJavascriptLink)    {        javascriptLink = newJavascriptLink;    }    /**     * Return the contents of this link node as a string suitable for debugging.     * @return A string representation of this node.     */    public String toString()    {        StringBuffer sb = new StringBuffer();        sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition ()+ ", AccessKey=");        if (getAccessKey ()==null)            sb.append("null\n");        else            sb.append(getAccessKey ()+"\n");        if (null != getChildren ())        {            sb.append("  "+"LinkData\n");            sb.append("  "+"--------\n");            Node node;            int i = 0;            for (SimpleNodeIterator e=children();e.hasMoreNodes();)            {                node = e.nextNode();                sb.append("   "+(i++)+ " ");                sb.append(node.toString()+"\n");            }        }        sb.append("  "+"*** END of LinkData ***\n");        return sb.toString();    }    /**     * Set the <code>HREF</code> attribute.     * @param link The new value of the <code>HREF</code> attribute.     */    public void setLink(String link)    {        mLink = link;        setAttribute ("HREF", link);    }    /**     * This method returns an enumeration of data that it contains     * @return Enumeration     * @deprecated Use children() instead.     */    public SimpleNodeIterator linkData() {        return children();    }    /**     * Extract the link from the HREF attribute.     * @return The URL from the HREF attibute. This is absolute if the tag has     * a valid page.     */    public String extractLink ()    {        String ret;        ret =  getAttribute ("HREF");        if (null != ret)        {            ret = ParserUtils.removeChars (ret,'\n');            ret = ParserUtils.removeChars (ret,'\r');        }        if (null != getPage ())            ret = getPage ().getAbsoluteURL (ret);        return (ret);    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -