htmlimagescanner.java

来自「梦界家园程序开发基底框架」· Java 代码 · 共 151 行

JAVA

151 行

// HTMLParser Library v1.1 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// For any questions or suggestions, you can write to me at :
// Email :somik@kizna.com
//
// Postal Address :
// Somik Raha
// R&D Team
// Kizna Corporation
// Hiroo ON Bldg. 2F, 5-19-9 Hiroo,
// Shibuya-ku, Tokyo,
// 150-0012,
// JAPAN
// Tel  :  +81-3-54752646
// Fax : +81-3-5449-4870
// Website : www.kizna.com

package jm.util.html.scanners;
//////////////////
// Java Imports //
//////////////////
import java.io.IOException;
import java.util.Hashtable;

import jm.util.html.HTMLNode;
import jm.util.html.HTMLReader;
import jm.util.html.tags.HTMLImageTag;
import jm.util.html.tags.HTMLTag;
import jm.util.html.util.HTMLLinkProcessor;
/**
 * Scanner for the HTML image tag ({@code IMG}). This is a subclass of HTMLTagScanner and is
 * used through a variant of the template method: the caller first asks
 * {@link #evaluate(String, HTMLTagScanner)} whether the tag text is an image tag, and if so
 * calls {@link #scan(HTMLTag, String, HTMLReader, String)} to build the {@link HTMLImageTag}.
 */
public class HTMLImageScanner extends HTMLTagScanner
{
	/** Name of the tag handled by this scanner; it is matched case-insensitively. */
	private static final String IMAGE_TAG_NAME = "IMG";

	/**
	 * Creates an image scanner using the superclass defaults.
	 */
	public HTMLImageScanner()
	{
		super();
	}
	/**
	 * Creates an image scanner with the given filter, which is passed unchanged to the
	 * superclass constructor.
	 * @param filter The filter to hand to HTMLTagScanner
	 */
	public HTMLImageScanner(String filter)
	{
		super(filter);
	}
	/**
	 * Template Method, used to decide if this scanner can handle the given tag. If
	 * the evaluation returns true, the calling side makes a call to scan().
	 * <p>
	 * The tag is accepted when, after leading blanks, its text starts with the name
	 * {@code IMG} (in any letter case) and that name is followed by the end of the text,
	 * by whitespace, or by a {@code '/'}. The comparison does not depend on the default
	 * locale, and blanks inside the name (for example {@code "I MG"}) or longer names that
	 * merely begin with {@code IMG} are rejected.
	 * @param s The complete text contents of the HTMLTag.
	 * @param previousOpenScanner Indicates any previous scanner which hasn't completed, before the current
	 * scan has begun, and hence allows us to write scanners that can work with dirty html.
	 * It is not consulted by this scanner.
	 * @return true if the text describes an image tag, false otherwise (including for null text)
	 */
	public boolean evaluate(String s,HTMLTagScanner previousOpenScanner)
	{
		if (s == null)
		{
			return false;
		}
		// Eat up leading blanks (inherited helper)
		s = absorbLeadingBlanks(s);
		int length = s.length();
		int nameStart = 0;
		// Skip any blanks that are still in front of the tag name
		while (nameStart < length && s.charAt(nameStart) == ' ')
		{
			nameStart++;
		}
		// regionMatches(ignoreCase=true) folds case per character without using the default
		// locale, unlike String.toUpperCase(), which maps 'i' to a dotted capital in Turkish.
		if (!s.regionMatches(true, nameStart, IMAGE_TAG_NAME, 0, IMAGE_TAG_NAME.length()))
		{
			return false;
		}
		int nameEnd = nameStart + IMAGE_TAG_NAME.length();
		if (nameEnd == length)
		{
			// The text is exactly the tag name, with no attributes
			return true;
		}
		// Require a delimiter so that a longer name starting with "IMG" is not mistaken for it
		char delimiter = s.charAt(nameEnd);
		return Character.isWhitespace(delimiter) || delimiter == '/';
	}
	/**
	 * Extract the location of the image from the tag's SRC parameter, resolving it
	 * against the url of the html page in which this tag exists.
	 * @param tag The image tag whose parameters are parsed
	 * @param url URL of web page being parsed
	 * @return The result of HTMLLinkProcessor.extract() for the SRC value with newline
	 * characters removed, or an empty string when the tag has no SRC parameter
	 */
	public String extractImageLocn(HTMLTag tag,String url)
	{
		Hashtable table = tag.parseParameters();
		String relativeLink = (String)table.get("SRC");
		if (relativeLink != null)
		{
			// Drop newline characters from the attribute value (inherited helper)
			relativeLink = removeChars(relativeLink,'\n');
		}
		if (relativeLink == null)
		{
			return "";
		}
		return (new HTMLLinkProcessor()).extract(relativeLink,url);
	}
	/**
	 * Scan the tag and extract the information related to the {@code <IMG>} tag. The url of
	 * the initiating scan has to be provided in case relative links are found; it is passed
	 * on to {@link #extractImageLocn(HTMLTag, String)} to resolve the image location.
	 * We assume that the identification has already been performed using the evaluate() method.
	 * @param tag HTML Tag to be scanned for identification
	 * @param url The initiating url of the scan (Where the html page lies)
	 * @param reader The reader object responsible for reading the html page (not used by this scanner)
	 * @param currentLine The current line (automatically provided by HTMLTag)
	 * @return The HTMLImageTag built from the given tag
	 * @throws IOException declared by the scanner contract; nothing in this method throws it directly
	 */
	public HTMLNode scan(HTMLTag tag,String url,HTMLReader reader,String currentLine) throws IOException
	{
		// Capture the tag text before extracting the location, and put it back on the
		// resulting image tag afterwards.
		// NOTE(review): presumably parameter parsing can alter the tag's text - confirm
		// against HTMLTag.parseParameters() before reordering these statements.
		String tagText = tag.getText();
		String link = extractImageLocn(tag,url);
		int linkBegin = tag.elementBegin();
		int linkEnd = tag.elementEnd();
		HTMLImageTag imageTag = new HTMLImageTag(link,linkBegin,linkEnd,currentLine);
		imageTag.setText(tagText);
		imageTag.setThisScanner(this);
		return imageTag;
	}
}

htmlimagescanner.java - 源码说明

本页面展示了「梦界家园程序开发基底框架」中的 htmlimagescanner.java 源码文件，采用 Java 编程语言编写，共 151 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫开发者社区收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码：Ctrl + C

搜索代码：Ctrl + F

全屏模式：F11

增大字号：Ctrl + =

减小字号：Ctrl + -

显示快捷键：?