htmlreader.java

来自「梦界家园程序开发基底框架」· Java 代码 · 共 205 行

JAVA

205 行

// HTMLParser Library v1.1 - A java-based parser for HTML
// Copyright (C) Dec 31, 2000 Somik Raha
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// For any questions or suggestions, you can write to me at :
// Email :somik@kizna.com
//
// Postal Address :
// Somik Raha
// R&D Team
// Kizna Corporation
// Hiroo ON Bldg. 2F, 5-19-9 Hiroo,
// Shibuya-ku, Tokyo,
// 150-0012,
// JAPAN
// Tel  :  +81-3-54752646
// Fax : +81-3-5449-4870
// Website : www.kizna.com
//
// Improvements contributed by Nash Tsai (nash_c@users.sourceforge.net)
// Bug fixes contributed by Kaarle Kaila (kaila@users.sourceforge.net)
//

package jm.util.html;
//////////////////
// Java Imports //
//////////////////
import java.io.BufferedReader;
import java.io.IOException;

import jm.util.html.scanners.HTMLTagScanner;
import jm.util.html.tags.HTMLEndTag;
import jm.util.html.tags.HTMLTag;

/**
 * HTMLReader builds on the BufferedReader, providing methods to read one element
 * at a time
 */
public class HTMLReader extends BufferedReader
{
	/** Offset within {@link #line} from which the next node search starts; -1 until a line has been read. */
	protected int posInLine=-1;
	/** The line currently being scanned; null when no line could be read. */
	protected String line;
	/** Result of the most recent node lookup performed by {@link #readElement()}; may be null. */
	protected HTMLNode node = null;
	/** Scanner stored through {@link #setPreviousOpenScanner(HTMLTagScanner)}; not used by this class itself. */
	protected HTMLTagScanner previousOpenScanner = null;
	/** URL handed to the tag scanners and quoted in error messages; null when not supplied. */
	protected String url;
	/** The wrapped reader that {@link #readLine()} delegates to. */
	private final BufferedReader in;
	/** Parser that supplies the scanners; null until {@link #setParser(HTMLParser)} is called. */
	private HTMLParser parser;

	/**
	 * Creates a reader on top of the given reader, passing the given size
	 * on to the BufferedReader constructor. No url is associated with the
	 * reader when this constructor is used.
	 * @param in The reader to read lines from
	 * @param len The buffer size handed to BufferedReader
	 */
	public HTMLReader(BufferedReader in, int len)
	{
		super(in,len);
		this.in = in;
		this.parser = null;
	}

	/**
	 * The constructor takes in a reader object, and the url to be read.
	 * @param in The reader to read lines from
	 * @param url The url being read
	 */
	public HTMLReader(BufferedReader in,String url)
	{
		super(in);
		this.in = in;
		this.url = url;
		this.parser = null;
	}

	/**
	 * This method is useful when designing your own scanners. You might need
	 * to find out what is the location where the reader has stopped last.
	 * @return int The end position of the current node, or 0 when there is
	 * no current node
	 */
	public int getLastReadPosition()
	{
		if (node!=null)
		{
			return node.elementEnd();
		}
		return 0;
	}

	/**
	 * Read the next line and reset the position in the line to 0.
	 * An I/O failure is reported on System.err and is not propagated; in that
	 * case the current line and position are left untouched.
	 * @return String containing the line, or null if no line could be read
	 * (end of input or I/O failure)
	 */
	public String getNextLine()
	{
		try
		{
			line = readLine();
			posInLine = 0;
			return line;
		}
		catch (IOException e)
		{
			System.err.println("I/O Exception occurred while reading!");
		}
		return null;
	}

	/**
	 * Returns the parser that was registered with {@link #setParser(HTMLParser)}.
	 * @return HTMLParser The registered parser, or null if none has been set
	 */
	public HTMLParser getParser()
	{
		return parser;
	}

	/**
	 * Read the next element. Remark nodes are looked for first, then string
	 * nodes, then tags (which are handed to the parser's scanners), and
	 * finally end tags.
	 * A parser must have been set with {@link #setParser(HTMLParser)} before
	 * a tag is encountered, since its scanners are used to scan the tag.
	 * @return HTMLNode - The next node, or null if there is no further line
	 * or nothing could be recognized at the current position
	 * @throws IOException declared for callers; an IOException raised while
	 * scanning a tag is reported on System.err instead of being propagated
	 */
	public HTMLNode readElement() throws IOException
	{
		if (readNextLine())
		{
			// Skip empty lines; stop at the first non-empty line or when no line is available.
			do
			{
				line = getNextLine();
			}
			while (line!=null && line.length()==0);
		}
		else
		{
			// readNextLine() only returns false when node is non-null,
			// so continuing right after the previous node is safe.
			posInLine=node.elementEnd()+1;
		}
		if (line==null)
		{
			return null;
		}

		node = HTMLRemarkNode.find(this,line,posInLine);
		if (node!=null)
		{
			return node;
		}

		node = HTMLStringNode.find(this,line,posInLine);
		if (node!=null)
		{
			return node;
		}

		node = HTMLTag.find(this,line,posInLine);
		if (node!=null)
		{
			HTMLTag tag = (HTMLTag)node;
			try
			{
				node = tag.scan(parser.getScanners(),url,this);
				return node;
			}
			catch (IOException e)
			{
				// Best effort: report the failure and fall through to the end tag lookup.
				System.err.println("Error! I/O Exception occurred while reading "+url);
			}
		}

		// If we couldnt get a string, then it is probably an end tag
		node = HTMLEndTag.find(line,posInLine);
		if (node!=null)
		{
			return node;
		}

		return null;
	}

	/**
	 * Delegates to the readLine method of the wrapped reader. This overrides
	 * BufferedReader's readLine, so lines are taken directly from the wrapped
	 * reader and not from this reader's own buffer.
	 * @return String line read by the reader
	 * @throws IOException if the wrapped reader fails
	 */
	public String readLine() throws IOException
	{
		return in.readLine();
	}

	/**
	 * Do we need to read the next line ?
	 * A new line is needed when nothing has been read yet, when there is no
	 * current node to continue from (previously this case caused a
	 * NullPointerException), or when the current node ends at the end of
	 * the current line.
	 * @return true - yes/ false - no
	 */
	protected boolean readNextLine()
	{
		if (posInLine==-1 || node==null)
		{
			return true;
		}
		return line!=null && node.elementEnd()+1>=line.length();
	}

	/**
	 * The setParser method is used by the parser to put its own object into
	 * the reader. This happens internally, so this method is not generally
	 * for use by the developer or the user.
	 * @param newParser The parser to register with this reader
	 */
	public void setParser(HTMLParser newParser)
	{
		parser = newParser;
	}

	/**
	 * Gets the previousOpenScanner.
	 * @return Returns a HTMLTagScanner
	 */
	public HTMLTagScanner getPreviousOpenScanner()
	{
		return previousOpenScanner;
	}

	/**
	 * Sets the previousOpenScanner.
	 * @param previousOpenScanner The previousOpenScanner to set
	 */
	public void setPreviousOpenScanner(HTMLTagScanner previousOpenScanner)
	{
		this.previousOpenScanner = previousOpenScanner;
	}

}

htmlreader.java - 源码说明

本页面展示了「梦界家园程序开发基底框架」中的 htmlreader.java 源码文件，采用 Java 编程语言编写，共 205 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫开发者社区收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?