⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 miningarffstream.java

📁 数据挖掘。数据仓库
💻 JAVA
字号:
package org.scut.DataMining.Input.File;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;

import org.scut.DataMining.Core.MiningAttribute;
import org.scut.DataMining.Core.MiningData;
import org.scut.DataMining.Core.MiningException;
import org.scut.DataMining.Core.MiningMetaData;
import org.scut.DataMining.Core.NominalAttribute;
import org.scut.DataMining.Core.NumericAttribute;
import org.scut.DataMining.Input.MiningInputStream;

/**
 * Mining input stream that reads records from an ARFF text file.
 * <p>
 * The header is scanned by {@link #recognize()}: {@code @relation} sets the
 * meta data name, every {@code @attribute} line adds one attribute (nominal
 * when the line ends with '}', numeric otherwise) and {@code @data} ends the
 * header. Afterwards each call to {@link #next()} parses one data line.
 * Blank lines and lines starting with '%' are skipped everywhere.
 * <p>
 * Tokens are separated by commas, spaces and tabs, so quoted names or values
 * containing those characters are not supported by this reader.
 */
public class MiningArffStream extends MiningInputStream 
{
	/** Header keyword introducing the relation name */
	private static final String RELATION_TAG = "@RELATION";
	/** Header keyword introducing an attribute declaration */
	private static final String ATTRIBUTE_TAG = "@ATTRIBUTE";
	/** Header keyword marking the start of the data section */
	private static final String DATA_TAG = "@DATA";
	/** Token used in the data section for a missing value */
	private static final String MISSING_VALUE = "?";

	/** File name specified for the Arff file to read */
	private String fileName;
	/** Line string read from the file currently */
	private String line;
	/** Arff file reader */
	private BufferedReader reader;
	/** 
	 * Constructs an arff file reader stream for the specified Arff file name
	 * and opens it immediately.
	 * @param fileName arff full path name
	 * @throws MiningException if the file cannot be opened or its header
	 *         cannot be parsed
	 */
	public MiningArffStream(String fileName) throws MiningException
	{
		super(null);
		this.fileName = fileName;
		this.open();
	}
	/**
	 * Opens the input arff stream and reads its header. Calling this
	 * explicitly is not necessary, the constructor calls this routine
	 * implicitly.
	 * @throws MiningException if the file does not exist or the header is
	 *         malformed
	 */
	public void open() throws MiningException
	{
		try
		{
			this.reader = new BufferedReader(new FileReader(this.fileName));
		}
		catch(FileNotFoundException e)
		{
			throw new MiningException(e.getMessage());
		}
		boolean headerParsed = false;
		try
		{
			this.recognize();
			headerParsed = true;
		}
		finally
		{
			// Do not leak the file handle when the header turns out to be invalid.
			if(!headerParsed) this.closeQuietly();
		}
	}
	/**
	 * Closes the input arff stream. Does nothing if the stream was never
	 * opened.
	 * @throws MiningException if the underlying reader fails to close
	 */
	public void close() throws MiningException
	{
		if(this.reader == null) return;
		try
		{
			this.reader.close();
		}
		catch(IOException e)
		{
			throw new MiningException(e.getMessage());
		}
	}
	/**
	 * Reads the next record into the current data object.
	 * <p>
	 * A "?" token is stored as {@code Double.NaN}. Numeric tokens are parsed
	 * as doubles and widen the attribute's lower/upper bound when they fall
	 * outside of it. Any other token is translated through the nominal
	 * attribute.
	 * @return true if a record was read, false at end of file
	 * @throws MiningException on read failure, when the number of tokens
	 *         differs from the number of attributes, or when a numeric
	 *         attribute holds a token that is not a number
	 */
	public boolean next() throws MiningException
	{
		while(true)
		{
			this.line = this.readTrimmedLine();
			if(this.line == null) return false;
			if(isBlankOrComment(this.line)) continue;

			ArrayList<String> values = new ArrayList<String>();
			for(String str : this.line.split("[, \t]"))
			{
				if(str.length() == 0) continue;
				values.add(str);
			}
			if(values.size() != this.dataSize)
				throw new MiningException("Data size not matched!");
			for(int i=0;i<this.dataSize;i++)
			{
				MiningAttribute ma = this.metaData.getAttribute(i);
				String token = values.get(i);
				// Content comparison: tokens produced by split() are never the
				// same object as the literal, so '==' would never match.
				if(MISSING_VALUE.equals(token)) //: missing value
				{
					this.data.set(i,Double.NaN);
					continue;
				}
				if(ma instanceof NumericAttribute )
				{
					NumericAttribute na = (NumericAttribute)ma;
					double value = parseNumeric(token, i);
					if(na.getLowerBound() > value) na.setLowerBound(value);
					if(na.getUpperBound() < value) na.setUpperBound(value);
					this.data.set(i,value);
				}
				else
				{
					this.data.set(i,((NominalAttribute)ma).getNominalValue(token));
				}
			}
			return true;
		}
	}
	/**
	 * Recognizes the meta data of the input arff stream by reading the
	 * header up to and including the {@code @data} line (or end of file),
	 * then allocates the data object sized to the attribute count.
	 * @throws MiningException on read failure or a malformed
	 *         {@code @attribute} line
	 */
	protected void recognize() throws MiningException
	{
		if( !this.recognized && this.metaData == null)
		{
			this.metaData = new MiningMetaData();
		}
		while(true)
		{
			this.line = this.readTrimmedLine();
			if(this.line == null) break;
			if(isBlankOrComment(this.line)) continue;

			if(startsWithIgnoreCase(this.line, RELATION_TAG)) //: relation section
			{
				String name = this.line.substring(RELATION_TAG.length()).trim();
				this.metaData.setName(name);
				continue;
			}
			if(startsWithIgnoreCase(this.line, ATTRIBUTE_TAG)) //: attribute section
			{
				this.metaData.addAttribute(parseAttribute(this.line));
				continue;
			}
			if(startsWithIgnoreCase(this.line, DATA_TAG)) //: data section
				break;
		}
		this.recognized = true;
		this.data = new MiningData(this.metaData.getAttributeCount());
		this.data.setMiningMetaData(this.metaData);
		this.dataSize = this.metaData.getAttributeCount();
	}
	/**
	 * Reads one line from the file and trims it.
	 * @return the trimmed line, or null at end of file
	 * @throws MiningException if the underlying read fails
	 */
	private String readTrimmedLine() throws MiningException
	{
		String raw;
		try
		{
			raw = this.reader.readLine();
		}
		catch(IOException e)
		{
			throw new MiningException(e.getMessage());
		}
		return raw == null ? null : raw.trim();
	}
	/** Closes the reader, ignoring failures; used only on error paths */
	private void closeQuietly()
	{
		try
		{
			if(this.reader != null) this.reader.close();
		}
		catch(IOException ignored)
		{
			// The original failure is the one worth reporting to the caller.
		}
	}
	/** Tells whether a trimmed line is empty or a '%' comment */
	private static boolean isBlankOrComment(String trimmed)
	{
		return trimmed.length() == 0 || trimmed.charAt(0) == '%';
	}
	/**
	 * Case-insensitive prefix test that does not depend on the default
	 * locale (String.toUpperCase() maps 'i' to a dotted capital under a
	 * Turkish locale, which would hide "@attribute").
	 */
	private static boolean startsWithIgnoreCase(String text, String prefix)
	{
		return text.regionMatches(true, 0, prefix, 0, prefix.length());
	}
	/**
	 * Parses a numeric data token.
	 * @param token text of the value
	 * @param index zero based attribute index, used for the error message
	 * @throws MiningException if the token is not a valid double
	 */
	private static double parseNumeric(String token, int index) throws MiningException
	{
		try
		{
			return Double.parseDouble(token);
		}
		catch(NumberFormatException e)
		{
			throw new MiningException("Invalid numeric value '" + token + "' for attribute " + index);
		}
	}
	/**
	 * Builds an attribute from one trimmed {@code @attribute} line.
	 * The second token is the attribute name. A line ending with '}' is
	 * nominal and every token after the name becomes a nominal label;
	 * any other line is treated as numeric.
	 * @throws MiningException if the line has fewer than three tokens
	 */
	private static MiningAttribute parseAttribute(String declaration) throws MiningException
	{
		// Split on runs of delimiters so repeated spaces do not yield an
		// empty attribute name.
		String[] tokens = declaration.split("[, \t{}]+");
		if(tokens.length < 3)
			throw new MiningException("@Attribute section error!");
		if(declaration.charAt(declaration.length()-1) != '}')
			return new NumericAttribute(tokens[1]);

		NominalAttribute nominal = new NominalAttribute(tokens[1]);
		for(int i=2;i<tokens.length;i++)
		{
			if(tokens[i].length() == 0) continue;
			nominal.addNominal(tokens[i]);
		}
		return nominal;
	}
	/*********************************************************************/
	/**
	 * Small manual driver: prints every record of "arff//pm.arff" and the
	 * time taken in seconds.
	 */
	public static void main(String[] args)
	{
		long start = System.currentTimeMillis();
		MiningArffStream arff = null;
		try 
		{
			arff = new MiningArffStream("arff//pm.arff");
			while(arff.next())
			{
				System.out.println(arff.getData().toString());
			}
		} 
		catch (MiningException e) 
		{
			e.printStackTrace();
		}
		finally
		{
			// Release the file handle whether or not reading succeeded.
			if(arff != null)
			{
				try
				{
					arff.close();
				}
				catch (MiningException e)
				{
					e.printStackTrace();
				}
			}
		}
		long end = System.currentTimeMillis();
		System.out.println("Time eclipsed[s]: " + (end-start)/1000.0);
	}
	/*********************************************************************/
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -