csvloader.java

来自「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」· Java 代码 · 共 560 行 · 第 1/2 页
JAVA
560 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    CSVLoader.java *    Copyright (C) 2000 Mark Hall * */package weka.core.converters;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import java.io.BufferedReader;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.io.StreamTokenizer;import java.util.Enumeration;import java.util.Hashtable;/** <!-- globalinfo-start --> * Reads a source that is in comma separated or tab separated format. Assumes that the first row in the file determines the number of and names of the attributes. * <p/> <!-- globalinfo-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.10 $ * @see Loader */public class CSVLoader   extends AbstractLoader   implements FileSourcedConverter, BatchConverter {  /** for serialization */  static final long serialVersionUID = 5607529739745491340L;    /** the file extension */  public static String FILE_EXTENSION = ".csv";  /** the file */  protected String m_File =     (new File(System.getProperty("user.dir"))).getAbsolutePath();  /**   * Holds the determined structure (header) of the data set.   */  //@ protected depends: model_structureDetermined -> m_structure;  //@ protected represents: model_structureDetermined <- (m_structure != null);  protected Instances m_structure = null;  /**   * Holds the source of the data set.   */  //@ protected depends: model_sourceSupplied -> m_sourceFile;  //@ protected represents: model_sourceSupplied <- (m_sourceFile != null);  protected File m_sourceFile = null;  /**   * Describe variable <code>m_tokenizer</code> here.   */  //  private StreamTokenizer m_tokenizer = null;  /**   * A list of hash tables for accumulating nominal values during parsing.   */  private FastVector m_cumulativeStructure;  /**   * Holds instances accumulated so far   */  private FastVector m_cumulativeInstances;    /**   * default constructor   */  public CSVLoader() {    // No instances retrieved yet    setRetrieval(NONE);  }  /**   * Get the file extension used for arff files   *   * @return the file extension   */  public String getFileExtension() {    return FILE_EXTENSION;  }  /**   * Returns a description of the file type.   *   * @return a short file description   */  public String getFileDescription() {    return "CSV data files";  }  /**   * get the File specified as the source   *   * @return the source file   */  public File retrieveFile() {    return new File(m_File);  }  /**   * sets the source File   *   * @param file the source file   * @exception IOException if an error occurs   */  public void setFile(File file) throws IOException {    m_File = file.getAbsolutePath();    setSource(file);  }  /**   * Returns a string describing this attribute evaluator   * @return a description of the evaluator suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Reads a source that is in comma separated or tab separated format. "      +"Assumes that the first row in the file determines the number of "      +"and names of the attributes.";  }    /**   * Resets the loader ready to read a new data set   */  public void reset() {    m_structure = null;    setRetrieval(NONE);  }  /**   * Resets the Loader object and sets the source of the data set to be    * the supplied File object.   *   * @param file the source file.   * @exception IOException if an error occurs   */  public void setSource(File file) throws IOException {    reset();    if (file == null) {      throw new IOException("Source file object is null!");    }    m_sourceFile = file;    try {      BufferedReader br = new BufferedReader(new FileReader(file));      br.close();    } catch (FileNotFoundException ex) {      throw new IOException("File not found");    }  }  /**   * Determines and returns (if possible) the structure (internally the    * header) of the data set as an empty set of instances.   *   * @return the structure of the data set as an empty set of Instances   * @exception IOException if an error occurs   */  public Instances getStructure() throws IOException {    if (m_sourceFile == null) {      throw new IOException("No source has been specified");    }    if (m_structure == null) {      try {	BufferedReader br = new BufferedReader(new FileReader(m_sourceFile));     	// assumes that the first line of the file is the header	/*m_tokenizer = new StreamTokenizer(br);	initTokenizer(m_tokenizer);	readHeader(m_tokenizer); */	StreamTokenizer st = new StreamTokenizer(br);	initTokenizer(st);	readStructure(st);      } catch (FileNotFoundException ex) {      }    }        return m_structure;  }  /**   * reads the structure   *    * @param st the stream tokenizer to read from   * @throws IOException if reading fails   */  private void readStructure(StreamTokenizer st) throws IOException {    readHeader(st);  }  /**   * Return the full data set. If the structure hasn't yet been determined   * by a call to getStructure then method should do so before processing   * the rest of the data set.   *   * @return the structure of the data set as an empty set of Instances   * @exception IOException if there is no source or parsing fails   */  public Instances getDataSet() throws IOException {    if (m_sourceFile == null) {      throw new IOException("No source has been specified");    }    //    m_sourceReader.close();    setSource(m_sourceFile);    BufferedReader br = new BufferedReader(new FileReader(m_sourceFile));    //    getStructure();    StreamTokenizer st = new StreamTokenizer(br);    initTokenizer(st);    readStructure(st);        st.ordinaryChar(',');    st.ordinaryChar('\t');        m_cumulativeStructure = new FastVector(m_structure.numAttributes());    for (int i = 0; i < m_structure.numAttributes(); i++) {      m_cumulativeStructure.addElement(new Hashtable());    }        // Instances result = new Instances(m_structure);    m_cumulativeInstances = new FastVector();    FastVector current;    while ((current = getInstance(st)) != null) {      m_cumulativeInstances.addElement(current);    }    br.close();    // now determine the true structure of the data set    FastVector atts = new FastVector(m_structure.numAttributes());    for (int i = 0; i < m_structure.numAttributes(); i++) {      String attname = m_structure.attribute(i).name();      Hashtable tempHash = ((Hashtable)m_cumulativeStructure.elementAt(i));      if (tempHash.size() == 0) {	atts.addElement(new Attribute(attname));      } else {	FastVector values = new FastVector(tempHash.size());	// add dummy objects in order to make the FastVector's size == capacity	for (int z = 0; z < tempHash.size(); z++) {	  values.addElement("dummy");	}	Enumeration e = tempHash.keys();	while (e.hasMoreElements()) {	  Object ob = e.nextElement();	  //	  if (ob instanceof Double) {	  int index = ((Integer)tempHash.get(ob)).intValue();	  values.setElementAt(new String(ob.toString()), index);	  //	  }	}	atts.addElement(new Attribute(attname, values));      }    }
csvloader.java - 源码说明

本页面展示了「Java 编写的多种数据挖掘算法包括聚类、分类、预处理等」中的 csvloader.java 源码文件，采用 Java 编程语言编写，共 560 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?