📄 csvloader.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * CSVLoader.java * Copyright (C) 2000 Mark Hall * */package weka.core.converters;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import java.io.BufferedReader;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.io.StreamTokenizer;import java.util.Enumeration;import java.util.Hashtable;/** <!-- globalinfo-start --> * Reads a source that is in comma separated or tab separated format. Assumes that the first row in the file determines the number of and names of the attributes. * <p/> <!-- globalinfo-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.10 $ * @see Loader */public class CSVLoader extends AbstractLoader implements FileSourcedConverter, BatchConverter { /** for serialization */ static final long serialVersionUID = 5607529739745491340L; /** the file extension */ public static String FILE_EXTENSION = ".csv"; /** the file */ protected String m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath(); /** * Holds the determined structure (header) of the data set. */ //@ protected depends: model_structureDetermined -> m_structure; //@ protected represents: model_structureDetermined <- (m_structure != null); protected Instances m_structure = null; /** * Holds the source of the data set. */ //@ protected depends: model_sourceSupplied -> m_sourceFile; //@ protected represents: model_sourceSupplied <- (m_sourceFile != null); protected File m_sourceFile = null; /** * Describe variable <code>m_tokenizer</code> here. */ // private StreamTokenizer m_tokenizer = null; /** * A list of hash tables for accumulating nominal values during parsing. */ private FastVector m_cumulativeStructure; /** * Holds instances accumulated so far */ private FastVector m_cumulativeInstances; /** * default constructor */ public CSVLoader() { // No instances retrieved yet setRetrieval(NONE); } /** * Get the file extension used for arff files * * @return the file extension */ public String getFileExtension() { return FILE_EXTENSION; } /** * Returns a description of the file type. * * @return a short file description */ public String getFileDescription() { return "CSV data files"; } /** * get the File specified as the source * * @return the source file */ public File retrieveFile() { return new File(m_File); } /** * sets the source File * * @param file the source file * @exception IOException if an error occurs */ public void setFile(File file) throws IOException { m_File = file.getAbsolutePath(); setSource(file); } /** * Returns a string describing this attribute evaluator * @return a description of the evaluator suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Reads a source that is in comma separated or tab separated format. " +"Assumes that the first row in the file determines the number of " +"and names of the attributes."; } /** * Resets the loader ready to read a new data set */ public void reset() { m_structure = null; setRetrieval(NONE); } /** * Resets the Loader object and sets the source of the data set to be * the supplied File object. * * @param file the source file. * @exception IOException if an error occurs */ public void setSource(File file) throws IOException { reset(); if (file == null) { throw new IOException("Source file object is null!"); } m_sourceFile = file; try { BufferedReader br = new BufferedReader(new FileReader(file)); br.close(); } catch (FileNotFoundException ex) { throw new IOException("File not found"); } } /** * Determines and returns (if possible) the structure (internally the * header) of the data set as an empty set of instances. * * @return the structure of the data set as an empty set of Instances * @exception IOException if an error occurs */ public Instances getStructure() throws IOException { if (m_sourceFile == null) { throw new IOException("No source has been specified"); } if (m_structure == null) { try { BufferedReader br = new BufferedReader(new FileReader(m_sourceFile)); // assumes that the first line of the file is the header /*m_tokenizer = new StreamTokenizer(br); initTokenizer(m_tokenizer); readHeader(m_tokenizer); */ StreamTokenizer st = new StreamTokenizer(br); initTokenizer(st); readStructure(st); } catch (FileNotFoundException ex) { } } return m_structure; } /** * reads the structure * * @param st the stream tokenizer to read from * @throws IOException if reading fails */ private void readStructure(StreamTokenizer st) throws IOException { readHeader(st); } /** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set. * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_sourceFile == null) { throw new IOException("No source has been specified"); } // m_sourceReader.close(); setSource(m_sourceFile); BufferedReader br = new BufferedReader(new FileReader(m_sourceFile)); // getStructure(); StreamTokenizer st = new StreamTokenizer(br); initTokenizer(st); readStructure(st); st.ordinaryChar(','); st.ordinaryChar('\t'); m_cumulativeStructure = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { m_cumulativeStructure.addElement(new Hashtable()); } // Instances result = new Instances(m_structure); m_cumulativeInstances = new FastVector(); FastVector current; while ((current = getInstance(st)) != null) { m_cumulativeInstances.addElement(current); } br.close(); // now determine the true structure of the data set FastVector atts = new FastVector(m_structure.numAttributes()); for (int i = 0; i < m_structure.numAttributes(); i++) { String attname = m_structure.attribute(i).name(); Hashtable tempHash = ((Hashtable)m_cumulativeStructure.elementAt(i)); if (tempHash.size() == 0) { atts.addElement(new Attribute(attname)); } else { FastVector values = new FastVector(tempHash.size()); // add dummy objects in order to make the FastVector's size == capacity for (int z = 0; z < tempHash.size(); z++) { values.addElement("dummy"); } Enumeration e = tempHash.keys(); while (e.hasMoreElements()) { Object ob = e.nextElement(); // if (ob instanceof Double) { int index = ((Integer)tempHash.get(ob)).intValue(); values.setElementAt(new String(ob.toString()), index); // } } atts.addElement(new Attribute(attname, values)); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -