📄 c45loader.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * C45Loader.java * Copyright (C) 2000 Mark Hall * */package weka.core.converters;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import java.io.BufferedReader;import java.io.File;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.io.Reader;import java.io.StreamTokenizer;/** <!-- globalinfo-start --> * Reads a file that is C45 format. Can take a filestem or filestem with .names or .data appended. Assumes that path/<filestem>.names and path/<filestem>.data exist and contain the names and data respectively. * <p/> <!-- globalinfo-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.11 $ * @see Loader */public class C45Loader extends AbstractLoader implements FileSourcedConverter, BatchConverter, IncrementalConverter { /** for serialization */ static final long serialVersionUID = 5454329403218219L; /** the file extension */ public static String FILE_EXTENSION = ".names"; /** the file */ protected String m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath(); /** * Holds the determined structure (header) of the data set. */ //@ protected depends: model_structureDetermined -> m_structure; //@ protected represents: model_structureDetermined <- (m_structure != null); protected Instances m_structure = null; /** * Holds the source of the data set. In this case the names file of the * data set. m_sourceFileData is the data file. */ //@ protected depends: model_sourceSupplied -> m_sourceFile; //@ protected represents: model_sourceSupplied <- (m_sourceFile != null); protected File m_sourceFile = null; /** * Describe variable <code>m_sourceFileData</code> here. */ private File m_sourceFileData = null; /** * Reader for names file */ private transient Reader m_namesReader = null; /** * Reader for data file */ private transient Reader m_dataReader = null; /** * Holds the filestem. */ private String m_fileStem; /** * Number of attributes in the data (including ignore and label attributes). */ private int m_numAttribs; /** * Which attributes are ignore or label. These are *not* included in the * arff representation. */ private boolean [] m_ignore; /** * Returns a string describing this attribute evaluator * @return a description of the evaluator suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Reads a file that is C45 format. Can take a filestem or filestem " +"with .names or .data appended. Assumes that path/<filestem>.names and " +"path/<filestem>.data exist and contain the names and data " +"respectively."; } /** * Resets the Loader ready to read a new data set * * @throws Exception if something goes wrong */ public void reset() throws Exception { m_structure = null; setRetrieval(NONE); if (m_File != null) { setFile(new File(m_File)); } } /** * Get the file extension used for arff files * * @return the file extension */ public String getFileExtension() { return FILE_EXTENSION; } /** * Returns a description of the file type. * * @return a short file description */ public String getFileDescription() { return "C4.5 data files"; } /** * get the File specified as the source * * @return the source file */ public File retrieveFile() { return new File(m_File); } /** * sets the source File * * @param file the source file * @exception IOException if an error occurs */ public void setFile(File file) throws IOException { m_File = file.getAbsolutePath(); setSource(file); } /** * Resets the Loader object and sets the source of the data set to be * the supplied File object. * * @param file the source file. * @exception IOException if an error occurs */ public void setSource(File file) throws IOException { m_structure = null; setRetrieval(NONE); if (file == null) { throw new IOException("Source file object is null!"); } String fname = file.getName(); String fileStem; String path = file.getParent(); if (path != null) { path += File.separator; } else { path = ""; } if (fname.indexOf('.') < 0) { fileStem = fname; fname += ".names"; } else { fileStem = fname.substring(0, fname.lastIndexOf('.')); fname = fileStem + ".names"; } m_fileStem = fileStem; file = new File(path+fname); m_sourceFile = file; try { BufferedReader br = new BufferedReader(new FileReader(file)); m_namesReader = br; } catch (FileNotFoundException ex) { throw new IOException("File not found : "+(path+fname)); } m_sourceFileData = new File(path+fileStem+".data"); try { BufferedReader br = new BufferedReader(new FileReader(m_sourceFileData)); m_dataReader = br; } catch (FileNotFoundException ex) { throw new IOException("File not found : "+(path+fname)); } m_File = file.getAbsolutePath(); } /** * Determines and returns (if possible) the structure (internally the * header) of the data set as an empty set of instances. * * @return the structure of the data set as an empty set of Instances * @exception IOException if an error occurs */ public Instances getStructure() throws IOException { if (m_sourceFile == null) { throw new IOException("No source has beenspecified"); } if (m_structure == null) { setSource(m_sourceFile); StreamTokenizer st = new StreamTokenizer(m_namesReader); initTokenizer(st); readHeader(st); } return m_structure; } /** * Return the full data set. If the structure hasn't yet been determined * by a call to getStructure then method should do so before processing * the rest of the data set. * * @return the structure of the data set as an empty set of Instances * @exception IOException if there is no source or parsing fails */ public Instances getDataSet() throws IOException { if (m_sourceFile == null) { throw new IOException("No source has been specified"); } if (getRetrieval() == INCREMENTAL) { throw new IOException("Cannot mix getting Instances in both incremental and batch modes"); } setRetrieval(BATCH); if (m_structure == null) { getStructure(); } StreamTokenizer st = new StreamTokenizer(m_dataReader); initTokenizer(st); // st.ordinaryChar('.'); Instances result = new Instances(m_structure); Instance current = getInstance(st); while (current != null) { result.add(current); current = getInstance(st); } try { reset();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -