📄 converterutils.java
Font size:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * ConverterUtils.java * Copyright (C) 2000 University of Waikato, Hamilton, New Zealand * */package weka.core.converters;import weka.core.ClassDiscovery;import weka.core.Instance;import weka.core.Instances;import weka.gui.GenericObjectEditor;import weka.gui.GenericPropertiesCreator;import java.io.File;import java.io.IOException;import java.io.InputStream;import java.io.OutputStream;import java.io.Serializable;import java.io.StreamTokenizer;import java.net.URL;import java.util.Arrays;import java.util.Collections;import java.util.Enumeration;import java.util.Hashtable;import java.util.Properties;import java.util.Vector;/** * Utility routines for the converter package. * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.14 $ * @see Serializable */public class ConverterUtils implements Serializable { /** for serialization. */ static final long serialVersionUID = -2460855349276148760L; /** * Helper class for loading data from files and URLs. Via the ConverterUtils * class it determines which converter to use for loading the data into * memory. If the chosen converter is an incremental one, then the data * will be loaded incrementally, otherwise as batch. 
 * In both cases the
 * same interface will be used (<code>hasMoreElements</code>,
 * <code>nextElement</code>). Before the
 * data can be read again, one has to call the <code>reset</code> method.
 * The data source can also be initialized with an Instances object, in
 * order to provide a unified interface to files and already loaded datasets.
 *
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.14 $
 * @see #hasMoreElements(Instances)
 * @see #nextElement(Instances)
 * @see #reset()
 * @see DataSink
 */
public static class DataSource implements Serializable {

  /** for serialization. */
  private static final long serialVersionUID = -613122395928757332L;

  /** the file to load (null when loading from a URL or from Instances). */
  protected File m_File;

  /** the URL to load (null when loading from a file or from Instances). */
  protected URL m_URL;

  /** the determined loader; null when initialized from Instances alone. */
  protected Loader m_Loader;

  /** whether the loader is incremental (implements IncrementalConverter). */
  protected boolean m_Incremental;

  /** the instance counter for the batch case. */
  protected int m_BatchCounter;

  /** the last internally read instance (look-ahead buffer for the
   * incremental case). */
  protected Instance m_IncrementalBuffer;

  /** the batch buffer (the fully loaded dataset for non-incremental
   * loaders, or the dataset passed in directly). */
  protected Instances m_BatchBuffer;

  /**
   * Tries to load the data from the file. Can be either a regular file or
   * a web location (http://, https://, ftp:// or file://).
   *
   * @param location the name of the file to load
   * @throws Exception if initialization fails
   */
  public DataSource(String location) throws Exception {
    super();

    // file or URL? decided purely by the scheme prefix of the location
    if (    location.startsWith("http://")
         || location.startsWith("https://")
         || location.startsWith("ftp://")
         || location.startsWith("file://") )
      m_URL = new URL(location);
    else
      m_File = new File(location);

    // quick check: is it ARFF? (extension-based, see isArff(String))
    if (isArff(location)) {
      m_Loader = new ArffLoader();
    }
    else {
      // otherwise ask the converter registry for a matching loader
      if (m_File != null)
        m_Loader = ConverterUtils.getLoaderForFile(location);
      else
        m_Loader = ConverterUtils.getURLLoaderForFile(location);

      // do we have a converter?
      if (m_Loader == null)
        throw new IllegalArgumentException("No suitable converter found for '" + location + "'!");
    }

    // incremental loader?
    m_Incremental = (m_Loader instanceof IncrementalConverter);

    reset();
  }

  /**
   * Initializes the datasource with the given dataset; no loader is used,
   * the data is served directly from the buffer.
   *
   * @param inst the dataset to use
   */
  public DataSource(Instances inst) {
    super();

    m_BatchBuffer = inst;
    m_Loader = null;
    m_File = null;
    m_URL = null;
    m_Incremental = false;
  }

  /**
   * Initializes the datasource with the given Loader.
   *
   * @param loader the Loader to use
   */
  public DataSource(Loader loader) {
    super();

    m_BatchBuffer = null;
    m_Loader = loader;
    m_File = null;
    m_URL = null;
    m_Incremental = (m_Loader instanceof IncrementalConverter);

    initBatchBuffer();
  }

  /**
   * Initializes the datasource with the given input stream. This stream
   * is always interpreted as ARFF.
   *
   * @param stream the stream to use
   */
  public DataSource(InputStream stream) {
    super();

    m_BatchBuffer = null;
    m_Loader = new ArffLoader();
    try {
      m_Loader.setSource(stream);
    }
    catch (Exception e) {
      // NOTE(review): failure to attach the stream is swallowed here and
      // only surfaces later as a null loader — kept as-is.
      m_Loader = null;
    }
    m_File = null;
    m_URL = null;
    m_Incremental = (m_Loader instanceof IncrementalConverter);

    initBatchBuffer();
  }

  /**
   * initializes the batch buffer if necessary, i.e., for non-incremental
   * loaders the full dataset is read into memory immediately.
   */
  protected void initBatchBuffer() {
    try {
      if (!isIncremental())
        m_BatchBuffer = m_Loader.getDataSet();
      else
        m_BatchBuffer = null;
    }
    catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * returns whether the extension of the location is likely to be of ARFF
   * format, i.e., ending in ".arff" or ".arff.gz" (case-insensitive).
   *
   * @param location the file location to check
   * @return true if the location seems to be of ARFF format
   */
  public static boolean isArff(String location) {
    if (    location.toLowerCase().endsWith(ArffLoader.FILE_EXTENSION.toLowerCase())
         || location.toLowerCase().endsWith(ArffLoader.FILE_EXTENSION_COMPRESSED.toLowerCase()) )
      return true;
    else
      return false;
  }

  /**
   * returns whether the loader is an incremental one.
* * @return true if the loader is a true incremental one */ public boolean isIncremental() { return m_Incremental; } /** * returns the determined loader, null if the DataSource was initialized * with data alone and not a file/URL. * * @return the loader used for retrieving the data */ public Loader getLoader() { return m_Loader; } /** * returns the full dataset, can be null in case of an error. * * @return the full dataset * @throws Exception if resetting of loader fails */ public Instances getDataSet() throws Exception { Instances result; result = null; // reset the loader reset(); try { if (m_Loader != null) result = m_Loader.getDataSet(); else result = m_BatchBuffer; } catch (Exception e) { e.printStackTrace(); result = null; } return result; } /** * returns the full dataset with the specified class index set, * can be null in case of an error. * * @param classIndex the class index for the dataset * @return the full dataset * @throws Exception if resetting of loader fails */ public Instances getDataSet(int classIndex) throws Exception { Instances result; result = getDataSet(); if (result != null) result.setClassIndex(classIndex); return result; } /** * resets the loader. * * @throws Exception if resetting fails */ public void reset() throws Exception { if (m_File != null) ((AbstractFileLoader) m_Loader).setFile(m_File); else if (m_URL != null) ((URLSourcedLoader) m_Loader).setURL(m_URL.toString()); else if (m_Loader != null) m_Loader.reset(); m_BatchCounter = 0; m_IncrementalBuffer = null; if (m_Loader != null) { if (!isIncremental()) m_BatchBuffer = m_Loader.getDataSet(); else m_BatchBuffer = null; } } /** * returns the structure of the data. * * @return the structure of the data * @throws Exception if something goes wrong */ public Instances getStructure() throws Exception { if (m_Loader != null) return m_Loader.getStructure(); else return new Instances(m_BatchBuffer, 0); } /** * returns the structure of the data, with the defined class index. 
* * @param classIndex the class index for the dataset * @return the structure of the data * @throws Exception if something goes wrong */ public Instances getStructure(int classIndex) throws Exception { Instances result; result = getStructure(); if (result != null) result.setClassIndex(classIndex); return result; } /** * returns whether there are more Instance objects in the data. * * @param structure the structure of the dataset * @return true if there are more Instance objects * available * @see #nextElement(Instances) */ public boolean hasMoreElements(Instances structure) { boolean result; result = false; if (isIncremental()) { // user still hasn't collected the last one? if (m_IncrementalBuffer != null) { result = true; } else { try { m_IncrementalBuffer = m_Loader.getNextInstance(structure); result = (m_IncrementalBuffer != null); } catch (Exception e) { e.printStackTrace(); result = false; } } } else { result = (m_BatchCounter < m_BatchBuffer.numInstances()); }
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -