📄 arffloader.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * ArffLoader.java * Copyright (C) 2000 University of Waikato, Hamilton, New Zealand * */package weka.core.converters;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.SparseInstance;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.Reader;import java.io.StreamTokenizer;import java.io.StringReader;import java.net.URL;import java.text.ParseException;import java.util.zip.GZIPInputStream;/** <!-- globalinfo-start --> * Reads a source that is in arff (attribute relation file format) format. 
 * <p/> <!-- globalinfo-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.19 $
 * @see Loader
 */
public class ArffLoader
  extends AbstractFileLoader
  implements BatchConverter, IncrementalConverter, URLSourcedLoader {

  /** for serialization */
  static final long serialVersionUID = 2726929550544048587L;

  /** the file extension of uncompressed ARFF files */
  public static String FILE_EXTENSION = Instances.FILE_EXTENSION;

  /** the file extension of gzip-compressed ARFF files */
  public static String FILE_EXTENSION_COMPRESSED = FILE_EXTENSION + ".gz";

  /** the URL to load from (defaults to the bare scheme prefix) */
  protected String m_URL = "http://";

  /** The reader for the source file. */
  protected transient Reader m_sourceReader = null;

  /** The parser for the ARFF file. */
  protected transient ArffReader m_ArffReader = null;

  /**
   * Reads data from an ARFF file, either in incremental or batch mode. <p/>
   *
   * Typical code for batch usage:
   * <pre>
   * BufferedReader reader = new BufferedReader(new FileReader("/some/where/file.arff"));
   * ArffReader arff = new ArffReader(reader);
   * Instances data = arff.getData();
   * data.setClassIndex(data.numAttributes() - 1);
   * </pre>
   *
   * Typical code for incremental usage:
   * <pre>
   * BufferedReader reader = new BufferedReader(new FileReader("/some/where/file.arff"));
   * ArffReader arff = new ArffReader(reader, 1000);
   * Instances data = arff.getStructure();
   * data.setClassIndex(data.numAttributes() - 1);
   * Instance inst;
   * while ((inst = arff.readInstance(data)) != null) {
   *   data.add(inst);
   * }
   * </pre>
   *
   * @author Eibe Frank (eibe@cs.waikato.ac.nz)
   * @author Len Trigg (trigg@cs.waikato.ac.nz)
   * @author fracpete (fracpete at waikato dot ac dot nz)
   * @version $Revision: 1.19 $
   */
  public static class ArffReader {

    /** the tokenizer for reading the stream */
    protected StreamTokenizer m_Tokenizer;

    /** Buffer of values for sparse instance */
    protected double[] m_ValueBuffer;

    /** Buffer of indices for sparse instance */
    protected int[] m_IndicesBuffer;

    /** the actual
data */ protected Instances m_Data; /** the number of lines read so far */ protected int m_Lines; /** * Reads the data completely from the reader. The data can be accessed * via the <code>getData()</code> method. * * @param reader the reader to use * @throws IOException if something goes wrong * @see #getData() */ public ArffReader(Reader reader) throws IOException { m_Tokenizer = new StreamTokenizer(reader); initTokenizer(); readHeader(1000); initBuffers(); Instance inst; while ((inst = readInstance(m_Data)) != null) { m_Data.add(inst); }; compactify(); } /** * Reads only the header and reserves the specified space for instances. * Further instances can be read via <code>readInstance()</code>. * * @param reader the reader to use * @param capacity the capacity of the new dataset * @throws IOException if something goes wrong * @throws IllegalArgumentException if capacity is negative * @see #getStructure() * @see #readInstance(Instances) */ public ArffReader(Reader reader, int capacity) throws IOException { if (capacity < 0) throw new IllegalArgumentException("Capacity has to be positive!"); m_Tokenizer = new StreamTokenizer(reader); initTokenizer(); readHeader(capacity); initBuffers(); } /** * Reads the data without header according to the specified template. * The data can be accessed via the <code>getData()</code> method. * * @param reader the reader to use * @param template the template header * @param lines the lines read so far * @throws IOException if something goes wrong * @see #getData() */ public ArffReader(Reader reader, Instances template, int lines) throws IOException { this(reader, template, lines, 100); Instance inst; while ((inst = readInstance(m_Data)) != null) { m_Data.add(inst); }; compactify(); } /** * Initializes the reader without reading the header according to the * specified template. The data must be read via the * <code>readInstance()</code> method. 
* * @param reader the reader to use * @param template the template header * @param lines the lines read so far * @param capacity the capacity of the new dataset * @throws IOException if something goes wrong * @see #getData() */ public ArffReader(Reader reader, Instances template, int lines, int capacity) throws IOException { m_Lines = lines; m_Tokenizer = new StreamTokenizer(reader); initTokenizer(); m_Data = new Instances(template, capacity); initBuffers(); } /** * initializes the buffers for sparse instances to be read * * @see #m_ValueBuffer * @see #m_IndicesBuffer */ protected void initBuffers() { m_ValueBuffer = new double[m_Data.numAttributes()]; m_IndicesBuffer = new int[m_Data.numAttributes()]; } /** * compactifies the data */ protected void compactify() { if (m_Data != null) m_Data.compactify(); } /** * Throws error message with line number and last token read. * * @param msg the error message to be thrown * @throws IOException containing the error message */ protected void errorMessage(String msg) throws IOException { String str = msg + ", read " + m_Tokenizer.toString(); if (m_Lines > 0) { int line = Integer.parseInt(str.replaceAll(".* line ", "")); str = str.replaceAll(" line .*", " line " + (m_Lines + line - 1)); } throw new IOException(str); } /** * returns the current line number * * @return the current line number */ public int getLineNo() { return m_Lines + m_Tokenizer.lineno(); } /** * Gets next token, skipping empty lines. * * @throws IOException if reading the next token fails */ protected void getFirstToken() throws IOException { while (m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL) {}; if ((m_Tokenizer.ttype == '\'') || (m_Tokenizer.ttype == '"')) { m_Tokenizer.ttype = StreamTokenizer.TT_WORD; } else if ((m_Tokenizer.ttype == StreamTokenizer.TT_WORD) && (m_Tokenizer.sval.equals("?"))){ m_Tokenizer.ttype = '?'; } } /** * Gets index, checking for a premature and of line. 
* * @throws IOException if it finds a premature end of line */ protected void getIndex() throws IOException { if (m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL) { errorMessage("premature end of line"); } if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) { errorMessage("premature end of file"); } } /** * Gets token and checks if its end of line. * * @param endOfFileOk whether EOF is OK * @throws IOException if it doesn't find an end of line */ protected void getLastToken(boolean endOfFileOk) throws IOException { if ((m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) && ((m_Tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) { errorMessage("end of line expected"); } } /** * Gets next token, checking for a premature and of line. * * @throws IOException if it finds a premature end of line */ protected void getNextToken() throws IOException { if (m_Tokenizer.nextToken() == StreamTokenizer.TT_EOL) { errorMessage("premature end of line"); } if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) { errorMessage("premature end of file"); } else if ((m_Tokenizer.ttype == '\'') || (m_Tokenizer.ttype == '"')) { m_Tokenizer.ttype = StreamTokenizer.TT_WORD; } else if ((m_Tokenizer.ttype == StreamTokenizer.TT_WORD) && (m_Tokenizer.sval.equals("?"))){ m_Tokenizer.ttype = '?'; } } /** * Initializes the StreamTokenizer used for reading the ARFF file. */ protected void initTokenizer(){ m_Tokenizer.resetSyntax(); m_Tokenizer.whitespaceChars(0, ' '); m_Tokenizer.wordChars(' '+1,'\u00FF'); m_Tokenizer.whitespaceChars(',',','); m_Tokenizer.commentChar('%'); m_Tokenizer.quoteChar('"'); m_Tokenizer.quoteChar('\''); m_Tokenizer.ordinaryChar('{'); m_Tokenizer.ordinaryChar('}'); m_Tokenizer.eolIsSignificant(true); } /** * Reads a single instance using the tokenizer and returns it. 
     *
     * @param structure the dataset header information, will get updated
     *          in case of string or relational attributes
     * @return the instance read, or null if end of file has been reached
     * @throws IOException if the information is not read
     *           successfully
     */
    public Instance readInstance(Instances structure) throws IOException {
      return readInstance(structure, true);
    }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -