📄 filedatarowreader.java
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *  Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *  Katharina Morik, Oliver Ritthoff
 *  Artificial Intelligence Unit
 *  Computer Science Department
 *  University of Dortmund
 *  44221 Dortmund, Germany
 *  email: yale@ls8.cs.uni-dortmund.de
 *  web: http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeDataSource;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * FileDataRowReader implements a DataRowReader that reads DataRows from one or
 * more files. Each attribute is mapped to a (file, column) pair; one
 * {@link StreamTokenizer} per distinct file tokenizes the input line by line,
 * and {@link #next()} assembles a data row from the buffered tokens.
 *
 * @author Simon, Ingo
 * @version $Id: FileDataRowReader.java,v 2.4 2003/07/30 13:16:45 fischer Exp $
 */
public class FileDataRowReader extends AbstractDataRowReader {

    /** Index into {@link #dataSourceIndex}[i]: number of the source file. */
    private static final int FILE_NR = 0;

    /** Index into {@link #dataSourceIndex}[i]: number of the token (column) within a line. */
    private static final int TOKEN_NR = 1;

    /** One tokenizer per distinct input file. */
    private StreamTokenizer[] attributeDataTokenizer;

    /** The attributes to read, in the order they appear in the resulting data rows. */
    private Attribute[] attributes;

    /** Remember if an end of file has occurred. */
    private boolean eof;

    /** Remember if a line has already been read (buffered by {@link #hasNext()} for {@link #next()}). */
    private boolean lineRead;

    /** The maximum number of lines to read, or -1 for no limit. */
    private int maxNumber;

    /** The number of lines read so far. */
    private int linesRead = 0;

    /** Token buffer: currentData[f][t] holds token t of the current line of file f. */
    private String[][] currentData;

    /**
     * Array of size [number of attributes][2]. For each attribute i the value of
     * dataSourceIndex[i][FILE_NR] is used as an index to {@link #attributeDataTokenizer} and
     * the value of dataSourceIndex[i][TOKEN_NR] specifies the index of the token to use for
     * attribute i.
     */
    private int[][] dataSourceIndex;

    /**
     * Constructs a new FileDataRowReader.
     *
     * @param factory              factory used to create data rows
     * @param attributeDataSources list of {@link AttributeDataSource}s, one per attribute
     * @param sampleSize           limit sample to the first sampleSize lines read from files,
     *                             -1 for no limit
     * @param separators           characters treated as column separators
     * @param commentChars         characters starting a comment that extends to end of line
     * @param ignoreChars          characters skipped entirely
     * @throws IOException if one of the source files cannot be opened
     */
    public FileDataRowReader(DataRowFactory factory,
                             List attributeDataSources,
                             int sampleSize,
                             char[] separators,
                             char[] commentChars,
                             char[] ignoreChars) throws IOException {
        super(factory);
        this.maxNumber = sampleSize;
        this.attributes = new Attribute[attributeDataSources.size()];
        this.dataSourceIndex = new int[attributeDataSources.size()][2];
        List tokenizerList = new LinkedList();

        // map all files used to indices; each distinct file gets one tokenizer
        Map fileMap = new HashMap();
        Iterator i = attributeDataSources.iterator();
        int attribute = 0;
        int greatestFileIndex = -1;
        while (i.hasNext()) {
            AttributeDataSource ads = (AttributeDataSource) i.next();
            attributes[attribute] = ads.getAttribute();
            File file = ads.getFile();
            Integer fileIndex = (Integer) fileMap.get(file);
            // new file found? -> create tokenizer and map to index number
            if (fileIndex == null) {
                fileIndex = new Integer(++greatestFileIndex);
                fileMap.put(file, fileIndex);
                tokenizerList.add(makeTokenizer(file, separators, commentChars, ignoreChars));
            }
            dataSourceIndex[attribute][FILE_NR] = fileIndex.intValue();
            dataSourceIndex[attribute][TOKEN_NR] = ads.getColumn();
            attribute++;
        }

        // determine the maximal token index used for each file
        this.attributeDataTokenizer = new StreamTokenizer[tokenizerList.size()];
        tokenizerList.toArray(this.attributeDataTokenizer);
        int[] maxTokenIndex = new int[this.attributeDataTokenizer.length];
        for (attribute = 0; attribute < dataSourceIndex.length; attribute++) {
            if (dataSourceIndex[attribute][TOKEN_NR] > maxTokenIndex[dataSourceIndex[attribute][FILE_NR]]) {
                maxTokenIndex[dataSourceIndex[attribute][FILE_NR]] = dataSourceIndex[attribute][TOKEN_NR];
            }
        }

        // create temporary string arrays to store the tokens of the current line in
        currentData = new String[this.attributeDataTokenizer.length][];
        for (int t = 0; t < maxTokenIndex.length; t++) {
            currentData[t] = new String[maxTokenIndex[t] + 1];
        }
    }

    /**
     * Delivers a <tt>StreamTokenizer</tt> with the default syntax and additionally some
     * syntax enhancements done by the user. End-of-line is significant
     * ({@link StreamTokenizer#TT_EOL}) so that {@link #readLine()} can detect line breaks.
     *
     * @param file         the file to read, may be null (then null is returned)
     * @param separators   additional whitespace (separator) characters
     * @param commentChars characters starting an end-of-line comment
     * @param ignoreChars  characters treated as whitespace and skipped
     * @return a configured tokenizer, or null if file is null
     * @throws FileNotFoundException if the file does not exist
     */
    public static StreamTokenizer makeTokenizer(File file,
                                                char[] separators,
                                                char[] commentChars,
                                                char[] ignoreChars) throws FileNotFoundException {
        StreamTokenizer tokenizer = null;
        if (file != null) {
            // NOTE(review): FileReader uses the platform default charset — presumably
            // intentional for this code base, but verify for non-ASCII data files.
            tokenizer = new StreamTokenizer(new FileReader(file));
            // resets the syntax of the tokenizer so only the rules below apply
            tokenizer.resetSyntax();
            // token characters
            tokenizer.wordChars(128, 255); // non-ASCII bytes
            tokenizer.wordChars('a', 'z'); // a-z
            tokenizer.wordChars('A', 'Z'); // A-Z
            tokenizer.wordChars('0', '9'); // 0-9
            tokenizer.wordChars('+', '+'); // +
            tokenizer.wordChars('-', '-'); // -
            tokenizer.wordChars('_', '_'); // _
            tokenizer.wordChars('.', '.'); // .
            tokenizer.wordChars('?', '?'); // ?
            // default whitespace characters
            tokenizer.whitespaceChars('\u0000', '\u0020');
            // additional whitespace characters (default is comma)
            for (int i = 0; i < separators.length; i++) {
                tokenizer.ordinaryChar(separators[i]);
                tokenizer.whitespaceChars(separators[i], separators[i]);
            }
            // ignore characters (none by default)
            for (int i = 0; i < ignoreChars.length; i++) {
                tokenizer.ordinaryChar(ignoreChars[i]);
                tokenizer.whitespaceChars(ignoreChars[i], ignoreChars[i]);
            }
            // quote characters
            tokenizer.quoteChar('"');
            tokenizer.quoteChar('\'');
            // comment characters
            for (int i = 0; i < commentChars.length; i++) {
                tokenizer.ordinaryChar(commentChars[i]);
                tokenizer.commentChar(commentChars[i]);
            }
            tokenizer.eolIsSignificant(true); // end of line is significant and can be asked by TT_EOL
        }
        return tokenizer;
    }

    /**
     * Returns true if another data row can be read. Reads (and buffers) the next line
     * from all files as a side effect; {@link #next()} consumes the buffered line.
     */
    public boolean hasNext() {
        if ((maxNumber > -1) && (linesRead >= maxNumber))
            return false;
        if (lineRead)
            return !eof; // a line is already buffered
        try {
            eof = !readLine();
        } catch (IOException e) {
            LogService.logException("FileDataRowReader.hasNext():", e);
            return false;
        }
        lineRead = true;
        return (!eof);
    }

    /**
     * Reads a line of data from all tokenizers into {@link #currentData}.
     * Empty lines are skipped; excess tokens on a line are discarded.
     *
     * @return false if end of file was reached on any of the files
     */
    private boolean readLine() throws IOException {
        for (int i = 0; i < attributeDataTokenizer.length; i++) {
            int column = 0;
            boolean eol = false;
            while (!eol && (column < this.currentData[i].length)) {
                attributeDataTokenizer[i].nextToken();
                if (attributeDataTokenizer[i].ttype == StreamTokenizer.TT_EOF)
                    return false;
                if (attributeDataTokenizer[i].ttype == StreamTokenizer.TT_EOL) {
                    if (column != 0) {
                        eol = true;
                    }
                    // column == 0: blank line, keep reading
                } else {
                    this.currentData[i][column++] = attributeDataTokenizer[i].sval;
                }
            }
            if (!eol) {
                // skip the remaining tokens of this line; also stop at TT_EOF, otherwise
                // a last line without a trailing newline would loop forever
                while ((attributeDataTokenizer[i].ttype != StreamTokenizer.TT_EOL)
                        && (attributeDataTokenizer[i].ttype != StreamTokenizer.TT_EOF)) {
                    attributeDataTokenizer[i].nextToken();
                }
            }
        }
        return true;
    }

    /**
     * Returns the next data row, or null if no more rows are available.
     * Triggers {@link #hasNext()} if no line is buffered yet.
     */
    public DataRow next() {
        if (eof)
            return null;
        if (!lineRead)
            if (!hasNext())
                return null;
        String[] data = new String[attributes.length];
        for (int i = 0; i < attributes.length; i++) {
            if (dataSourceIndex[i][TOKEN_NR] == -1) {
                data[i] = null; // attribute has no source column
            } else {
                data[i] = currentData[dataSourceIndex[i][FILE_NR]][dataSourceIndex[i][TOKEN_NR]];
            }
        }
        DataRow dataRow = getFactory().create(data, attributes);
        linesRead++;
        lineRead = false; // buffer consumed
        return dataRow;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -