⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparseformatdatarowreader.java.old

📁 著名的开源仿真软件yale
💻 OLD
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale@ls8.cs.uni-dortmund.de
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.util.StringTokenizer;

/**
 * Reads the data rows in sparse format. The format is specified in the class comment of
 * {@link edu.udo.cs.yale.operator.SparseFormatExampleSource}.
 *
 * <p>Each non-empty line that does not start with <code>#</code> is one example. A line
 * consists of whitespace separated tokens of the form <code>position:value</code>, where
 * <code>position</code> is a one-based attribute index, <code>w</code> (weight),
 * <code>id</code> (id attribute) or, in format {@link #FORMAT_PREFIX_L}, <code>l</code>
 * (label). Where the label is found otherwise depends on the format.
 *
 * <p>{@link Attribute}s may be passed to the reader in its constructor. If they are
 * omitted, they are generated on the fly. The list returned by {@link #getAllAttributes()}
 * contains the attributes in the order in which they were registered.
 *
 * <p>NOTE(review): earlier documentation stated that attribute indices are assigned by this
 * class and reassigned by the constructor of {@link ExampleTable}. This class itself does not
 * assign any indices; confirm against {@link ExampleTable} and {@link DataRowFactory}.
 *
 * @author Simon, Ingo
 * @version $Id: SparseFormatDataRowReader.java,v 2.8 2003/05/15 13:36:06 fischer Exp $
 */
public class SparseFormatDataRowReader extends AbstractDataRowReader {

    /** Names of the formats, indexed by the <code>FORMAT_*</code> constants. */
    public static final String[] FORMAT_NAMES = { "xy", "yx", "prefix_l", "separate_file" };

    /** Label succeeds attributes. */
    public static final int FORMAT_XY            = 0;

    /** Label precedes attributes. */
    public static final int FORMAT_YX            = 1;

    /** Label has prefix 'l:'. */
    public static final int FORMAT_PREFIX_L      = 2;

    /** Label is in separate file. */
    public static final int FORMAT_SEPARATE_FILE = 3;

    /**
     * Extra capacity requested from the factory for every data row, on top of the regular
     * attributes. NOTE(review): only three special attributes (label, weight, id) are
     * handled in this class; the reason for reserving four is not visible here.
     */
    private static final int NUMBER_OF_SPECIAL_ATTRIBUTES = 4;

    /** Reader for the attribute data. */
    private BufferedReader inAttributes;

    /** Reader for the labels. Only set if the format is {@link #FORMAT_SEPARATE_FILE}. */
    private BufferedReader inLabels;

    /** The regular attributes. */
    private Attribute[] attributes;

    /** All attributes, i.e. including special attributes. */
    private List allAttributes = new LinkedList();

    /** The special attributes. */
    private Attribute label, idAttribute, weight;

    /** Remember if an end of file has occurred. */
    private boolean eof;

    /** Remember if a line has already been read but not yet been returned by {@link #next()}. */
    private boolean lineRead;

    /** The maximum number of examples to read; values below 0 mean "no limit". */
    private int maxNumber;

    /** Number of examples already returned by {@link #next()}. */
    private int linesRead;

    /** The DataRow that will be returned in the next call to {@link #next()}. */
    private DataRow currentDataRow;

    /** One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX_L, and FORMAT_SEPARATE_FILE. */
    private int format;

    /** Remember if a special attribute actually was used. */
    private boolean weightUsed = false, labelUsed = false, idUsed = false;

    /**
     * Creates a new data row reader for sparse format.
     *
     * @param factory         Factory used to create the data rows.
     * @param format          One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX_L, and
     *                        FORMAT_SEPARATE_FILE.
     * @param dimension       Number of regular attributes. Only necessary if
     *                        <code>attributes</code> is null. Otherwise it must either be -1
     *                        or equal to <code>attributes.size()</code>.
     * @param attributes      List of {@link Attribute}. If null, <code>dimension</code> real
     *                        attributes are generated.
     * @param label           The label attribute. If null, a nominal attribute is generated.
     * @param weight          The weight attribute. If null, a real attribute is generated as
     *                        soon as a weight occurs in the data.
     * @param idAttribute     The id attribute. If null, an integer attribute is generated as
     *                        soon as an id occurs in the data.
     * @param attributeReader Reader for the data.
     * @param labelReader     Reader for the labels. Only necessary if format is
     *                        FORMAT_SEPARATE_FILE.
     * @param maxNumber       Sample size, i.e. the maximum number of examples to read. A
     *                        negative value means no limit.
     * @throws IllegalArgumentException if the format is FORMAT_SEPARATE_FILE and
     *                        <code>labelReader</code> is null, or if <code>dimension</code>
     *                        contradicts the size of <code>attributes</code>.
     */
    public SparseFormatDataRowReader(DataRowFactory factory,
                                     int format,
                                     int dimension,
                                     List attributes,
                                     Attribute label,
                                     Attribute weight,
                                     Attribute idAttribute,
                                     Reader attributeReader,
                                     Reader labelReader,
                                     int maxNumber) {
        super(factory);
        this.format       = format;
        this.maxNumber    = maxNumber;
        this.inAttributes = new BufferedReader(attributeReader);
        if (format == FORMAT_SEPARATE_FILE) {
            if (labelReader == null) {
                throw new IllegalArgumentException("labelReader must not be null if format is 'separate_file'!");
            }
            this.inLabels = new BufferedReader(labelReader);
        }

        if (attributes == null) {
            attributes = new LinkedList();
            for (int i = 0; i < dimension; i++) {
                attributes.add(new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE));
            }
        } else if ((dimension != -1) && (dimension != attributes.size())) {
            throw new IllegalArgumentException("dimension must be equal to number of attributes!");
        }

        this.attributes = new Attribute[attributes.size()];
        attributes.toArray(this.attributes);
        for (int i = 0; i < this.attributes.length; i++) {
            registerAttribute(this.attributes[i]);
        }

        // The label is always present: a missing one is generated right away.
        this.label = label;
        if (label != null) {
            labelUsed = true;
        }
        registerAttribute(label);
        ensureLabelExists();

        // Weight and id attributes are NOT generated here; they are created lazily by
        // readLine() when a 'w:' or 'id:' token is encountered for the first time.
        this.weight = weight;
        if (weight != null) {
            weightUsed = true;
        }
        registerAttribute(weight);

        this.idAttribute = idAttribute;
        if (idAttribute != null) {
            idUsed = true;
        }
        registerAttribute(idAttribute);
    }

    /** If attribute is not null, it is appended to the list of all attributes. */
    private void registerAttribute(Attribute attribute) {
        if (attribute != null) {
            allAttributes.add(attribute);
        }
    }

    /** Generates and registers a nominal label attribute if there is none yet. */
    private void ensureLabelExists() {
        if (label == null) {
            label = new Attribute(Ontology.NOMINAL, Ontology.SINGLE_VALUE);
            registerAttribute(label);
        }
    }

    /** Generates and registers a real-valued weight attribute if there is none yet. */
    private void ensureWeightExists() {
        if (weight == null) {
            weight = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
            registerAttribute(weight);
        }
    }

    /** Generates and registers an integer id attribute if there is none yet. */
    private void ensureIdExists() {
        if (idAttribute == null) {
            idAttribute = new Attribute(Ontology.INTEGER, Ontology.SINGLE_VALUE);
            registerAttribute(idAttribute);
        }
    }

    /**
     * Checks if further examples exist.
     *
     * @return false if the maximum number of examples has been delivered, if one of the
     *         files ends, or if reading fails with an {@link IOException} (which is logged);
     *         true otherwise.
     */
    public boolean hasNext() {
        if ((maxNumber > -1) && (linesRead >= maxNumber)) {
            return false;
        }
        if (lineRead) {
            return !eof;
        }
        try {
            eof = !readLine();
            if (eof) {
                inAttributes.close();
                if (inLabels != null) {
                    inLabels.close();
                }
            }
        } catch (IOException e) {
            LogService.logException("SparseFormatDataRowReader.hasNext():", e);
            return false;
        }
        lineRead = true;
        return !eof;
    }

    /**
     * Returns the next line of the given reader that is neither empty nor a comment, i.e.
     * does not start with '#'.
     *
     * @return the line, or null if the end of the stream has been reached.
     */
    private String readContentLine(BufferedReader in) throws IOException {
        String line;
        do {
            line = in.readLine();
            if (line == null) {
                return null;
            }
        } while (line.startsWith("#") || (line.length() == 0));
        return line;
    }

    /**
     * Resolves the part of a token in front of the colon to the attribute it references.
     * The special attributes 'w' and 'id' are generated on demand and marked as used.
     *
     * @param pos 'w', 'id', or a one-based index into the regular attributes.
     * @return the referenced attribute, never null.
     * @throws IOException if pos is neither a special name nor a legal attribute index.
     */
    private Attribute lookupAttribute(String pos) throws IOException {
        if (pos.equals("w")) {
            ensureWeightExists();
            weightUsed = true;
            return weight;
        }
        if (pos.equals("id")) {
            ensureIdExists();
            idUsed = true;
            return idAttribute;
        }

        int index;
        try {
            index = Integer.parseInt(pos) - 1; // indices in the file are one-based
        } catch (NumberFormatException e) {
            throw new IOException("Illegal attribute index: '" + pos +
                                  "' (legal values are l, w, id, and integers!");
        }
        // The range check must precede the array access; otherwise an unchecked
        // ArrayIndexOutOfBoundsException escapes instead of the IOException below.
        if ((index < 0) || (index >= attributes.length)) {
            throw new IOException("Attribute index out of range: '" + pos +
                                  "'! Dimension is " + attributes.length + "!");
        }
        return attributes[index];
    }

    /**
     * Reads the next example into {@link #currentDataRow}.
     *
     * @return false if the attribute file or, for FORMAT_SEPARATE_FILE, the label file
     *         has no more data; true if an example has been read.
     * @throws IOException if reading fails or a line is malformed.
     */
    private boolean readLine() throws IOException {
        String attributeLine = readContentLine(inAttributes);
        if (attributeLine == null) {
            return false;
        }

        this.currentDataRow = getFactory().create(attributes.length + NUMBER_OF_SPECIAL_ATTRIBUTES);
        StringTokenizer tokenizer = new StringTokenizer(attributeLine);

        String labelString = null;
        if (format == FORMAT_YX) {
            // A line consisting of whitespace only passes the empty-line filter but
            // contains no tokens at all.
            if (!tokenizer.hasMoreTokens()) {
                throw new IOException("Malformed line in examplefile: " + attributeLine);
            }
            labelString = tokenizer.nextToken();
        } else if (format == FORMAT_SEPARATE_FILE) {
            labelString = readContentLine(inLabels);
            if (labelString == null) {
                return false;
            }
        }

        while (tokenizer.hasMoreTokens()) {
            String attributeToken = tokenizer.nextToken();
            int colonIndex = attributeToken.indexOf(':');

            if (colonIndex == -1) {
                // A token without a colon is only legal as the (single) label in format xy.
                if ((format != FORMAT_XY) || (labelString != null)) {
                    throw new IOException("Malformed line in examplefile: " + attributeToken);
                }
                labelString = attributeToken;
                continue;
            }

            String pos   = attributeToken.substring(0, colonIndex);  // references the attribute
            String value = attributeToken.substring(colonIndex + 1); // the attribute value

            if ((format == FORMAT_PREFIX_L) && pos.equals("l")) {
                labelString = value; // the label is set after all tokens have been processed
                continue;
            }

            Attribute attribute = lookupAttribute(pos);
            if (attribute.isNominal()) {
                currentDataRow.set(attribute, attribute.mapString(value));
            } else {
                try {
                    currentDataRow.set(attribute, Double.parseDouble(value));
                } catch (NumberFormatException e) {
                    throw new IOException("Attribute is not numerical: '" + value + "'!");
                }
            }
        }

        if (labelString != null) {
            ensureLabelExists();
            labelUsed = true;
            if (label.isNominal()) {
                currentDataRow.set(label, label.mapString(labelString));
            } else {
                try {
                    currentDataRow.set(label, Double.parseDouble(labelString));
                } catch (NumberFormatException e) {
                    throw new IOException("Label is not numerical: '" + labelString + "'.");
                }
            }
        }
        return true;
    }

    /**
     * Returns the next example.
     *
     * @return the next data row, or null if there are no more examples.
     */
    public DataRow next() {
        if (eof) {
            return null;
        }
        if (!lineRead && !hasNext()) {
            return null;
        }
        linesRead++;
        lineRead = false;
        return currentDataRow;
    }

    /** Returns the regular attributes. */
    public Attribute[] getAttributes() {
        return attributes;
    }

    /** Returns the label attribute, or null if no label was passed in or read so far. */
    public Attribute getLabel() {
        return labelUsed ? label : null;
    }

    /** Returns the weight attribute, or null if no weight was passed in or read so far. */
    public Attribute getWeight() {
        return weightUsed ? weight : null;
    }

    /** Returns the id attribute, or null if no id attribute was passed in or read so far. */
    public Attribute getIdAttribute() {
        return idUsed ? idAttribute : null;
    }

    /** Returns a list containing all attributes, i.e. including special attributes. */
    public List getAllAttributes() {
        return allAttributes;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -