⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparseformatdatarowreader.java

📁 著名的开源仿真软件yale
💻 JAVA
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale@ls8.cs.uni-dortmund.de
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeSet;

import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import java.util.Map;
import java.util.HashMap;
import java.util.Vector;

/** Reads the data rows in sparse format. The format is specified in the class comment of
 *  {@link edu.udo.cs.yale.operator.SparseFormatExampleSource}. {@link Attribute}s may be passed to the reader
 *  in its constructor. If they are omitted, they are generated on the fly.
 *  In either case, indices are assigned to the attributes. If an {@link ExampleTable}
 *  is generated using instances of this class, the constructor of {@link ExampleTable}
 *  will reassign these indexes.
 *
 *  <p>Each non-empty line that does not start with <code>#</code> is one example. It consists of
 *  whitespace separated tokens of the form <code>index:value</code> (1-based index of a regular
 *  attribute) or <code>prefix:value</code> (special attribute). Depending on the format, the label is
 *  the first token, a single token without colon, a prefixed token, or a line of a separate file.</p>
 *
 *  @author Simon, Ingo
 *  @version $Id: SparseFormatDataRowReader.java,v 2.13 2003/09/10 13:02:06 fischer Exp $
 */
public class SparseFormatDataRowReader extends AbstractDataRowReader {

    /** Names of the formats. */
    public static final String[] FORMAT_NAMES = { "xy", "yx", "prefix", "separate_file", "no_label" };

    /** Label succeeds attributes. */
    public static final int FORMAT_XY            = 0;

    /** Label precedes attributes. */
    public static final int FORMAT_YX            = 1;

    /** Label has a prefix specified in the prefix map. */
    public static final int FORMAT_PREFIX        = 2;

    /** Label is in separate file. */
    public static final int FORMAT_SEPARATE_FILE = 3;

    /** Label is missing. */
    public static final int FORMAT_NO_LABEL      = 4;

    /** Readers for the attributes and for the labels (the latter only for FORMAT_SEPARATE_FILE). */
    private BufferedReader inAttributes, inLabels;

    /** The attribute set with regular and special attributes. */
    private AttributeSet attributeSet = null;

    /** Remember if an end of file has occurred. */
    private boolean eof;

    /** Remember if a line has already been read. */
    private boolean lineRead;

    /** The maximum number of examples to read, -1 for no limit. */
    private int maxNumber;

    /** Number of lines already read. */
    private int linesRead;

    /** The DataRow that will be returned in the next call to {@link #next()} */
    private DataRow currentDataRow;

    /** One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX, FORMAT_SEPARATE_FILE, and FORMAT_NO_LABEL. */
    private int format;

    /** The dimension of the examples, i.e. the total number of regular and special attributes. */
    private int dimension;

    /** Maps prefixes to special attribute names, e.g. "l:" to "label". Never null. */
    private Map prefixMap = new HashMap();

    /** Remember if the readers have already been closed, so that they are closed only once. */
    private boolean readersClosed;

    /** Creates a new data row reader for sparse format. The attributes indices
     *  must not be set. If they are, they are reassigned new values when this constructor
     *  is called!
     *
     *  @param factory Factory used to create {@link DataRow} instances.
     *  @param format One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX, FORMAT_SEPARATE_FILE, and FORMAT_NO_LABEL.
     *  @param prefixMap Maps prefixes to special attribute names (e.g. &quot;l&quot; to &quot;label&quot;).
     *                   May be null, which is treated like an empty map.
     *  @param attributeSet Set of regular and special attributes. Must not be null.
     *  @param attributeReader Reader for the data
     *  @param labelReader Reader for the labels. Only necessary if format is FORMAT_SEPARATE_FILE.
     *  @param sampleSize sample size, may be -1 for no limit.
     *  @throws IllegalArgumentException if the attribute set is null, if the label reader is null
     *          although the format is FORMAT_SEPARATE_FILE, or if the format requires a label but
     *          the attribute set does not define the special attribute &quot;label&quot;.
     **/
    public SparseFormatDataRowReader(DataRowFactory factory,
                                     int format,
                                     Map prefixMap,
                                     AttributeSet attributeSet,
                                     Reader attributeReader,
                                     Reader labelReader,
                                     int sampleSize) {
        super(factory);
        if (attributeSet == null) {
            throw new IllegalArgumentException("AttributeSet must not be null.");
        }
        this.format       = format;
        // keep the empty default map instead of storing null, which would only
        // fail later with a NullPointerException in the middle of parsing
        if (prefixMap != null) {
            this.prefixMap = prefixMap;
        }
        this.attributeSet = attributeSet;
        this.dimension    = attributeSet.getAllAttributes().size();
        this.maxNumber    = sampleSize;
        this.inAttributes = new BufferedReader(attributeReader);
        if (format == FORMAT_SEPARATE_FILE) {
            if (labelReader == null) {
                throw new IllegalArgumentException("labelReader must not be null if format is 'separate_file'!");
            }
            this.inLabels = new BufferedReader(labelReader);
        }
        if (format != FORMAT_NO_LABEL) {
            if (attributeSet.getSpecialAttribute("label") == null) {
                throw new IllegalArgumentException("If format is not no_label, label attribute must be defined.");
            }
        }
    }

    /** Checks if further examples exist. Returns false if one of the files ends or
     *  if the sample size passed to the constructor has been reached. In both cases
     *  the underlying readers are closed.
     *
     *  @throws RuntimeException wrapping the {@link IOException} if reading, parsing,
     *          or closing fails.
     */
    public boolean hasNext() {
        if ((maxNumber > -1) && (linesRead >= maxNumber)) {
            // the sample is complete, nothing will be read any more: release the files
            try {
                closeReaders();
            } catch (IOException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
            return false;
        }
        if (lineRead) {
            return !eof;
        }
        try {
            eof = !readLine();
            if (eof) {
                closeReaders();
            }
        } catch (IOException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        lineRead = true;
        return !eof;
    }

    /** Closes the attribute reader and, if present, the label reader. Does nothing if called again. */
    private void closeReaders() throws IOException {
        if (readersClosed) {
            return;
        }
        readersClosed = true;
        try {
            inAttributes.close();
        } finally {
            // close the label file even if closing the attribute file failed
            if (inLabels != null) {
                inLabels.close();
            }
        }
    }

    /** Reads the next example line (skipping empty lines and lines starting with '#')
     *  and stores the parsed values in {@link #currentDataRow}.
     *
     *  @return false if the attribute file or the label file has ended, true otherwise.
     *  @throws IOException if reading fails or if the line is malformed.
     */
    private boolean readLine() throws IOException {
        String attributeLine = null;
        do {
            attributeLine = inAttributes.readLine();
            if (attributeLine == null) {
                return false;
            }
        } while (attributeLine.startsWith("#") || (attributeLine.length() == 0));

        this.currentDataRow = getFactory().create(dimension);
        StringTokenizer tokenizer = new StringTokenizer(attributeLine);
        String labelString = null;

        if (format == FORMAT_YX) {
            // a line consisting of whitespace only has no tokens at all
            if (!tokenizer.hasMoreTokens()) {
                throw new IOException("Malformed line in examplefile: label is missing.");
            }
            labelString = tokenizer.nextToken();
        } else if (format == FORMAT_SEPARATE_FILE) {
            do {
                labelString = inLabels.readLine();
                if (labelString == null) {
                    return false;
                }
            } while (labelString.startsWith("#") || (labelString.length() == 0));
        }

        while (tokenizer.hasMoreTokens()) {
            String attributeToken = tokenizer.nextToken();
            int colonIndex = attributeToken.indexOf(':');
            if (colonIndex == -1) {
                // A token without colon is only legal as the (single) label in xy format.
                // In every other case it cannot be split into position and value.
                if ((format == FORMAT_XY) && (labelString == null)) {
                    labelString = attributeToken;
                    continue;
                }
                throw new IOException("Malformed line in examplefile: " + attributeToken);
            }

            String pos          = attributeToken.substring(0, colonIndex);   // references the attribute
            String value        = attributeToken.substring(colonIndex + 1);  // the attribute value
            Attribute attribute = resolveAttribute(pos);                     // the referenced attribute

            if (attribute.isNominal()) {
                currentDataRow.set(attribute, attribute.mapString(value));
            } else {
                try {
                    currentDataRow.set(attribute, Double.parseDouble(value));
                } catch (NumberFormatException e) {
                    throw new IOException("Attribute is not numerical: '"+value+"'!");
                }
            }
        }

        if (labelString != null) {
            Attribute label = attributeSet.getSpecialAttribute("label");
            if (label.isNominal()) {
                currentDataRow.set(label, label.mapString(labelString));
            } else {
                try {
                    currentDataRow.set(label, Double.parseDouble(labelString));
                } catch (NumberFormatException e) {
                    throw new IOException("Label is not numerical: '"+labelString+"'.");
                }
            }
        }
        return true;
    }

    /** Looks up the attribute referenced by the part of a token in front of the colon.
     *  An integer is interpreted as the 1-based index of a regular attribute. Everything
     *  else is first looked up in the prefix map and, if no prefix is defined, used
     *  directly as the name of a special attribute.
     *
     *  @param pos the text in front of the colon.
     *  @return the referenced attribute, never null.
     *  @throws IOException if the index is out of range or no special attribute matches.
     */
    private Attribute resolveAttribute(String pos) throws IOException {
        int index;
        try {
            index = Integer.parseInt(pos) - 1;
        } catch (NumberFormatException e) {
            // not an index, so it must denote a special attribute
            String specialAttributeName = (String) prefixMap.get(pos);
            Attribute special = null;
            if (specialAttributeName == null) {
                special = attributeSet.getSpecialAttribute(pos);
                if (special == null) {
                    throw new IOException("Illegal attribute index: '"+pos+
                                          "' (legal values are integers and defined prefixes for special attributes (Parameter prefix_map of SparseFormatExampleSource))!");
                }
            } else {
                special = attributeSet.getSpecialAttribute(specialAttributeName);
            }
            if (special == null) {
                throw new IOException("Unknown special attribute: " + specialAttributeName);
            }
            return special;
        }
        if ((index < 0) || (index >= attributeSet.getNumberOfRegularAttributes())) {
            throw new IOException("Attribute index out of range: '"+(index + 1)+
                                  "'! Index must be between 1 and dimension " + attributeSet.getNumberOfRegularAttributes()+"!");
        }
        return attributeSet.getAttribute(index);
    }

    /** Returns the next Example or null if no further example exists. */
    public DataRow next() {
        if (eof) {
            return null;
        }
        if (!lineRead && !hasNext()) {
            return null;
        }
        linesRead++;
        lineRead = false;
        return currentDataRow;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -