📄 sparseformatdatarowreader.java.old
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale@ls8.cs.uni-dortmund.de
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;

import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.util.StringTokenizer;

/** Reads the data rows in sparse format. The format is specified in the class comment of
 *  {@link edu.udo.cs.yale.operator.SparseFormatExampleSource}. {@link Attribute}s may be passed to the reader
 *  in its constructor. If they are omitted, they are generated on the fly.
 *  If an {@link ExampleTable} is generated using instances of this class, the constructor
 *  of {@link ExampleTable} will reassign the attribute indexes. The list of {@link Attribute}s
 *  returned by {@link #getAllAttributes()} will be in the correct ordering.
 *
 *  <p>Each data line consists of whitespace separated tokens of the form <code>pos:value</code>,
 *  where <code>pos</code> is a one-based attribute index, <code>w</code> (weight),
 *  <code>id</code> (id attribute) or, for {@link #FORMAT_PREFIX_L}, <code>l</code> (label).
 *  Lines that are empty or start with <code>#</code> are skipped.</p>
 *
 *  @author Simon, Ingo
 *  @version $Id: SparseFormatDataRowReader.java,v 2.8 2003/05/15 13:36:06 fischer Exp $
 */
public class SparseFormatDataRowReader extends AbstractDataRowReader {

    /** Names of the formats. */
    public static final String[] FORMAT_NAMES = { "xy", "yx", "prefix_l", "separate_file" };

    /** Label succeeds attributes. */
    public static final int FORMAT_XY            = 0;
    /** Label precedes attributes. */
    public static final int FORMAT_YX            = 1;
    /** Label has prefix 'l:'. */
    public static final int FORMAT_PREFIX_L      = 2;
    /** Label is in separate file. */
    public static final int FORMAT_SEPARATE_FILE = 3;

    /** Number of extra columns reserved in every data row in addition to the regular attributes. */
    // NOTE(review): only three special attributes (label, weight, id) are handled in this class;
    // the reason for reserving four columns is not visible here -- confirm before changing.
    private static final int NUMBER_OF_SPECIAL_ATTRIBUTES = 4;

    /** Reader for the attribute data and reader for the labels (the latter is only set for FORMAT_SEPARATE_FILE). */
    private BufferedReader inAttributes, inLabels;

    /** The regular attributes. */
    private Attribute[] attributes;

    /** All attributes, i.e. including special attributes. */
    private List allAttributes = new LinkedList();

    /** The special attributes. */
    private Attribute label, idAttribute, weight;

    /** Remember if an end of file has occurred. */
    private boolean eof;

    /** Remember if a line has already been read. */
    private boolean lineRead;

    /** The maximum number of data rows to read; values below 0 mean "no limit". */
    private int maxNumber;

    /** Number of lines already read. */
    private int linesRead;

    /** The DataRow that will be returned in the next call to {@link #next()} */
    private DataRow currentDataRow;

    /** One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX_L, and FORMAT_SEPARATE_FILE. */
    private int format;

    /** Remember if a special attribute actually was used. */
    private boolean weightUsed = false, labelUsed = false, idUsed = false;

    /** Creates a new data row reader for sparse format.
     *
     *  @param factory Factory used to create the data rows.
     *  @param format One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX_L, and FORMAT_SEPARATE_FILE.
     *  @param dimension Number of regular attributes. Only necessary if attributes is null.
     *         Otherwise it must either be -1 or equal to attributes.size().
     *  @param attributes List of {@link Attribute}. If null, <code>dimension</code> real attributes are generated.
     *  @param label The label attribute. If null, a nominal attribute is generated.
     *  @param weight The weight attribute. If null, a real attribute is generated as soon as
     *         a weight is found in the data.
     *  @param idAttribute The id attribute. If null, an integer attribute is generated as soon as
     *         an id is found in the data.
     *  @param attributeReader Reader for the data
     *  @param labelReader Reader for the labels. Only necessary if format is FORMAT_SEPARATE_FILE.
     *  @param maxNumber sample size; values below 0 mean that all lines are read
     *  @throws IllegalArgumentException if format is FORMAT_SEPARATE_FILE and labelReader is null, or if
     *          dimension does not match the number of attributes passed.
     */
    public SparseFormatDataRowReader(DataRowFactory factory,
                                     int format,
                                     int dimension,
                                     List attributes,
                                     Attribute label,
                                     Attribute weight,
                                     Attribute idAttribute,
                                     Reader attributeReader,
                                     Reader labelReader,
                                     int maxNumber) {
        super(factory);
        this.format = format;
        this.maxNumber = maxNumber;
        this.inAttributes = new BufferedReader(attributeReader);
        if (format == FORMAT_SEPARATE_FILE) {
            if (labelReader == null) {
                throw new IllegalArgumentException("labelReader must not be null if format is 'separate_file'!");
            }
            this.inLabels = new BufferedReader(labelReader);
        }

        if (attributes == null) {
            attributes = new LinkedList();
            for (int i = 0; i < dimension; i++) {
                Attribute attribute = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
                attributes.add(attribute);
            }
        } else {
            if ((dimension != -1) && (dimension != attributes.size())) {
                throw new IllegalArgumentException("dimension must be equal to number of attributes!");
            }
        }

        this.attributes = new Attribute[attributes.size()];
        attributes.toArray(this.attributes);
        for (int i = 0; i < this.attributes.length; i++) {
            registerAttribute(this.attributes[i]);
        }

        // The label always exists after construction: it is generated if none was passed.
        this.label = label;
        if (label != null) {
            labelUsed = true;
        }
        registerAttribute(label);
        ensureLabelExists();

        // Weight and id attributes are deliberately NOT generated here. If none was passed,
        // they are created lazily by readLine() when the first "w:" / "id:" token is found.
        this.weight = weight;
        if (weight != null) {
            weightUsed = true;
        }
        registerAttribute(weight);

        this.idAttribute = idAttribute;
        if (idAttribute != null) {
            idUsed = true;
        }
        registerAttribute(idAttribute);
    }

    /** If attribute is not null, it is appended to the list of all attributes. */
    private void registerAttribute(Attribute attribute) {
        if (attribute != null) {
            allAttributes.add(attribute);
        }
    }

    /** Generates and registers a nominal label attribute if there is none yet. */
    private void ensureLabelExists() {
        if (label == null) {
            label = new Attribute(Ontology.NOMINAL, Ontology.SINGLE_VALUE);
            registerAttribute(label);
        }
    }

    /** Generates and registers a real-valued weight attribute if there is none yet. */
    private void ensureWeightExists() {
        if (weight == null) {
            weight = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
            registerAttribute(weight);
        }
    }

    /** Generates and registers an integer id attribute if there is none yet. */
    private void ensureIdExists() {
        if (idAttribute == null) {
            idAttribute = new Attribute(Ontology.INTEGER, Ontology.SINGLE_VALUE);
            registerAttribute(idAttribute);
        }
    }

    /** Checks if further examples exist. Returns false if the sample size is reached, if one of
     *  the files ends, or if reading a line fails with an IOException (which is logged). */
    public boolean hasNext() {
        if ((maxNumber > -1) && (linesRead >= maxNumber)) {
            return false;
        }
        if (lineRead) {
            // a line was already parsed by a previous call and not yet consumed by next()
            return !eof;
        }
        try {
            eof = !readLine();
            if (eof) {
                inAttributes.close();
                if (inLabels != null) {
                    inLabels.close();
                }
            }
        } catch (IOException e) {
            LogService.logException("SparseFormatDataRowReader.hasNext():", e);
            return false;
        }
        lineRead = true;
        return !eof;
    }

    /** Returns the next line from the given reader that is neither empty nor starts
     *  with '#', or null if the end of the stream is reached. */
    private String readContentLine(BufferedReader in) throws IOException {
        String line;
        do {
            line = in.readLine();
            if (line == null) {
                return null;
            }
        } while (line.startsWith("#") || (line.length() == 0));
        return line;
    }

    /** Returns the regular attribute referenced by the one-based index given as string.
     *
     *  @throws IOException if pos is not an integer or does not reference an existing attribute.
     */
    private Attribute getRegularAttribute(String pos) throws IOException {
        int index;
        try {
            index = Integer.parseInt(pos) - 1;
        } catch (NumberFormatException e) {
            throw new IOException("Illegal attribute index: '" + pos +
                                  "' (legal values are l, w, id, and integers!");
        }
        // The range must be checked BEFORE the array is accessed, otherwise an
        // ArrayIndexOutOfBoundsException is thrown instead of the IOException below.
        if ((index < 0) || (index >= attributes.length)) {
            throw new IOException("Attribute index out of range: '" + index +
                                  "'! Dimension is " + attributes.length + "!");
        }
        return attributes[index];
    }

    /** Stores value for attribute in the current data row. Nominal values are mapped by the
     *  attribute, all other values are parsed as double.
     *
     *  @throws IOException with the given message if a non-nominal value cannot be parsed.
     */
    private void setValue(Attribute attribute, String value, String errorMessage) throws IOException {
        if (attribute.isNominal()) {
            currentDataRow.set(attribute, attribute.mapString(value));
        } else {
            try {
                currentDataRow.set(attribute, Double.parseDouble(value));
            } catch (NumberFormatException e) {
                throw new IOException(errorMessage);
            }
        }
    }

    /** Reads the next data line (and label line, if the label is in a separate file) and
     *  parses it into {@link #currentDataRow}.
     *
     *  @return false if one of the input files has ended, true otherwise.
     *  @throws IOException if reading fails or the line is malformed.
     */
    private boolean readLine() throws IOException {
        String attributeLine = readContentLine(inAttributes);
        if (attributeLine == null) {
            return false;
        }

        this.currentDataRow = getFactory().create(attributes.length + NUMBER_OF_SPECIAL_ATTRIBUTES);
        StringTokenizer tokenizer = new StringTokenizer(attributeLine);

        String labelString = null;
        if (format == FORMAT_YX) {
            // A line consisting of whitespace only is not skipped above and has no tokens at all.
            if (!tokenizer.hasMoreTokens()) {
                throw new IOException("Malformed line in examplefile: missing label");
            }
            labelString = tokenizer.nextToken();
        } else if (format == FORMAT_SEPARATE_FILE) {
            labelString = readContentLine(inLabels);
            if (labelString == null) {
                return false;
            }
        }

        while (tokenizer.hasMoreTokens()) {
            String attributeToken = tokenizer.nextToken();
            int colonIndex = attributeToken.indexOf(':');

            if (colonIndex == -1) {
                // A token without colon is only legal as the (single) label in xy format.
                if ((format != FORMAT_XY) || (labelString != null)) {
                    throw new IOException("Malformed line in examplefile: " + attributeToken);
                }
                labelString = attributeToken;
                continue;
            }

            String pos   = attributeToken.substring(0, colonIndex);  // references the attribute
            String value = attributeToken.substring(colonIndex + 1); // the attribute value
            if ((format == FORMAT_PREFIX_L) && pos.equals("l")) {
                // the label is set after all tokens have been processed
                labelString = value;
                continue;
            }

            Attribute attribute;                                     // the referenced attribute
            if (pos.equals("w")) {
                ensureWeightExists();
                weightUsed = true;
                attribute = weight;
            } else if (pos.equals("id")) {
                ensureIdExists();
                idUsed = true;
                attribute = idAttribute;
            } else {
                attribute = getRegularAttribute(pos);
            }
            setValue(attribute, value, "Attribute is not numerical: '" + value + "'!");
        }

        if (labelString != null) {
            ensureLabelExists();
            labelUsed = true;
            setValue(label, labelString, "Label is not numerical: '" + labelString + "'.");
        }
        return true;
    }

    /** Returns the next data row or null if there is none. */
    public DataRow next() {
        if (eof) {
            return null;
        }
        if (!lineRead) {
            if (!hasNext()) {
                return null;
            }
        }
        linesRead++;
        lineRead = false;
        return currentDataRow;
    }

    /** Returns the regular attributes. */
    public Attribute[] getAttributes() {
        return attributes;
    }

    /** Returns the label attribute, or null if no label was passed to the constructor or read so far. */
    public Attribute getLabel() {
        return labelUsed ? label : null;
    }

    /** Returns the weight attribute, or null if no weight was passed to the constructor or read so far. */
    public Attribute getWeight() {
        return weightUsed ? weight : null;
    }

    /** Returns the id attribute, or null if no id was passed to the constructor or read so far. */
    public Attribute getIdAttribute() {
        return idUsed ? idAttribute : null;
    }

    /** Returns a list containing all attributes, i.e. including special attributes. */
    public List getAllAttributes() {
        return allAttributes;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -