📄 sparseformatexamplesource.java
字号:
/*
* YALE - Yet Another Learning Environment
* Copyright (C) 2001-2004
* Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
* Katharina Morik, Oliver Ritthoff
* Artificial Intelligence Unit
* Computer Science Department
* University of Dortmund
* 44221 Dortmund, Germany
* email: yale-team@lists.sourceforge.net
* web: http://yale.cs.uni-dortmund.de/
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA.
*/
package edu.udo.cs.yale.operator.io;
import edu.udo.cs.yale.operator.Operator;
import edu.udo.cs.yale.operator.IOObject;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.example.DataRowReader;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.SparseFormatDataRowReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleTable;
import edu.udo.cs.yale.example.DataRowFactory;
import edu.udo.cs.yale.example.MemoryExampleTable;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeDataSource;
import edu.udo.cs.yale.tools.att.AttributeSet;
import java.util.StringTokenizer;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.io.FileNotFoundException;
import java.io.IOException;
/** Reads an example file in sparse format, i.e. lines have the form<br/>
 *  <center><pre>label index:value index:value index:value...</pre></center>
 *  Index may be an integer for the regular attributes or one of the prefixes
 *  specified by the parameter list <code>prefix_map</code>.
 *  Five possible <code>format</code>s are supported
 *  <dl>
 *    <dt>format_xy:</dt><dd>The label is the last token in each line</dd>
 *    <dt>format_yx:</dt><dd>The label is the first token in each line</dd>
 *    <dt>format_prefix:</dt><dd>The label is prefixed by 'l:'</dd>
 *    <dt>format_separate_file:</dt><dd>The label is read from a separate file
 *        specified by <code>label_file</code></dd>
 *    <dt>no_label:</dt><dd>The example set is unlabeled.</dd>
 *  </dl>
 *  A detailed introduction to the sparse file format is given in
 *  section {@yale.ref sec:sparse_format|First steps/File formats/Data files}.
 *
 *  @see SparseFormatDataRowReader
 *  @yale.xmlclass SparseFormatExampleSource
 *  @version $Id: SparseFormatExampleSource.java,v 1.2 2004/08/27 11:57:37 ingomierswa Exp $
 */
public class SparseFormatExampleSource extends Operator {

    /** This operator consumes no input objects. */
    private static final Class[] INPUT_CLASSES = {};

    /** This operator delivers exactly one {@link ExampleSet}. */
    private static final Class[] OUTPUT_CLASSES = { ExampleSet.class };

    /**
     * Reads the sparse data file (and, for the separate-file format, the label
     * file) into a {@link MemoryExampleTable} and returns an example set
     * created from it.
     *
     * <p>The attributes are either loaded from the description file given by
     * the parameter <code>attributes</code> or, if that parameter is not set,
     * generated: <code>dimension</code> real-valued regular attributes, one
     * real-valued special attribute per value of <code>prefix_map</code>, and
     * (unless the format is unlabeled) a nominal special attribute "label".</p>
     *
     * <p>The data file is the default source of the attribute description file
     * if it declares one; otherwise the parameter <code>attribute_file</code>
     * is used.</p>
     *
     * @return an array holding the single example set that was read
     * @throws OperatorException as a {@link UserError} if the attribute
     *         description cannot be loaded, a labeled format is requested but
     *         the description defines no "label" attribute, no data file is
     *         known, the separate-file format is chosen without
     *         <code>label_file</code>, or a data/label file cannot be opened
     */
    public IOObject[] apply() throws OperatorException {
        int format = getParameterAsInt("format");

        // +++++++++ special attribute prefix map +++++++++++++++
        // Each list entry is a pair: [0] = prefix, [1] = special attribute name.
        Map prefixMap = new HashMap();
        Iterator p = getParameterList("prefix_map").iterator();
        while (p.hasNext()) {
            Object[] prefixMapping = (Object[])p.next();
            prefixMap.put(prefixMapping[0], prefixMapping[1]);
        }

        // +++++++++ attribute creation +++++++++++++++++++++++++
        String attributeFileName = getParameterAsString("attribute_file");
        File attributeFile = attributeFileName != null ? getExperiment().resolveFileName(attributeFileName) : null;
        String attributeDescriptionFile = getParameterAsString("attributes");
        AttributeSet attributeSet = null;
        if (attributeDescriptionFile != null) {
            try {
                attributeSet = new AttributeSet(getExperiment().resolveFileName(attributeDescriptionFile), false);
            } catch (Exception e) {
                // Only real exceptions are reported as user errors; JVM Errors
                // (e.g. OutOfMemoryError) are allowed to propagate unchanged.
                throw new UserError(this, e, 302, new Object[] { attributeDescriptionFile, e.getMessage() });
            }
            File defaultSource = attributeSet.getDefaultSource();
            if ((attributeFile != null) && (defaultSource != null) &&
                (!attributeFile.equals(defaultSource))) {
                LogService.logMessage("Attribute file names specified by parameter 'attribute_file' and default_source specified in '"+attributeDescriptionFile+"' do not match! Assuming the latter to be correct.", LogService.WARNING);
            }
            if ((format != SparseFormatDataRowReader.FORMAT_NO_LABEL) &&
                (attributeSet.getSpecialAttribute("label") == null)) {
                throw new UserError(this, 114, new Object[0] );
            }
            LogService.logMessage("Found "+attributeSet.getNumberOfRegularAttributes()+" regular attributes.",
                                  LogService.MINIMUM);
            // The default source of the description file takes precedence, but
            // the 'attribute_file' parameter must survive as a fallback when
            // the description file does not declare a default source.
            if (defaultSource != null) {
                attributeFile = defaultSource;
            }
        } else {
            // NOTE(review): 'dimension' defaults to -1; confirm that
            // AttributeSet(int) tolerates a negative value when 'attributes'
            // is not set.
            int dimension = getParameterAsInt("dimension");
            attributeSet = new AttributeSet(dimension);
            for (int i = 0; i < dimension; i++) {
                Attribute attribute = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
                attributeSet.addAttribute(attribute);
            }
            p = prefixMap.values().iterator();
            while (p.hasNext()) {
                String specialName = (String)p.next();
                attributeSet.setSpecialAttribute(specialName, new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE));
            }
            if (format != SparseFormatDataRowReader.FORMAT_NO_LABEL) {
                attributeSet.setSpecialAttribute("label", new Attribute(Ontology.NOMINAL, Ontology.SINGLE_VALUE));
            }
        }

        if (attributeFile == null) {
            throw new UserError(this, 902, new Object[0]);
        }

        // +++++++++++++ reader +++++++++++++++++++++++++++++++++
        Reader inAttributes = null;
        Reader inLabels = null;
        try {
            try {
                inAttributes = new FileReader(attributeFile);
            } catch (IOException e) {
                throw new UserError(this, e, 302, new Object[] { attributeFile, e.getMessage() });
            }

            if (format == SparseFormatDataRowReader.FORMAT_SEPARATE_FILE) {
                String labelFile = getParameterAsString("label_file");
                if (labelFile == null) {
                    throw new UserError(this, 201, new Object[] {"format", SparseFormatDataRowReader.FORMAT_NAMES[SparseFormatDataRowReader.FORMAT_SEPARATE_FILE], "label_file"});
                }
                try {
                    inLabels = new FileReader(getExperiment().resolveFileName(labelFile));
                } catch (IOException e) {
                    throw new UserError(this, e, 302, new Object[] { labelFile, e.getMessage() });
                }
            }

            MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes());
            SparseFormatDataRowReader reader = new SparseFormatDataRowReader(new DataRowFactory(getParameterAsInt("datamanagement")),
                                                                             format,
                                                                             prefixMap,
                                                                             attributeSet,
                                                                             inAttributes,
                                                                             inLabels,
                                                                             getParameterAsInt("sample_size"));
            // Assumes readExamples() consumes the reader completely before it
            // returns, so that the files may be closed afterwards.
            table.readExamples(reader);
            return new IOObject[] { table.createExampleSet(attributeSet) };
        } finally {
            // Release the file handles on success and on every error path.
            closeQuietly(inLabels);
            closeQuietly(inAttributes);
        }
    }

    /**
     * Closes the given reader if it is not <code>null</code>. A failure to
     * close is logged as a warning instead of being thrown so that it cannot
     * mask an exception that is already propagating.
     *
     * @param reader the reader to close, may be <code>null</code>
     */
    private static void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            LogService.logMessage("Cannot close reader: " + e.getMessage(), LogService.WARNING);
        }
    }

    /** Returns the (empty) array of required input classes. */
    public Class[] getInputClasses() { return INPUT_CLASSES; }

    /** Returns the output classes, i.e. a single {@link ExampleSet}. */
    public Class[] getOutputClasses() { return OUTPUT_CLASSES; }

    /**
     * Extends the parameter types of the superclass by the parameters of this
     * operator: <code>format</code>, <code>attributes</code>,
     * <code>attribute_file</code>, <code>label_file</code>,
     * <code>dimension</code>, <code>prefix_map</code>,
     * <code>sample_size</code> and <code>datamanagement</code>.
     * Only <code>format</code> and <code>attributes</code> are marked as
     * non-expert parameters.
     *
     * @return the list of parameter types of this operator
     */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        ParameterType type = new ParameterTypeCategory("format", "Format of the sparse data file.", SparseFormatDataRowReader.FORMAT_NAMES, 0);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeFile("attributes", "Name of the attribute description file.", true);
        type.setExpert(false);
        types.add(type);
        types.add(new ParameterTypeFile("attribute_file", "Name of the data file. Only necessary if not specified in the attribute description file.", true));
        types.add(new ParameterTypeFile("label_file", "Name of the data file containing the labels. Only necessary if format is 'format_separate_file'.", true));
        types.add(new ParameterTypeInt("dimension", "Dimension of the example space. Only necessary if parameter 'attributes' is not set.", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeList("prefix_map", "Maps prefixes to names of special attributes.", new ParameterTypeString("special_attribute", "Maps prefixes to names of special attributes.")));
        types.add(new ParameterTypeInt("sample_size", "The maximum number of examples to read from the data files (-1 = all)", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeCategory("datamanagement", "Determines, how the data is represented internally.",
                                            DataRowFactory.TYPE_NAMES, DataRowFactory.TYPE_SPARSE_MAP));
        return types;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -