📄 sparseformatexamplesource.java
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2002, 2003
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa,
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund, Germany
 *  email: yale@ls8.cs.uni-dortmund.de
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator;

import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.example.DataRowReader;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.SparseFormatDataRowReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleTable;
import edu.udo.cs.yale.example.DataRowFactory;
import edu.udo.cs.yale.example.MemoryExampleTable;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.xml.XMLException;
import edu.udo.cs.yale.tools.att.AttributeDataSource;
import edu.udo.cs.yale.tools.att.AttributeSet;

import java.util.StringTokenizer;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.io.FileNotFoundException;
import java.io.IOException;

/** Reads an example file in sparse format, i.e. lines have the form<br/>
 *  <center><pre>label index:value index:value index:value...</pre></center>
 *  Index may be an integer for the regular attributes or one of the prefixes
 *  specified by the parameter list <code>prefix_map</code>.
 *  Five possible <code>format</code>s are supported
 *  <dl>
 *    <dt>format_xy:</dt><dd>The label is the last token in each line</dd>
 *    <dt>format_yx:</dt><dd>The label is the first token in each line</dd>
 *    <dt>format_prefix:</dt><dd>The label is prefixed by 'l:'</dd>
 *    <dt>format_separate_file:</dt><dd>The label is read from a separate file
 *        specified by <code>label_file</code></dd>
 *    <dt>no_label:</dt><dd>The example set is unlabelled.</dd>
 *  </dl>
 *  A detailed introduction to the sparse file format is given in
 *  section {@yale.ref sec:sparse_format|First steps/File formats/Data files}.
 *
 *  @see SparseFormatDataRowReader
 *  @yale.xmlclass SparseFormatExampleSource
 *  @version $Id: SparseFormatExampleSource.java,v 2.12 2003/08/28 14:31:06 fischer Exp $
 */
public class SparseFormatExampleSource extends Operator {

    /** This operator consumes no input objects. */
    private static final Class[] INPUT_CLASSES = {};

    /** This operator delivers exactly one {@link ExampleSet}. */
    private static final Class[] OUTPUT_CLASSES = { ExampleSet.class };

    /**
     * Builds the attribute set (either from the attribute description file or
     * from the <code>dimension</code> parameter), opens the data file and, for
     * the separate-file format, the label file, reads the examples into a
     * {@link MemoryExampleTable} and returns the resulting example set.
     *
     * @return an array holding the single example set that was read
     * @throws OperatorException declared by the operator contract. The body raises
     *         <code>UserError</code> 302 if the attribute description file, the
     *         data file or the label file cannot be loaded or opened, 114 if a
     *         labelled format is requested but the attribute description
     *         declares no special attribute "label", 902 if no data file could
     *         be determined, and 201 if the separate-file format is selected
     *         without <code>label_file</code>.
     */
    public IOObject[] apply() throws OperatorException {
        int format = getParameterAsInt("format");

        // +++++++++ special attribute prefix map +++++++++++++++
        Map prefixMap = new HashMap();
        Iterator p = getParameterList("prefix_map").iterator();
        while (p.hasNext()) {
            Object[] prefixMapping = (Object[])p.next();
            prefixMap.put(prefixMapping[0], prefixMapping[1]);
        }

        // +++++++++ attribute creation +++++++++++++++++++++++++
        String attributeFileName = getParameterAsString("attribute_file");
        File attributeFile = attributeFileName != null
            ? getExperiment().resolveFileName(attributeFileName)
            : null;

        String attributeDescriptionFile = getParameterAsString("attributes");
        AttributeSet attributeSet = null;
        if (attributeDescriptionFile != null) {
            try {
                attributeSet = new AttributeSet(getExperiment().resolveFileName(attributeDescriptionFile), false);
            } catch (Throwable e) {
                throw new UserError(this, e, 302, new Object[] { attributeDescriptionFile, e.getMessage() });
            }

            File defaultSource = attributeSet.getDefaultSource();
            if ((attributeFile != null) && (defaultSource != null) && (!attributeFile.equals(defaultSource))) {
                LogService.logMessage("Attribute file names specified by parameter 'attribute_file' and default_source specified in '"+attributeDescriptionFile+"' do not match! Assuming the latter to be correct.", LogService.WARNING);
            }
            if ((format != SparseFormatDataRowReader.FORMAT_NO_LABEL) &&
                (attributeSet.getSpecialAttribute("label") == null)) {
                throw new UserError(this, 114, new Object[0] );
            }
            LogService.logMessage("Found "+attributeSet.getNumberOfRegularAttributes()+" regular attributes.", LogService.MINIMUM);

            // The default source of the description file takes precedence, but
            // only if there is one. Otherwise keep the file given by
            // 'attribute_file' instead of discarding it and failing with 902.
            if (defaultSource != null) {
                attributeFile = defaultSource;
            }
        } else {
            // NOTE(review): 'dimension' defaults to -1 and is passed on unchecked;
            // how AttributeSet reacts to a negative dimension is not visible here.
            int dimension = getParameterAsInt("dimension");
            attributeSet = new AttributeSet(dimension);
            for (int i = 0; i < dimension; i++) {
                Attribute attribute = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
                attributeSet.addAttribute(attribute);
            }
            // every name in the prefix map becomes a real-valued special attribute
            p = prefixMap.values().iterator();
            while (p.hasNext()) {
                String specialName = (String)p.next();
                attributeSet.setSpecialAttribute(specialName, new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE));
            }
            if (format != SparseFormatDataRowReader.FORMAT_NO_LABEL) {
                attributeSet.setSpecialAttribute("label", new Attribute(Ontology.NOMINAL, Ontology.SINGLE_VALUE));
            }
        }

        if (attributeFile == null) {
            throw new UserError(this, 902, new Object[0]);
        }

        // +++++++++++++ reader +++++++++++++++++++++++++++++++++
        Reader inAttributes = null;
        Reader inLabels = null;
        try {
            try {
                inAttributes = new FileReader(attributeFile);
            } catch (IOException e) {
                throw new UserError(this, e, 302, new Object[] { attributeFile, e.getMessage() });
            }

            if (format == SparseFormatDataRowReader.FORMAT_SEPARATE_FILE) {
                String labelFileName = getParameterAsString("label_file");
                if (labelFileName == null) {
                    throw new UserError(this, 201, new Object[] {"format", SparseFormatDataRowReader.FORMAT_NAMES[SparseFormatDataRowReader.FORMAT_SEPARATE_FILE], "label_file"});
                }
                // resolve the label file the same way as the other file parameters
                File labelFile = getExperiment().resolveFileName(labelFileName);
                try {
                    inLabels = new FileReader(labelFile);
                } catch (IOException e) {
                    throw new UserError(this, e, 302, new Object[] { labelFile, e.getMessage() });
                }
            }

            MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes());
            SparseFormatDataRowReader reader =
                new SparseFormatDataRowReader(new DataRowFactory(getParameterAsInt("datamanagement")),
                                              format, prefixMap, attributeSet,
                                              inAttributes, inLabels,
                                              getParameterAsInt("sample_size"));
            table.readExamples(reader);
            return new IOObject[] { table.createExampleSet(attributeSet) };
        } finally {
            // Release the file handles on success as well as on every error path.
            // NOTE(review): assumes readExamples() has consumed everything it needs
            // from the readers by the time it returns - confirm.
            closeReader(inLabels);
            closeReader(inAttributes);
        }
    }

    /** Closes the given reader if it is not null; a failure is logged as a warning only. */
    private void closeReader(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            LogService.logMessage("Cannot close input file: " + e.getMessage(), LogService.WARNING);
        }
    }

    /** Returns the (empty) array of input classes. */
    public Class[] getInputClasses() {
        return INPUT_CLASSES;
    }

    /** Returns the array of output classes, which contains only {@link ExampleSet}. */
    public Class[] getOutputClasses() {
        return OUTPUT_CLASSES;
    }

    /**
     * Extends the parameter types of the superclass by the parameters of this
     * operator: <code>format</code>, <code>attributes</code>,
     * <code>attribute_file</code>, <code>label_file</code>, <code>dimension</code>,
     * <code>prefix_map</code>, <code>sample_size</code> and
     * <code>datamanagement</code>.
     *
     * @return the list obtained from the superclass with the additional types appended
     */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeCategory("format", "Format of the sparse data file.", SparseFormatDataRowReader.FORMAT_NAMES, 0));
        types.add(new ParameterTypeFile("attributes", "Name of the attribute description file.", true));
        types.add(new ParameterTypeFile("attribute_file", "Name of the data file. Only necessary if not specified in the attribute description file.", true));
        types.add(new ParameterTypeFile("label_file", "Name of the data file containing the labels. Only necessary if format is 'format_separate_file'.", true));
        types.add(new ParameterTypeInt("dimension", "Dimension of the example space. Only necessary if parameter 'attributes' is not set.", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeList("prefix_map", "Maps prefixes to names of special attributes.",
                                        new ParameterTypeString("special_attribute", "Maps prefixes to names of special attributes.")));
        //types.add(new ParameterTypeString("label_classes", "Whitespace separated list of all used labels", "+1 -1"));
        types.add(new ParameterTypeInt("sample_size", "The maximum number of examples to read from the data files (-1 = all)", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeCategory("datamanagement", "Determines, how the data is represented internally.", DataRowFactory.TYPE_NAMES, DataRowFactory.TYPE_SPARSE_MAP));
        return types;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -