⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparseformatexamplesource.java

📁 一个很好的LIBSVM的JAVA源码。对于要研究和改进SVM算法的学者。可以参考。来自数据挖掘工具YALE工具包。
💻 JAVA
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2001-2004
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, 
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale-team@lists.sourceforge.net
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as 
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version. 
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.operator.io;

import edu.udo.cs.yale.operator.Operator;
import edu.udo.cs.yale.operator.IOObject;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.UserError;
import edu.udo.cs.yale.operator.parameter.*;
import edu.udo.cs.yale.example.DataRowReader;
import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.SparseFormatDataRowReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleTable;
import edu.udo.cs.yale.example.DataRowFactory;
import edu.udo.cs.yale.example.MemoryExampleTable;
import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeDataSource;
import edu.udo.cs.yale.tools.att.AttributeSet;

import java.util.StringTokenizer;
import java.util.List;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.io.FileNotFoundException;
import java.io.IOException;

/** Reads an example file in sparse format, i.e. lines have the form<br/>
 *  <center><pre>label index:value index:value index:value...</pre></center>
 *  Index may be an integer for the regular attributes or one of the prefixes
 *  specified by the parameter list <code>prefix_map</code>.
 *  Five possible <code>format</code>s are supported
 *  <dl>
 *    <dt>format_xy:</dt><dd>The label is the last token in each line</dd>
 *    <dt>format_yx:</dt><dd>The label is the first token in each line</dd>
 *    <dt>format_prefix:</dt><dd>The label is prefixed by 'l:'</dd>
 *    <dt>format_separate_file:</dt><dd>The label is read from a separate file
 *                                     specified by <code>label_file</code></dd>
 *    <dt>no_label:</dt><dd>The example set is unlabeled.</dd>
 *  </dl>
 *  A detailed introduction to the sparse file format is given in 
 *  section {@yale.ref sec:sparse_format|First steps/File formats/Data files}.
 *
 *  @see SparseFormatDataRowReader
 *  @yale.xmlclass SparseFormatExampleSource
 *  @version $Id: SparseFormatExampleSource.java,v 1.2 2004/08/27 11:57:37 ingomierswa Exp $
 */
public class SparseFormatExampleSource extends Operator {

    private static final Class[] INPUT_CLASSES = {};
    private static final Class[] OUTPUT_CLASSES = { ExampleSet.class };

    /**
     * Builds the attribute set (either from the attribute description file given
     * by <code>attributes</code> or, if that parameter is not set, from
     * <code>dimension</code> and <code>prefix_map</code>), reads the data file
     * (and the label file for the separate-file format) into a
     * {@link MemoryExampleTable} and returns the resulting example set.
     *
     * <p>The file readers opened here are closed before the method returns,
     * whether it completes normally or throws.</p>
     *
     * @return an array holding the single created example set
     * @throws OperatorException as a {@link UserError} with code 302 if the
     *         attribute description file, the data file or the label file cannot
     *         be read, code 114 if a label is required but the attribute
     *         description declares none, code 902 if no data file is known, and
     *         code 201 if the separate-file format is selected without
     *         <code>label_file</code>
     */
    public IOObject[] apply() throws OperatorException {

        int format = getParameterAsInt("format");

        // +++++++++ special attribute prefix map +++++++++++++++
        // Each list entry is a two-element array: { prefix, special attribute name }.
        Map prefixMap = new HashMap();
        Iterator p = getParameterList("prefix_map").iterator();
        while (p.hasNext()) {
            Object[] prefixMapping = (Object[]) p.next();
            prefixMap.put(prefixMapping[0], prefixMapping[1]);
        }

        // +++++++++ attribute creation +++++++++++++++++++++++++
        String attributeFileName = getParameterAsString("attribute_file");
        File attributeFile = attributeFileName != null ? getExperiment().resolveFileName(attributeFileName) : null;
        String attributeDescriptionFile = getParameterAsString("attributes");
        AttributeSet attributeSet = null;
        if (attributeDescriptionFile != null) {
            try {
                attributeSet = new AttributeSet(getExperiment().resolveFileName(attributeDescriptionFile), false);
            } catch (Throwable e) {
                throw new UserError(this, e, 302, new Object[] { attributeDescriptionFile, e.getMessage() });
            }
            if ((attributeFile != null) && (attributeSet.getDefaultSource() != null) &&
                (!attributeFile.equals(attributeSet.getDefaultSource()))) {
                LogService.logMessage("Attribute file names specified by parameter 'attribute_file' and default_source specified in '"+attributeDescriptionFile+"' do not match! Assuming the latter to be correct.", LogService.WARNING);
            }
            if ((format != SparseFormatDataRowReader.FORMAT_NO_LABEL) &&
                (attributeSet.getSpecialAttribute("label") == null)) {
                throw new UserError(this, 114, new Object[0]);
            }

            LogService.logMessage("Found "+attributeSet.getNumberOfRegularAttributes()+" regular attributes.",
                                  LogService.MINIMUM);
            // The default source of the description file always replaces the
            // 'attribute_file' parameter, even if it is null.
            attributeFile = attributeSet.getDefaultSource();
        } else {
            // No description file: create 'dimension' real-valued regular attributes,
            // one real-valued special attribute per prefix mapping and, unless the
            // data is unlabeled, a nominal label.
            // NOTE(review): 'dimension' defaults to -1; how AttributeSet handles a
            // negative dimension is not visible here -- confirm.
            int dimension = getParameterAsInt("dimension");
            attributeSet = new AttributeSet(dimension);
            for (int i = 0; i < dimension; i++) {
                Attribute attribute = new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE);
                attributeSet.addAttribute(attribute);
            }
            p = prefixMap.values().iterator();
            while (p.hasNext()) {
                String specialName = (String) p.next();
                attributeSet.setSpecialAttribute(specialName, new Attribute(Ontology.REAL, Ontology.SINGLE_VALUE));
            }
            if (format != SparseFormatDataRowReader.FORMAT_NO_LABEL) {
                attributeSet.setSpecialAttribute("label", new Attribute(Ontology.NOMINAL, Ontology.SINGLE_VALUE));
            }
        }

        if (attributeFile == null) {
            throw new UserError(this, 902, new Object[0]);
        }

        // +++++++++++++ reader +++++++++++++++++++++++++++++++++
        Reader inAttributes = null;
        Reader inLabels = null;
        try {
            try {
                inAttributes = new FileReader(attributeFile);
            } catch (IOException e) {
                throw new UserError(this, e, 302, new Object[] { attributeFile, e.getMessage() });
            }
            if (format == SparseFormatDataRowReader.FORMAT_SEPARATE_FILE) {
                String labelFile = getParameterAsString("label_file");
                if (labelFile == null) {
                    throw new UserError(this, 201, new Object[] {"format", SparseFormatDataRowReader.FORMAT_NAMES[SparseFormatDataRowReader.FORMAT_SEPARATE_FILE], "label_file"});
                }
                try {
                    inLabels = new FileReader(getExperiment().resolveFileName(labelFile));
                } catch (IOException e) {
                    throw new UserError(this, e, 302, new Object[] { labelFile, e.getMessage() });
                }
            }

            MemoryExampleTable table = new MemoryExampleTable(attributeSet.getAllAttributes());

            SparseFormatDataRowReader reader = new SparseFormatDataRowReader(new DataRowFactory(getParameterAsInt("datamanagement")),
                                                                             format,
                                                                             prefixMap,
                                                                             attributeSet,
                                                                             inAttributes,
                                                                             inLabels,
                                                                             getParameterAsInt("sample_size"));
            // NOTE(review): assumes readExamples() consumes the reader completely
            // before returning, so the files may be closed afterwards -- confirm
            // against MemoryExampleTable.
            table.readExamples(reader);

            return new IOObject[] { table.createExampleSet(attributeSet) };
        } finally {
            // Release the file handles on every exit path, including the error
            // paths above where only the attribute reader has been opened.
            closeReader(inLabels);
            closeReader(inAttributes);
        }
    }

    /**
     * Closes the given reader if it is not null. A failure to close is logged as
     * a warning instead of being thrown so that it cannot mask an exception that
     * is already propagating out of {@link #apply()}.
     */
    private static void closeReader(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            LogService.logMessage("Cannot close input file: " + e.getMessage(), LogService.WARNING);
        }
    }

    /** Returns the (empty) array of input classes; this operator consumes no input. */
    public Class[] getInputClasses() { return INPUT_CLASSES; }

    /** Returns the output classes: a single {@link ExampleSet}. */
    public Class[] getOutputClasses() { return OUTPUT_CLASSES; }

    /**
     * Returns the parameter types of the superclass extended by the parameters
     * of this operator: <code>format</code>, <code>attributes</code>,
     * <code>attribute_file</code>, <code>label_file</code>, <code>dimension</code>,
     * <code>prefix_map</code>, <code>sample_size</code> and
     * <code>datamanagement</code>. Only <code>format</code> and
     * <code>attributes</code> are marked as non-expert parameters.
     */
    public List getParameterTypes() {
        List types = super.getParameterTypes();
        ParameterType type = new ParameterTypeCategory("format", "Format of the sparse data file.", SparseFormatDataRowReader.FORMAT_NAMES, 0);
        type.setExpert(false);
        types.add(type);
        type = new ParameterTypeFile("attributes", "Name of the attribute description file.", true);
        type.setExpert(false);
        types.add(type);
        types.add(new ParameterTypeFile("attribute_file", "Name of the data file. Only necessary if not specified in the attribute description file.", true));
        types.add(new ParameterTypeFile("label_file", "Name of the data file containing the labels. Only necessary if format is 'format_separate_file'.", true));
        types.add(new ParameterTypeInt("dimension", "Dimension of the example space. Only necessary if parameter 'attributes' is not set.", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeList("prefix_map", "Maps prefixes to names of special attributes.", new ParameterTypeString("special_attribute", "Maps prefixes to names of special attributes.")));
        //types.add(new ParameterTypeString("label_classes", "Whitespace separated list of all used labels", "+1 -1"));
        types.add(new ParameterTypeInt("sample_size", "The maximum number of examples to read from the data files (-1 = all)", -1, Integer.MAX_VALUE, -1));
        types.add(new ParameterTypeCategory("datamanagement", "Determines, how the data is represented internally.",
                                            DataRowFactory.TYPE_NAMES, DataRowFactory.TYPE_SPARSE_MAP));
        return types;
    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -