⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparseformatdatarowreader.java

📁 一个很好的 LIBSVM 的 Java 源码，可供研究和改进 SVM 算法的学者参考。来自数据挖掘工具包 YALE。
💻 JAVA
字号:
/*
 *  YALE - Yet Another Learning Environment
 *  Copyright (C) 2001-2004
 *      Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, 
 *          Katharina Morik, Oliver Ritthoff
 *      Artificial Intelligence Unit
 *      Computer Science Department
 *      University of Dortmund
 *      44221 Dortmund,  Germany
 *  email: yale-team@lists.sourceforge.net
 *  web:   http://yale.cs.uni-dortmund.de/
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as 
 *  published by the Free Software Foundation; either version 2 of the
 *  License, or (at your option) any later version. 
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 *  USA.
 */
package edu.udo.cs.yale.example;

import edu.udo.cs.yale.tools.LogService;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeSet;

import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.util.StringTokenizer;
import java.util.Map;
import java.util.HashMap;
import java.util.Vector;

/** Reads the data rows in sparse format. The format is specified in the class comment of
 *  {@link edu.udo.cs.yale.operator.io.SparseFormatExampleSource}. The {@link Attribute}s to read
 *  must be passed to the reader in its constructor as an {@link AttributeSet}; a <code>null</code>
 *  attribute set is rejected.
 *
 *  <p>Each non-empty line that does not start with <code>#</code> is one example. It consists of
 *  whitespace separated tokens of the form <code>index:value</code>, where <code>index</code> is
 *  either the 1-based number of a regular attribute, a prefix defined in the prefix map, or the
 *  name of a special attribute. Depending on the format, the label is the single token without a
 *  colon (xy), the first token of the line (yx), a prefixed token (prefix), or the next non-comment
 *  line of a separate label file (separate_file).</p>
 *
 *  <p>NOTE(review): the original documentation states that the constructor of {@link ExampleTable}
 *  reassigns the attribute indices; this class itself never assigns indices.</p>
 *
 *  @author Simon, Ingo
 *  @version $Id: SparseFormatDataRowReader.java,v 2.16 2004/08/27 11:57:32 ingomierswa Exp $
 */
public class SparseFormatDataRowReader extends AbstractDataRowReader {

    /** Names of the formats, in the order of the <code>FORMAT_*</code> constants. */
    public static final String[] FORMAT_NAMES = { "xy", "yx", "prefix", "separate_file", "no_label" };

    /** Label succeeds attributes. */
    public static final int FORMAT_XY            = 0;
    /** Label precedes attributes. */
    public static final int FORMAT_YX            = 1;
    /** Label has a prefix specified in the prefix map. */
    public static final int FORMAT_PREFIX        = 2;
    /** Label is in separate file. */
    public static final int FORMAT_SEPARATE_FILE = 3;
    /** Label is missing. */
    public static final int FORMAT_NO_LABEL      = 4;


    /** Reader for the attribute data and reader for the labels (the latter only for FORMAT_SEPARATE_FILE). */
    private BufferedReader inAttributes, inLabels;

    /** The attribute set with regular and special attributes. */
    private AttributeSet attributeSet = null;

    /** Remember if an end of file has occurred. */
    private boolean eof;

    /** Remember if a line has already been read (and not yet been handed out by {@link #next()}). */
    private boolean lineRead;

    /** The maximum number of examples to read, or -1 for no limit. */
    private int maxNumber;

    /** Number of lines already handed out by {@link #next()}. */
    private int linesRead;

    /** The DataRow that will be returned in the next call to {@link #next()} */
    private DataRow currentDataRow;

    /** One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX, FORMAT_SEPARATE_FILE, and FORMAT_NO_LABEL. */
    private int format;

    /** The dimension of the examples, i.e. the total number of regular and special attributes. */
    private int dimension;

    /** Maps prefixes to special attribute names, e.g. "l:" to "label". Never null. */
    private Map prefixMap = new HashMap();

    /** Creates a new data row reader for sparse format.
     *
     *  @param factory Factory used to create {@link DataRow} instances.
     *  @param format One out of FORMAT_XY, FORMAT_YX, FORMAT_PREFIX, FORMAT_SEPARATE_FILE, and FORMAT_NO_LABEL.
     *  @param prefixMap Maps prefixes to special attribute names (e.g. "l" to "label").
     *                   May be null, which is treated like an empty map.
     *  @param attributeSet Set of regular and special attributes. Must not be null and, unless
     *                      the format is FORMAT_NO_LABEL, must define the special attribute "label".
     *  @param attributeReader Reader for the data
     *  @param labelReader Reader for the labels. Only necessary if format is FORMAT_SEPARATE_FILE.
     *  @param sampleSize sample size, may be -1 for no limit.
     *  @throws IllegalArgumentException if the attribute set is null, if the label reader is
     *          missing for FORMAT_SEPARATE_FILE, or if a required label attribute is not defined.
     **/
    public SparseFormatDataRowReader(DataRowFactory factory,
                                     int format,
                                     Map prefixMap,
                                     AttributeSet attributeSet,
                                     Reader attributeReader,
                                     Reader labelReader,
                                     int sampleSize) {
        super(factory);
        if (attributeSet == null) {
            throw new IllegalArgumentException("AttributeSet must not be null.");
        }
        this.format = format;
        // Keep the empty default map if the caller passes null; otherwise the
        // prefix lookup in resolveSpecialAttribute() would fail with a NullPointerException.
        if (prefixMap != null) {
            this.prefixMap = prefixMap;
        }
        this.attributeSet = attributeSet;
        this.dimension    = attributeSet.getAllAttributes().size();
        this.maxNumber    = sampleSize;
        this.inAttributes = new BufferedReader(attributeReader);
        if (format == FORMAT_SEPARATE_FILE) {
            if (labelReader == null) {
                throw new IllegalArgumentException("labelReader must not be null if format is 'separate_file'!");
            }
            this.inLabels = new BufferedReader(labelReader);
        }
        if (format != FORMAT_NO_LABEL) {
            if (attributeSet.getSpecialAttribute("label") == null) {
                throw new IllegalArgumentException("If format is not no_label, label attribute must be defined.");
            }
        }
    }

    /** Checks if further examples exist. Returns false if one of the files ends or if the
     *  sample size limit has been reached. In both cases the underlying readers are closed.
     *
     *  @throws RuntimeException wrapping the {@link IOException} if reading or parsing a line fails.
     */
    public boolean hasNext() {
        try {
            if ((maxNumber > -1) && (linesRead >= maxNumber)) {
                // Sample limit reached: nothing more will be read, so release the readers
                // (closing an already closed reader has no effect).
                closeReaders();
                return false;
            }
            if (lineRead) {
                return !eof;
            }
            eof = !readLine();
            if (eof) {
                closeReaders();
            }
        } catch (IOException e) {
            throw new RuntimeException(e.getMessage(), e);
        }
        lineRead = true;
        return !eof;
    }

    /** Closes the attribute reader and, if present, the label reader. The label reader is
     *  closed even if closing the attribute reader fails. */
    private void closeReaders() throws IOException {
        try {
            inAttributes.close();
        } finally {
            if (inLabels != null) {
                inLabels.close();
            }
        }
    }

    /** Returns the next line of the given reader that is neither empty nor starts with '#',
     *  or null if the end of the stream is reached first. */
    private static String readContentLine(BufferedReader in) throws IOException {
        String line;
        do {
            line = in.readLine();
            if (line == null) {
                return null;
            }
        } while (line.startsWith("#") || (line.length() == 0));
        return line;
    }

    /** Reads and parses the next example into {@link #currentDataRow}.
     *
     *  @return false if the attribute file (or, for FORMAT_SEPARATE_FILE, the label file) has ended.
     *  @throws IOException if reading fails or the line is malformed.
     */
    private boolean readLine() throws IOException {
        String attributeLine = readContentLine(inAttributes);
        if (attributeLine == null) {
            return false;
        }

        this.currentDataRow = getFactory().create(dimension);

        StringTokenizer tokenizer = new StringTokenizer(attributeLine);

        String labelString = null;
        if (format == FORMAT_YX) {
            // A line consisting only of whitespace has no tokens at all; report it as a
            // malformed line instead of failing with a NoSuchElementException.
            if (!tokenizer.hasMoreTokens()) {
                throw new IOException("Malformed line in examplefile: missing label in line '" + attributeLine + "'");
            }
            labelString = tokenizer.nextToken();
        } else if (format == FORMAT_SEPARATE_FILE) {
            labelString = readContentLine(inLabels);
            if (labelString == null) {
                return false;
            }
        }

        while (tokenizer.hasMoreTokens()) {
            String attributeToken = tokenizer.nextToken();
            int colonIndex = attributeToken.indexOf(':');

            if (colonIndex == -1) {
                // A token without a colon is only legal as the single label of the xy format.
                // In every other case substring(0, -1) would throw an unchecked exception.
                if ((format != FORMAT_XY) || (labelString != null)) {
                    throw new IOException("Malformed line in examplefile: " + attributeToken);
                }
                labelString = attributeToken;
                continue;
            }

            String pos   = attributeToken.substring(0, colonIndex);  // references the attribute
            String value = attributeToken.substring(colonIndex + 1); // the attribute value
            storeValue(resolveAttribute(pos), value, false);
        }

        if (labelString != null) {
            storeValue(attributeSet.getSpecialAttribute("label"), labelString, true);
        }
        return true;
    }

    /** Resolves the attribute referenced by the part of a token in front of the colon: a
     *  1-based index of a regular attribute, or otherwise a special attribute.
     *
     *  @throws IOException if the index is out of range or no matching special attribute exists.
     */
    private Attribute resolveAttribute(String pos) throws IOException {
        int index;
        try {
            index = Integer.parseInt(pos) - 1;
        } catch (NumberFormatException e) {
            // Not a number, so it must reference a special attribute.
            return resolveSpecialAttribute(pos);
        }
        int numberOfRegularAttributes = attributeSet.getNumberOfRegularAttributes();
        if ((index < 0) || (index >= numberOfRegularAttributes)) {
            throw new IOException("Attribute index out of range: '" + (index + 1) +
                                  "'! Index must be between 1 and dimension " + numberOfRegularAttributes + "!");
        }
        return attributeSet.getAttribute(index);
    }

    /** Looks up a special attribute, first via the prefix map and, if the prefix is not
     *  mapped, by using the given string directly as the special attribute name. */
    private Attribute resolveSpecialAttribute(String pos) throws IOException {
        String specialAttributeName = (String) prefixMap.get(pos);
        Attribute attribute;
        if (specialAttributeName == null) {
            attribute = attributeSet.getSpecialAttribute(pos);
            if (attribute == null) {
                throw new IOException("Illegal attribute index: '" + pos +
                                      "' (legal values are integers and defined prefixes for special attributes (Parameter prefix_map of SparseFormatExampleSource))!");
            }
        } else {
            attribute = attributeSet.getSpecialAttribute(specialAttributeName);
            if (attribute == null) {
                throw new IOException("Unknown special attribute: " + specialAttributeName);
            }
        }
        return attribute;
    }

    /** Stores the given string value for the attribute in {@link #currentDataRow}. Nominal
     *  values are mapped via {@link Attribute#mapString}, all others are parsed as double.
     *
     *  @param isLabel only selects the wording of the error message.
     *  @throws IOException if a non-nominal value is not a valid number.
     */
    private void storeValue(Attribute attribute, String value, boolean isLabel) throws IOException {
        if (attribute.isNominal()) {
            currentDataRow.set(attribute, attribute.mapString(value));
            return;
        }
        try {
            currentDataRow.set(attribute, Double.parseDouble(value));
        } catch (NumberFormatException e) {
            if (isLabel) {
                throw new IOException("Label is not numerical: '" + value + "'.");
            }
            throw new IOException("Attribute is not numerical: '" + value + "'!");
        }
    }

    /** Returns the next data row, or null if no further row is available (end of file or
     *  sample size limit reached). */
    public DataRow next() {
        if (eof) {
            return null;
        }
        if (!lineRead && !hasNext()) {
            return null;
        }
        linesRead++;
        lineRead = false;
        return currentDataRow;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -