📄 batchedfilteredexampleset.java
字号:
/* * YALE - Yet Another Learning Environment * Copyright (C) 2002, 2003 * Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, * Katharina Morik, Oliver Ritthoff * Artificial Intelligence Unit * Computer Science Department * University of Dortmund * 44221 Dortmund, Germany * email: yale@ls8.cs.uni-dortmund.de * web: http://yale.cs.uni-dortmund.de/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */package edu.udo.cs.yale.example;import edu.udo.cs.yale.MethodNotSupportedException;import edu.udo.cs.yale.tools.LogService;import edu.udo.cs.yale.tools.TempFileService;import edu.udo.cs.yale.tools.Ontology;import edu.udo.cs.yale.example.Attribute;import edu.udo.cs.yale.example.Example;import edu.udo.cs.yale.example.ExampleSet;import edu.udo.cs.yale.example.ExampleReader;import edu.udo.cs.yale.example.ConditionExampleReader;import edu.udo.cs.yale.operator.ResultObject;import edu.udo.cs.yale.operator.performance.PerformanceVector;import java.io.File;import java.io.FileNotFoundException;import java.util.Comparator;import java.util.List;import java.util.LinkedList;import java.util.Iterator;import java.util.ArrayList;import java.util.ListIterator;import java.util.Hashtable;import java.util.Enumeration;/** Condition for <tt>BatchedExampleSet</tt> ... * ... requires an attribute named 'batch_index' of type 'integer' ... * ... requires a label attribute whose missing values are marked as 'missing' (= unlabeled examples) ... * ... for filtering out unlabeled examples in a given range of batches (without removing those outside that range) ... */class BatchFilterCondition implements Condition { private ExampleSet exampleSet; // reference to the example set that the condition will be applied to private Attribute batchIndexAttribute; // reference to the batch index attribute, whose value range is to be restricted private int firstBatch, lastBatch; // index value of first and last batch to be filtered, i.e. batch range within which // all unlabeled examples are to be removed private Attribute labelAttribute; // reference to the label attribute (class attribute) public BatchFilterCondition (ExampleSet exampleSet, Attribute batchIndexAttribute, int firstBatch, int lastBatch) { this.exampleSet = exampleSet; this.batchIndexAttribute = batchIndexAttribute; this.firstBatch = firstBatch; this.lastBatch = lastBatch; this.labelAttribute = exampleSet.getLabel(); } public boolean conditionOk (Example example) { if (batchIndexAttribute == null) return true; if (example.getValue(batchIndexAttribute) < ((double)firstBatch)) return true; if (example.getValue(batchIndexAttribute) > ((double)lastBatch)) return true; if ((example.getValueAsString(labelAttribute)).equals("missing")) return false; return true; }}/** Objects of this class manage a set of examples having a batch index attribute * like it is used in time-oriented experiments with batch-wise data processing. * Iterators (<tt>ExampleReader</tt>) provided by this class iterate all examples * that ar either outside a specified range of a first and a last batch index to * be filtered <i>or</i> that are labeled. Unlabeled examples with batch index * attribute values within this time range are skipped (filtered out). * This class is used by <tt>BatchTransductionWindowLearner</tt> in combination with * <tt>BatchedExampleSet</tt> to provide a time window on an example set free of * unlabeled examples. * <p> * As a subclass of <tt>ExampleSet</tt>, this class provides all methods its superclass * does. * </p> * * ... * * Insbesondere besitzen sie dabei fünf Eigenschaften: * <ul> * <li><tt>partition</tt>: Die Partition gibt die Teilmenge (Partition) von allen Beispielen an. Diese Aufteilungen * treten insbesondere bei der Kreuzvalidierung auf und können sogar ineinander * geschachtelt werden. * <li><tt>exampleTable</tt>: Die ExampleTable verwaltet alle Beispiele mit allen Attributen, auch neu generierten. * <li><tt>attributeReferences</tt>: Diese Liste beinhalten Objekte der Klasse <tt>AttributeReference</tt>, welche eine * Spalte in der ExampleTable (den Index) sowie eine mögliche Selektion beinhalten. * <li><tt>performance</tt>: Wurde ein ExampleSet in dieser Form bereits evaluiert, so kann ihr <tt>PerformanceVector</tt> * gesetzt werden. Dies verhindert, das nicht veränderte ExampleSets erneut evaluiert werden. * <li><tt>shouldRecalculateInformationGain</tt>: Wurde dem ExampleSet eine neue Attributreferenz hinzugefuegt oder eine * gelöscht, so zeigt dieses Flag an, daß eine Neuberechnung der * InformationGain Werte noetig ist. * </ul> * * ... * * <h4>Typical use of this class</h4> * This class is typically used for time-oriented experiments, e.g. for simulated * concept drift scenarios (see e.g. class <tt>ConceptDriftSimulator</tt>). * * @see edu.udo.cs.yale.operator.time.ConceptDriftSimulator * * @author Ralf Klinkenberg * @version $Id: BatchedFilteredExampleSet.java,v 2.4 2003/07/03 16:01:29 fischer Exp $ */public class BatchedFilteredExampleSet extends BatchedExampleSet { // public class BatchedFilteredExampleSet extends ExampleSet { /* History: * RK/2002/06/23: first version of class implemented for 'BatchTransductionWindowLearner'; * RK/2003/03/21: file added to LS8 Yale CVS; * Still to do: * -> class comments in English; */ private ExampleSet originalExampleSet; // reference to the original (possibly non-batched) example set (only for get-method) private Attribute batchIndexAttribute; // reference to the attribute to be used as batch index attribute private int firstBatch; // first batch = lower bound of batch index attribute value range for examples to be considered private int lastBatch; // last batch = upper bound of batch index attribute value range for examples to be considered private BatchFilterCondition exampleSelectionCondition; // example filter for iterator ConditionExampleReader /** This constructor creates a shallow copy (clone) of the example set <tt>exampleSet</tt> * and restricts all provided <tt>ExampleReader</tt> instances to iterate only examples * that are either labeled or outside the value range [<tt>firstBatch</tt> .. <tt>lastBatch</tt>] * of the batch index attribute <tt>batchIndexAttribute</tt>. */ public BatchedFilteredExampleSet (BatchedExampleSet exampleSet, Attribute batchIndexAttribute, int firstBatch, int lastBatch) { //// Parameter 'exampleSet': was 'ExampleSet', now is 'BatchedExampleSet'; // RK/2002/06/30 super(exampleSet); this.originalExampleSet = (ExampleSet) exampleSet; this.batchIndexAttribute = batchIndexAttribute; this.firstBatch = firstBatch; this.lastBatch = lastBatch; this.exampleSelectionCondition = new BatchFilterCondition (exampleSet, batchIndexAttribute, firstBatch, lastBatch); }// /** Creates a new example set from a given ExampleTable and AttributeReferences.// * This constructor should not be used, but is provided only for compatibility to // * the corresponding super class constructor.// */// public BatchedFilteredExampleSet (ExampleTable exampleTable, // List attributeReferences, // Attribute label,// Attribute predictedLabel,// Attribute weight,// Attribute cluster) {// super(exampleTable, attributeReferences, label, predictedLabel, weight, cluster);// this.originalExampleSet = null;// this.batchIndexAttribute = null;// this.firstBatch = -1;// this.lastBatch = -1;// this.exampleSelectionCondition = null;// } public BatchedFilteredExampleSet (BatchedFilteredExampleSet exampleSet) { super((BatchedExampleSet)exampleSet); this.originalExampleSet = (ExampleSet) exampleSet; this.batchIndexAttribute = exampleSet.batchIndexAttribute; this.firstBatch = exampleSet.firstBatch; this.lastBatch = exampleSet.lastBatch; exampleSelectionCondition = new BatchFilterCondition (exampleSet, this.batchIndexAttribute, this.firstBatch, this.lastBatch); } public BatchedFilteredExampleSet (BatchedExampleSet exampleSet) { super(exampleSet); this.originalExampleSet = (ExampleSet) exampleSet; this.batchIndexAttribute = exampleSet.getBatchIndexAttribute(); this.firstBatch = exampleSet.getFirstBatch(); this.lastBatch = exampleSet.getLastBatch(); exampleSelectionCondition = new BatchFilterCondition (exampleSet, this.batchIndexAttribute, this.firstBatch, this.lastBatch); } /* public BatchedFilteredExampleSet (ExampleSet exampleSet) { super(exampleSet); this.originalExampleSet = (ExampleSet) exampleSet; this.batchIndexAttribute = null; this.firstBatch = 0; this.lastBatch = Integer.MAX_VALUE; exampleSelectionCondition = new BatchFilterCondition (exampleSet, this.batchIndexAttribute, this.firstBatch, this.lastBatch); } */ public Object clone() { return new BatchedFilteredExampleSet(this); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -