📄 tclass.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//** * * * Single classifier solution. That is to say, we cluster all the instances * using the same clustering algorithms. * * * @author Waleed Kadous * @version $Id: TClass.java,v 1.1.1.1 2002/06/28 07:36:16 waleed Exp $ */package tclass; import java.util.StringTokenizer;import tclass.clusteralg.GClust;import tclass.util.Debug;import weka.attributeSelection.BestFirst;import weka.attributeSelection.CfsSubsetEval;import weka.classifiers.Classifier;import weka.core.Instances;import weka.core.Utils;import weka.filters.AttributeFilter;public class TClass { // Ok. What we are going to do is to separate the learning task in // an interesting way. // First of all, though, the standard stuff String domDescFile = "tclass.tdd"; String trainDataFile = "tclass.tsl"; String testDataFile = "tclass.ttl"; String settingsFile = "tclass.tal"; String learnerStuff = "weka.classifiers.j48.J48"; boolean featureSel = false; boolean makeDesc = false; boolean trainResults = false; void parseArgs(String[] args){ for(int i=0; i < args.length; i++){ if(args[i].equals("-tr")){ trainDataFile = args[++i]; } if(args[i].equals("-dd")){ domDescFile = args[++i]; } if(args[i].equals("-te")){ testDataFile = args[++i]; } if(args[i].equals("-s")){ settingsFile = args[++i]; } if(args[i].equals("-fs")){ featureSel = true; } if(args[i].equals("-md")){ makeDesc = true; } if(args[i].equals("-trainres")){ trainResults = true; } if(args[i].equals("-l")){ learnerStuff = args[++i]; learnerStuff = learnerStuff.replace(':', ' '); System.err.println("Learner String is: " + learnerStuff); } } } // Alright. This is downright funky hacky stuff. public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); TClass thisExp = new TClass(); thisExp.parseArgs(args); DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer eventClusterer = settings.getEventClusterer(); Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); ClusterVecI clusters = eventClusterer.clusterEvents(trainEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete"); Debug.dp(Debug.PROGRESS, "Clusters are:"); Debug.dp(Debug.PROGRESS, "\n" + eventClusterer.getMapping()); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor attribs = new Attributor(domDesc, clusters, eventClusterer.getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr complete."); ClassStreamAttValVecI trainEventAtts =attribs.attribute(trainStreamData, trainEventData); ClassStreamAttValVecI testEventAtts = attribs.attribute(testStreamData, testEventData); Debug.dp(Debug.PROGRESS, "PROGRESS: Attribution complete."); // Combine all data sources. For now, globals go in every // one. Combiner c = new Combiner(); ClassStreamAttValVecI trainAtts = c.combine(trainGlobalData, trainEventAtts); ClassStreamAttValVecI testAtts = c.combine(testGlobalData, testEventAtts); trainStreamData = null; testStreamData = null; trainEventSEV = null; trainEventCV = null; if(!thisExp.makeDesc){ clusters = null; eventClusterer = null; } attribs = null; System.gc();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -