📄 expdt_tc2.java
字号:
/** * This is a "hack" class. It's just to help me test out some ideas. * I'm running out of time for this conference and I'm really trying * to put together something stunning in terms of performance. * * @author Waleed Kadous * @version $Id: ExpDT_TC2.java,v 1.1.1.1 2002/06/28 07:36:16 waleed Exp $ */package tclass; import tclass.util.*; // import tclass.learnalg.*; import weka.classifiers.*; import weka.classifiers.j48.*; import weka.attributeSelection.*; import weka.filters.*; import weka.core.*; import java.io.*; class DTClassifier { J48 dt; String name = "j48"; String description = "Decision Tree Classifier"; public DTClassifier(J48 dt){ this.dt = dt; } public String getName(){ return name; } public String getDescription(){ return description; } public void classify(Instance inst, ClassificationI classn) throws Exception { double bestClass = dt.classifyInstance(inst); classn.setPredictedClass((int) bestClass); classn.setPredictedClassConfidence(1); }}public class ExpDT_TC2 { // Ok. What we are going to do is to separate the learning task in // an interesting way. // First of all, though, the standard stuff String domDescFile = "sl.tdd"; String trainDataFile = "sl.tsl"; String testDataFile = "sl.ttl"; // String globalDesc = "test._gc"; // String evExtractDesc = "test._ee"; String evClusterDesc = "test._ec"; String settingsFile = "test.tal"; String numDivs = "5"; boolean featureSel = false; void parseArgs(String[] args){ for(int i=0; i < args.length; i++){ if(args[i].equals("-tr")){ trainDataFile = args[++i]; } if(args[i].equals("-te")){ testDataFile = args[++i]; } if(args[i].equals("-nd")){ numDivs = args[++i]; } if(args[i].equals("-settings")){ settingsFile = args[++i]; System.out.println("Using " + settingsFile + " for settings"); } if(args[i].equals("-fs")){ featureSel = true; } } } public static void main(String[] args) throws Exception { Debug.setDebugLevel(Debug.PROGRESS); ExpDT_TC2 thisExp = new ExpDT_TC2(); thisExp.parseArgs(args); DomDesc domDesc = new DomDesc(thisExp.domDescFile); ClassStreamVecI trainStreamData = new ClassStreamVec(thisExp.trainDataFile, domDesc); ClassStreamVecI testStreamData = new ClassStreamVec(thisExp.testDataFile, domDesc); Debug.dp(Debug.PROGRESS, "PROGRESS: Data read in"); Settings settings = new Settings(thisExp.settingsFile, domDesc); EventExtractor evExtractor = settings.getEventExtractor(); // Global data is likely to be included in every model; so we // might as well calculated now GlobalCalc globalCalc = settings.getGlobalCalc(); ClassStreamAttValVecI trainGlobalData = globalCalc.applyGlobals(trainStreamData); ClassStreamAttValVecI testGlobalData = globalCalc.applyGlobals(testStreamData); // And we might as well extract the events. Debug.dp(Debug.PROGRESS, "PROGRESS: Globals calculated."); Debug.dp(Debug.PROGRESS, "Train: " + trainGlobalData.size() + " Test: " + testGlobalData.size()); ClassStreamEventsVecI trainEventData = evExtractor.extractEvents(trainStreamData); ClassStreamEventsVecI testEventData = evExtractor.extractEvents(testStreamData); Debug.dp(Debug.PROGRESS, "PROGRESS: Events extracted"); // System.out.println(trainEventData.toString()); // Now we want the clustering algorithms only to cluster // instances of each class. Make an array of clusterers, // one per class. int numTestStreams = testEventData.size(); int numClasses = domDesc.getClassDescVec().size(); EventDescVecI eventDescVec = evExtractor.getDescription(); EventClusterer[] eventClusterers = new EventClusterer[numClasses]; // And now, initialise. for(int i=0; i < numClasses; i++){ // The new way: eventClusterers[i] = settings.getEventClusterer(); // The old way: // eventClusterers[i] = new EventClusterer(new // StreamTokenizer( // new FileReader(thisExp.evClusterDesc)), // domDesc, // eventDescVec); // System.out.println(eventClusterers[i]); } // Segment the data. ClassStreamEventsVec[] trainStreamsByClass = new ClassStreamEventsVec[numClasses]; for(int i=0; i < numClasses; i++){ trainStreamsByClass[i] = new ClassStreamEventsVec(); trainStreamsByClass[i].setClassVec(new ClassificationVec()); trainStreamsByClass[i].setStreamEventsVec(new StreamEventsVec()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Data rearranged."); //And now load it up. StreamEventsVecI trainEventSEV = trainEventData.getStreamEventsVec(); ClassificationVecI trainEventCV = trainEventData.getClassVec(); int numTrainStreams = trainEventCV.size(); for(int i=0; i < numTrainStreams; i++){ int currentClass = trainEventCV.elAt(i).getRealClass(); trainStreamsByClass[currentClass].add(trainEventSEV.elAt(i), trainEventCV.elAt(i)); } ClusterVecI[] clustersByClass = new ClusterVecI[numClasses]; for(int i=0; i < numClasses; i++){ clustersByClass[i] = eventClusterers[i].clusterEvents(trainStreamsByClass[i]); Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering of " + i + " complete"); Debug.dp(Debug.PROGRESS, "Clusters for class: " + domDesc.getClassDescVec().getClassLabel(i) + " are:"); Debug.dp(Debug.PROGRESS, eventClusterers[i].getMapping()); } Debug.dp(Debug.PROGRESS, "PROGRESS: Clustering complete. "); // But wait! There's more! There is always more. // The first thing was only useful for clustering. // Now attribution. We want to attribute all the data. So we are going // to have one dataset for each learner. // First set up the attributors. Attributor[] attribsByClass = new Attributor[numClasses]; for(int i=0; i < numClasses; i++){ attribsByClass[i] = new Attributor(domDesc, clustersByClass[i], eventClusterers[i].getDescription()); Debug.dp(Debug.PROGRESS, "PROGRESS: AttributorMkr of " + i + " complete."); } ClassStreamAttValVecI[] trainEventAtts = new ClassStreamAttValVec[numClasses]; ClassStreamAttValVecI[] testEventAtts = new ClassStreamAttValVec[numClasses]; for(int i=0; i < numClasses; i++){ trainEventAtts[i] = attribsByClass[i].attribute(trainStreamData, trainEventData); testEventAtts[i] = attribsByClass[i].attribute(testStreamData, testEventData);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -