📄 aprioritfp_cmar.java
字号:
/* -------------------------------------------------------------------------- *//* *//* APRIORI-TFP CMAR *//* (CLASSIFICATION BASED ON MULTIPLE ASSOCIATION RULES) *//* *//* Frans Coenen *//* *//* Tuesday 2 March 2004 *//* *//* Department of Computer Science *//* The University of Liverpool *//* *//* -------------------------------------------------------------------------- *//* Class structureAssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree | +--AprioriTFPclass | +-- AprioriTFP_CMAR */// Java packagesimport java.util.*; import java.io.*;/** Methods to produce classification rules using Wenmin Li, Jiawei Han and Jian Pei's CMAR (Classification based on Multiple associate Rules) algorithm but founded on Apriori-TFP. Assumes that input dataset is orgnised such that classifiers are at the end of each record. Note: number of classifiers value is stored in the <TT>numClasses</TT> field.@author Frans Coenen@version 2 March 2004 */public class AprioriTFP_CMAR extends AprioriTFPclass { /* ------ FIELDS ------ */ // CONSTANTS /** The maximum number of CARs */ private final int MAX_NUM_CARS = 80000; // OTHER FIELDS /** The number of CARs generated so far. */ private int numCarsSoFar = 0; /* ------ CONSTRUCTORS ------ */ /** Constructor processes command line arguments. @param args the command line arguments (array of String instances). */ public AprioriTFP_CMAR(String[] args) { super(args); } /* ------ METHODS ------ */ /* START CMAR CLASSIFICATION */ /** Starts CMAR classifier generation proces. <P> Proceeds as follows:<OL> <LI>Generate all CARs using Apriori-TFP and place selected CARs into linked list of rules. <LI>Prune list according the cover stratgey. <LI>Test classification using Chi-Squared Weighting approach.</OL> @return The classification accuarcay (%). */ public double startCMARclassification() { System.out.println("START APRIORI-TFP CMAR\n" + "--------------------------"); // Generate all CARs using Apriori-TFP and place selected CARs into // linked list of rules. startCARgeneration();//currentRlist.outputRules(); // Prune linked list of rules using "cover" principal currentRlist.outputNumRules(); System.out.println("prune CARS"); currentRlist.pruneUsingCover(copyItemSet(dataArray)); // Test classification using the test set. return(testClassification()); } /* Commences process of genertaing CARS using apriori TFP. <P> For each rule generated add to rule list if: (i) Chi-Squared value is above a specified critical threshold (5% by default), and (ii) the CR tree does not contain a more general rule with a higher ordering. Rule added to rule list according to ranking (ordering). */ private void startCARgeneration() { // Calculate minimum support threshold in terms of number of // records in the training set. outputSuppAndConf(); minSupport = numRowsInTrainingSet*support/100.0; System.out.println("Num rows in training set = " + numRows + ", reduced minimum support = " + minSupport); currentRlist.setNumRows(numRows); currentRlist.setNumClasses(numClasses); currentRlist.setNumOneItemSets(numOneItemSets); // Set rule list to null. Note that startRuleList is defined in the // AssocRuleMining parent class and is also used to store Association // Rules (ARS) with respect ARM. currentRlist.startRulelist = null; numCarsSoFar = 0; // Create P-tree createPtree(); // Generate T-tree and generate CARS createTotalSupportTree(); } /*----------------------------------------------------------------------- */ /* */ /* APRIORI-TFP CMAR WITH TEN CROSS VALIDATION (TCV) */ /* */ /*----------------------------------------------------------------------- */ /* COMMEMCE TEN CROSS VALIDATION WITH OUTPUT*/ /** Start Ten Cross Validation (TCV) process with output of individual accuracies. */ public void commenceTCVwithOutput() { double[][] parameters = new double[10][4]; System.out.println("START TCV APRIORI-TFP CMAR CLASSIFICATION\n" + "------------------------------------"); // Loop through tenths data sets for (int index=0;index<10;index++) { System.out.println("[--- " + index + " ---]"); // Create training and test sets createTrainingAndTestDataSets(index); // Mine data, produce T-tree and generate CRs parameters[index][0] = startCMARclassification(); parameters[index][1] = countNumFreqSets(); parameters[index][2] = numUpdates; parameters[index][3] = currentRlist.getNumCMAR_CRs(); } // Determine totals double totalAccu = 0; double totalNumFreqSets = 0; double totalNumUpdates = 0; double totalNumCRs = 0; for (int index=0;index<parameters.length;index++) { System.out.println("(" + (index+1) + ") Accuracy = " + parameters[index][0] + ", Num. Freq. Sets = " + parameters[index][1] + ", Num Updates = " + parameters[index][2] + ", Num CRs = " + parameters[index][3]); // Totals totalAccu = totalAccu+parameters[index][0]; totalNumFreqSets = totalNumFreqSets+parameters[index][1]; totalNumUpdates = totalNumUpdates+parameters[index][2]; totalNumCRs = totalNumCRs+parameters[index][3]; } // Calculate averages averageAccuracy = totalAccu/10; averageNumFreqSets = totalNumFreqSets/10; averageNumUpdates = totalNumUpdates/10; averageNumCRs = totalNumCRs/10; // Output avergaes System.out.println("---------------------------------------"); System.out.println("Average Accuracy = " + averageAccuracy + ", Num. Freq. Sets = " + averageNumFreqSets + ", Avergae Num Updates = " + averageNumUpdates + ", Average Num CRs = " + averageNumCRs); } /*----------------------------------------------------------------------- */ /* */ /* CLASSIFICATION ASSOCIATION RULE (CAR) GENERATION */ /* */ /*----------------------------------------------------------------------- */ /* GENERATE CLASSIFICATION ASSOCIATION RULES */ /** Initiates process of generating Classification Association Rules (CARS), Loops through top level of T-tree as part of the CAR generation process. <P>CARs differ from ARs in that they have only a single consequent and that the number of admissable consequents is limited. Note that classifiers are assumed to be listed at the end of the attribute list. @param start the identification number of the first classifier to be considered. */ private void generateCARs(int level) { // Loop for (int index=numOneItemSets-numClasses+1; index<=numOneItemSets;index++) { if (startTtreeRef[index]!=null && startTtreeRef[index].childRef!=null) { if (startTtreeRef[index].support >= minSupport) { short[] consequent = new short[1]; consequent[0] = (short) index; generateCARs(null,index,level-1,consequent, startTtreeRef[index].childRef); } } } } /* GENERATE CLASSIFICATION ASSOCIATION RULES */ /** Continues process of generating classificationh association rules from a T-tree by recursively looping through T-tree level by level. @param itemSetSofar the label for a T-treenode as generated sofar. @param size the length/size of the current array lavel in the T-tree. @param level the current level in the T-tree @param consequent the current consequent (classifier) for the CAR. @param linkRef the reference to the current array lavel in the T-tree. */ protected void generateCARs(short[] itemSetSofar, int size, int level, short[] consequent, TtreeNode[] linkRef) {//System.out.print("In generateCARs: itemSetSofar = ");//outputItemSet(itemSetSofar);//System.out.println(", size = " + size + ", level = " + level +//", consequent = " + consequent[0]); // If no more nodes return if (linkRef == null) return; // Check number of CARS generated so far if (numCarsSoFar>MAX_NUM_CARS) { System.out.println("Number of CARs (" + numCarsSoFar + ") generted so far exceeds limit of " + MAX_NUM_CARS + ", generation process stopped!"); return; } // At right level if (level==1) { for (int index=1; index < size; index++) { // Check if node exists if (linkRef[index] != null) { // Generate Antecedent short[] tempItemSet = realloc2(itemSetSofar,(short) index); // Determine confidence double suppForAntecedent = (double) getSupportForItemSetInTtree(tempItemSet); double confidenceForCAR = getConfidence(suppForAntecedent, linkRef[index].support); // Add CAR to linked list structure if confidence greater // than minimum confidence threshold. if (confidenceForCAR >= confidence) {//System.out.print("("+ (numCarsSoFar+1) + ")Insert CAR: tempItemSet = ");//outputItemSet(tempItemSet);//System.out.println(", consequent = " + consequent[0] +//", confidenceForCAR = " + confidenceForCAR); numCarsSoFar++; double suppForConcequent = (double) getSupportForItemSetInTtree(consequent); currentRlist.insertRinRlistCMARranking(tempItemSet, consequent,suppForAntecedent,suppForConcequent, linkRef[index].support,confidenceForCAR); } } } return; } //System.out.println("Wrong level"); // Wrong level, Otherwise process for (int index=1; index < size; index++) {//System.out.println("W index = " + index + ", size = " + size); // Check if node exists if (linkRef[index] != null && linkRef[index].childRef!=null) { short[] tempItemSet = realloc2(itemSetSofar,(short) index);//System.out.print("tempItemSet = ");//outputItemSet(tempItemSet);//System.out.println(); // Proceed down child branch generateCARs(tempItemSet,index,level-1,consequent, linkRef[index].childRef); } } } /*------------------------------------- */ /* */ /* T-TREE METHODS */ /* */ /*------------------------------------- */ /* CREATE T-TREE LEVEL N */ /** Commences the process of determining the remaining levels in the T-tree (other than the top level), level by level in an "Apriori" manner. <P> Follows an add support, prune, generate loop until there are no more levels to generate. */ protected void createTtreeLevelN() { int nextLevel=2; // Loop while a further level exists while (nextLevelExists) { // Add support addSupportToTtreeLevelN(nextLevel); // Prune unsupported candidate sets pruneLevelN(startTtreeRef,nextLevel); // Generate CARs generateCARs(nextLevel); // Check number of frequent sets generated so far if (numFrequentsets>MAX_NUM_FREQUENT_SETS) { System.out.println("Number of frequent sets (" + numFrequentsets + ") generted so far " + "exceeds limit of " + MAX_NUM_FREQUENT_SETS + ", generation process stopped!"); break; } // Attempt to generate next level nextLevelExists=false; generateLevelN(startTtreeRef,nextLevel,null); nextLevel++; } //End System.out.println("Levels in T-tree = " + nextLevel); } /* ---------------------------------------------------------------- */ /* */ /* TEST CLASSIFICATION */ /* */ /* ---------------------------------------------------------------- */ /* TEST CLASSIFICATION */ /** Tests the generated classification rules using test sets and return percentage accuracy. @param the perecentage accuarcy. */ private double testClassification() { int correctClassCounter = 0; int wrongClassCounter = 0; int unclassifiedCounter = 0; // Check if test data exists, if not return' 0' if (testDataArray==null) { System.out.println("WARNING: No test data"); return(0); } // Check if any classification rules have been generated, if not // return'0'. if (currentRlist.startCMARrulelist==null) { System.out.println("No classification rules generated!"); return(0); } // Loop through test set int index=0; for(;index<testDataArray.length;index++) { // Note: classifyRecord methods are contained in the // AssocRuleMining class. To calssify without default use // classifyRecord, with defualt use classifyRecordDefault. short classResult = currentRlist.classifyRecordWCS(testDataArray[index]); if (classResult==0) unclassifiedCounter++; else { short classActual = getLastElement(testDataArray[index]); if (classResult == classActual) correctClassCounter++; else wrongClassCounter++; } } // Xalculate abd return classification accuracy double accuracy = ((double) correctClassCounter*100.0/(double) index);//System.out.println("correctClassCounter = " + correctClassCounter);//System.out.println("unclassifiedCounter = " + unclassifiedCounter);//System.out.println("wrongClassCounter = " + wrongClassCounter);//System.out.println("Num case = " + index);//System.out.println("accuracy = " + accuracy); // Return return(accuracy); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -