📄 aprioritfpclass.java
字号:
/* -------------------------------------------------------------------------- */
/*                                                                            */
/*                          Apriori-TFP CLASSIFIER                            */
/*                                                                            */
/*                               Frans Coenen                                 */
/*                                                                            */
/*                          Monday 2 February 2004                            */
/*                                                                            */
/*                      Department of Computer Science                        */
/*                       The University of Liverpool                          */
/*                                                                            */
/* -------------------------------------------------------------------------- */

/* Class structure

AssocRuleMining
      |
      +-- TotalSupportTree
                |
                +-- PartialSupportTree
                          |
                          +-- AprioriTFPclass */

// Java packages

import java.util.*; import java.io.*;

/** Methods to produce classification rules using an Apriori-T approach.
Assumes that the input dataset is organised such that classifiers are at the
end of each record. Note: the number of classifiers is stored in the
<TT>numClasses</TT> field.
@author Frans Coenen
@version 29 April 2003 */

public class AprioriTFPclass extends PartialSupportTree {

    /* ------ FIELDS ------ */

    // Data structures

    /** 2-D array to hold the test data. <P> Note that classification involves
    producing a set of Classification Rules (CRs) from a training set and then
    testing the effectiveness of the CRs on a test set. */

    protected short[][] testDataArray = null;

    /** 3-D array to hold tenth sets of input data. <P> Used in conjunction
    with "10 Cross Validation" where the input data is divided into 10 subsets
    and CRs are produced using each subset in turn and validated against the
    remaining 9 sets. The overall average accuracy is then the total accuracy
    divided by 10. */

    protected short[][][] tenthDataSets = new short[10][][];

    // Other fields

    /** Number of rows in input data set, not the same as the number of rows
    in the classification training set. <P> Used for temporary storage of the
    total number of rows when using the Ten Cross Validation (TCV) approach
    only. <P> The <TT>numRows</TT> field inherited from the super class is
    used throughout the CR generation process. Set to number of rows using
    the <TT>setNumRowsInInputSet</TT> method called by the application class.
    */

    protected int numRowsInInputSet;

    /** Number of rows in test set, again not the same as the number of rows
    in the classification training set. */

    protected int numRowsInTestSet;

    /** Number of rows in training set, also not the same as the number of
    rows in the classification training set. */

    protected int numRowsInTrainingSet;

    /** Percentage describing classification accuracy. */

    protected double accuracy;

    // Diagnostic fields

    /** Average accuracy as the result of TCV. */

    protected double averageAccuracy;

    /** Average number of frequent sets as the result of TCV. */

    protected double averageNumFreqSets;

    /** Average number of updates as the result of TCV. */

    protected double averageNumUpdates;

    /** Average number of classification rules as the result of TCV. */

    protected double averageNumCRs;

    /* ------ CONSTRUCTORS ------ */

    /** Constructor processes command line arguments by passing them straight
    to the super class constructor.
    @param args the command line arguments (array of String instances). */

    public AprioriTFPclass(String[] args) { super(args); }

    /* ------ METHODS ------ */

    /* ---------------------------------------------------------------- */
    /*                                                                  */
    /*                     COMMAND LINE ARGUMENTS                       */
    /*                                                                  */
    /* ---------------------------------------------------------------- */

    /* IDENTIFY ARGUMENT */

    /** Identifies nature of individual command line arguments:
    -C = confidence, -F = file name, -S = support, -N = number of classes.
    (Overrides higher level method.)
    @param argument the given argument.
*/ protected void idArgument(String argument) { if (argument.charAt(0) == '-') { char flag = argument.charAt(1); argument = argument.substring(2,argument.length()); switch (flag) { case 'C': confidence = Double.parseDouble(argument); break; case 'F': fileName = argument; break; case 'S': support = Double.parseDouble(argument); break; case 'N': numClasses = Integer.parseInt(argument); break; default: System.out.println("ERROR 1: Unrecognise command line " + " argument -" + flag + argument); errorFlag = false; } } else { System.out.println("ERROR 2: All command line arguments must " + "commence with a '-' character (" + argument + ")"); errorFlag = false; } } /* CHECK INPUT ARGUMENTS */ /** Invokes methods to check values associated with command line arguments (overides higher level method). */ protected void CheckInputArguments() { // Check support and confidence input checkSupportAndConfidence(); // Check file name checkFileName(); // Check number of classes checkNumClasses(); // Return if (errorFlag) outputSettings(); else outputMenu(); } /* CHECK NUMBER OF CLASSES */ /** Checks if number of classes command line parameter has been set appropriately. */ private void checkNumClasses() { if (numClasses == 0) { System.out.println("ERROR 3: Must specify number of classes (-N)"); errorFlag = false; } if (numClasses < 0) { System.out.println("ERROR 4: Number of classes must be a " + "positive integer"); errorFlag = false; } } /* SET SUPPORT AND CONFIDENCE */ /** Sets new values for the support and confidence fields. @param newSupport the new support value. @param newConfidence the new confidence value. 
*/
    public void setSupportAndConfidence(double newSupport,
                                        double newConfidence) {
        support    = newSupport;
        confidence = newConfidence;
    }

    /* ---------------------------------------------------------------- */
    /*                                                                  */
    /*                        DATA SET UTILITIES                        */
    /*                                                                  */
    /* ---------------------------------------------------------------- */

    /* REORDER INPUT DATA: */

    /** Reorders input data according to frequency of single attributes but
    excluding classifiers which are left unordered at the end of the
    attribute list. <P> Overrides method in <TT>AssocRuleMining</TT> class.
    Note reordering makes for more efficient execution of the T-tree (and
    P-tree) algorithms. */

    public void idInputDataOrdering() {
        // Count singles and store in countArray
        int[][] countArray = countSingles();

        // Order only the first (numCols-numClasses) entries of the count
        // array, i.e. everything except the trailing classifiers
        // (originally described as a bubble sort on the support value,
        // second index)
        orderFirstNofCountArray(countArray,numCols-numClasses);

        // Define conversion and reconversion arrays
        defConvertArrays(countArray);
    }

    /* PRUNE UNSUPPORTED ATTRIBUTES */

    /** Removes single attributes (not classifiers) from input data set which
    do not meet the minimum support requirement. <P> Each record is rebuilt
    from its supported attributes followed by its classifier (the last
    element of the record), after which the classifier IDs are recast and
    the <TT>numOneItemSets</TT> field is reset. Missing (null) and
    zero-length records are left untouched. */

    public void pruneUnsupportedAtts() {
        short[] itemSet;
        int attribute;

        // Step through data array using loop construct
        for (int rowIndex=0;rowIndex<dataArray.length;rowIndex++) {
            // Skip missing rows and rows with no elements at all: a
            // zero-length row has no classifier at index length-1 to keep
            if (dataArray[rowIndex] == null ||
                                dataArray[rowIndex].length == 0) continue;

            itemSet = null;
            // For each attribute in the current record (not the classifier)
            // find if supported with reference to the conversion array. If
            // so add to "itemSet".
            int maxLength = dataArray[rowIndex].length-1;
            for (int colIndex=0;colIndex<maxLength;colIndex++) {
                attribute = dataArray[rowIndex][colIndex];
                // Check support: element 1 of the conversion array entry is
                // compared with the threshold, element 0 is what gets stored
                if (conversionArray[attribute][1] >= minSupport) {
                    itemSet = reallocInsert(itemSet,
                                (short) conversionArray[attribute][0]);
                }
            }
            // Add classifier
            itemSet = reallocInsert(itemSet,dataArray[rowIndex][maxLength]);
            // Return new item set to data array
            dataArray[rowIndex] = itemSet;
        }

        // Adjust classifiers
        recastClassifiers();

        // Reset number of one item sets field
        numOneItemSets = getNumSupOneItemSets();
    }

    /* RECAST CLASSIFIERS */

    /** Adjusts classifier IDs in data array where attributes have been
    pruned using <TT>pruneUnsupportedAtts</TT> method. <P> Proceeds by
    looping through data table and subtracting a value equal to the number of
    removed attributes (<TT>numCols</TT> less the result of
    <TT>getNumSupOneItemSets</TT>) from the value of the last element (the
    classifier) in each record. Missing (null) and zero-length records are
    skipped, matching the empty row check made when pruning. */

    private void recastClassifiers() {
        short difference = (short) (numCols-getNumSupOneItemSets());

        // Step through data array using loop construct
        for (int rowIndex=0;rowIndex<dataArray.length;rowIndex++) {
            short[] row = dataArray[rowIndex];
            // Pruning leaves null rows in place, so they must be skipped
            // here too; an empty row has no classifier element to adjust
            if (row == null || row.length == 0) continue;
            int lastIndex = row.length-1;
            row[lastIndex] = (short) (row[lastIndex]-difference);
        }
    }

    /* GET NUM OF SUPPORTE ONE ITEM SETS */

    /** Gets number of supported attributess (note this is not necessarily
    the same as the number of columns/attributes in the input set) plus the
    number of classifiers. <P> Overides parent method which returns the
    number of support 1 itemsets. This would exclude any classifiers whose
    support value was below the minimum support threshold.
    @return Number of supported 1-item stes */

    protected int getNumSupOneItemSets() {
        int counter = 0;

        // Step through conversion array incrementing counter for each
        // supported element found
        int length = conversionArray.length-numClasses;
        for (int index=1;index < length;index++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -