// decisiontree.java
/* -------------------------------------------------------------------------- */
/* */
/* DECISION TREE */
/* */
/* Frans Coenen */
/* */
/* Thursday 8 November 2007 */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
//package lucsKDD_ARM;
/* To compile: javaARMpackc.exe decisionTree.java */
// Java packages
// Java GUI packages
import javax.swing.*;
/** Decision tree classifier generator: builds a binary decision tree from a
training set and evaluates the resulting Classification Rules on a test set.
@author Frans Coenen
@version 8 November 2007 */
public class DecisionTree extends AssocRuleMining {
/* ------ FIELDS ------ */
// Data structures
/** 2-D array to hold the test data. <P> Note that classification
involves producing a set of Classification Rules (CRs) from a training
set and then testing the effectiveness of the CRs on a test set. */
protected short[][] testDataArray = null;
/** Attribute list from which split-attribute selections are made. */
private int[] attributeList = null;
/** Start (root) reference to the decision tree. */
private DecisionTreeNode root = null;
// Other fields
/** Number of rows in the training set; note this is not the same as the
number of rows in the full input data set (see createTrainingAndTestDataSets). */
private int numRowsInTrainingSet = 0;
/** Number of rows in the test set; again not the same as the number of
rows in the full input data set. */
private int numRowsInTestSet = 0;
/** Number of attributes, i.e. numOneItemSets minus the number of class
columns (set in startClassification). */
protected int numAttributes = 0;
/* ------ CONSTRUCTORS ------ */
/** Constructor with command line arguments to be processed; all argument
processing is delegated to the superclass (AssocRuleMining) constructor.
@param args the command line arguments (array of String instances). */
public DecisionTree(String[] args) {
super(args);
}
/* ---------------------------------------------------------------- */
/* */
/* COMMAND LINE ARGUMENTS */
/* */
/* ---------------------------------------------------------------- */
/* IDENTIFY ARGUMENT */
/** Identifies nature of individual command line arguments:
-C = confidence, -F = file name, -N = number of classes, -S = support,
-T = test file name. <P>(Overrides higher level method.)
<P>NOTE(review): the original switch omitted the -C and -S cases even
though they are documented here and checkSupportAndConfidence() validates
those fields later; both cases are now handled.
@param argument the given argument. */
protected void idArgument(String argument) {
    // Arguments have the form "-Xvalue"; anything shorter than three
    // characters cannot carry both a flag and a value.
    if (argument.length()<3) {
        JOptionPane.showMessageDialog(null,"Command line argument '" +
                argument + "' too short.","COMMAND LINE " +
                "INPUT ERROR",JOptionPane.ERROR_MESSAGE);
        errorFlag = false;  // errorFlag==false signals bad input
    }
    else if (argument.charAt(0) == '-') {
        char flag = argument.charAt(1);
        argument = argument.substring(2,argument.length());
        switch (flag) {
            case 'C':
                confidence = Double.parseDouble(argument);
                break;
            case 'F':
                fileName = argument;
                break;
            case 'N':
                numClasses = Integer.parseInt(argument);
                break;
            case 'S':
                support = Double.parseDouble(argument);
                break;
            case 'T':
                testSetFileName = argument;
                break;
            default:
                JOptionPane.showMessageDialog(null,"Unrecognised command "+
                        "line argument: '" + flag + argument + "'.",
                        "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
                errorFlag = false;
        }
    }
    else {
        JOptionPane.showMessageDialog(null,"All command line arguments " +
                "must commence with a '-' character ('" + argument + "').",
                "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
    }
}
/* CHECK INPUT ARGUMENTS */
/** Runs the individual validation checks on the values gathered from the
command line (overrides higher level method); reports the current settings
when all checks pass, otherwise falls back to outputting the menu. */
protected void CheckInputArguments() {
    // Each check clears errorFlag on failure.
    checkSupportAndConfidence();
    checkFileName();
    checkNumClasses();
    // Some check failed: show usage menu and stop.
    if (!errorFlag) {
        outputMenu();
        return;
    }
    // Everything validated: echo the settings in use.
    outputSettings();
}
/* CHECK NUMBER OF CLASSES */
/** Checks that the number-of-classes command line parameter (-N) has been
supplied and is a positive integer; clears errorFlag (with a dialog)
otherwise. */
private void checkNumClasses() {
    // Positive value: parameter was supplied and is valid.
    if (numClasses > 0) return;
    // Distinguish "never supplied" (still at its 0 default) from
    // "supplied but negative" so the message is specific.
    String message = (numClasses == 0)
            ? "Must specify number of " + "classes (-N)"
            : "Number of classes must be a " + "positive integer";
    JOptionPane.showMessageDialog(null,message,
            "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
    errorFlag = false;
}
/* SET SUPPORT AND CONFIDENCE */
/** Installs new values for the support and confidence thresholds.
@param newSupport the new support value.
@param newConfidence the new confidence value. */
public void setSupportAndConfidence(double newSupport,
        double newConfidence) {
    // Independent field assignments; no validation performed here.
    confidence = newConfidence;
    support = newSupport;
}
/* ---------------------------------------------------------------- */
/* */
/* DATA SET UTILITIES */
/* */
/* ---------------------------------------------------------------- */
/* REORDER INPUT DATA: */
/** Reorders the input data according to the frequency of single
attributes, leaving the class attributes unordered at the end of the
attribute list. <P> Overrides method in <TT>AssocRuleMining</TT> class.
Reordering makes for more efficient execution of the T-tree (and P-tree)
algorithms. Identical method in <TT>AprioriTclass</TT> class. */
public void idInputDataOrdering() {
    // Number of genuine (non-class) attribute columns.
    final int numNonClassColumns = numCols - numClasses;
    // Frequency count for each single attribute.
    int[][] countArray = countSingles();
    // Sort only the non-class portion of the count array on support value.
    orderFirstNofCountArray(countArray, numNonClassColumns);
    // Build the conversion/reconversion lookup arrays from the sorted counts.
    defConvertArrays(countArray, numNonClassColumns);
    // Record that the input data is now ordered.
    isOrderedFlag = true;
}
/* CREATE TRAINING AND TEST DATA SETS. */
/** Populates test and training datasets. <P> Note: (1) assumes a 50:50
split, (2) the training data set is stored back into the dataArray
structure in which the input data was read, (3) method is called from the
application class as the same training and test sets may be required when
using (say) a "hill climbing" approach to maximise accuracy, (4) method is
not called from the constructor partly for the same reason as (3) but also
because the input data set may (given a particular application) first
require ordering and possibly also pruning and recasting (see
recastClassifiers method). */
public void createTrainingAndTestDataSets() {
    // Determine size of training and test sets (50:50 split; the training
    // set receives the extra row when numRows is odd).
    final double percentageSizeOfTestSet = 50.0;
    numRowsInTestSet =
            (int) ((double) numRows * percentageSizeOfTestSet / 100.0);
    numRowsInTrainingSet = numRows - numRowsInTestSet;
    // From here on "the input data" means the training set only.
    numRows = numRowsInTrainingSet;
    numRules = 0;
    // First numRowsInTrainingSet rows form the training set; the remaining
    // rows form the test set. System.arraycopy replaces the original
    // hand-written copy loops (same row references copied, no deep copy,
    // identical result).
    short[][] trainingSet = new short[numRowsInTrainingSet][];
    System.arraycopy(dataArray, 0, trainingSet, 0, numRowsInTrainingSet);
    testDataArray = new short[numRowsInTestSet][];
    System.arraycopy(dataArray, numRowsInTrainingSet, testDataArray, 0,
            numRowsInTestSet);
    // Assign training set label to input data set label.
    dataArray = trainingSet;
}
/* ---------------------------------------------------------------- */
/* */
/* GENERATE DECISION TREE */
/* */
/* ---------------------------------------------------------------- */
/* Procedure is as follows:
1. If no more attributes stop, otherwise select attribute on which to split
2. Allocate examples to node branches
3. Proceed down left branch
4. If uniform class stop, otherwise go to 1
5. Proceed down right branch
6. If uniform class stop, otherwise go to 1
7. Exit */
/** Generates the decision tree and evaluates it: builds the training and
test sets, grows the tree from the root, derives a rule list from the
completed tree and reports classification accuracy on the test set. */
protected void startClassification() {
// Attributes are all columns except the class columns.
numAttributes = numOneItemSets-numClasses;
// Create test and training sets (50:50 split).
createTrainingAndTestDataSets();
// Create attribute array from which split candidates are drawn.
short[] attributeList = createAttributeList();
// Create example list (the training examples to be partitioned).
int[] exampleList = generateExampleList();
// Grow the tree recursively starting from a fresh root node.
root = new DecisionTreeNode();
generateNextNode(root,attributeList,exampleList);
// Generate rule list from the tree and number the rules.
generateRuleList();
numberRulesInBinTree();
// Test classification accuracy on the held-out test set.
double accuracy = testClassification();
System.out.println("Accuracy = " + twoDecPlaces(accuracy) + "\n");
}
/** Generate decision tree node.
@param node the current decision tree node.
@param attributeList the cuttent list of attrbutes.
@param exampleList the currebt list of examples. */
private void generateNextNode(DecisionTreeNode node,
short[] attributeList, int[] exampleList) {
// Select attribute
int attIndex = selectAttribute(attributeList,exampleList);
short nodeAtt = attributeList[attIndex];
node.attNumber = nodeAtt;
// Revise attribute list by removing selected attrivute
attributeList = newAttributeList(attIndex,attributeList);
// Generate new test sets.
int numPosExamples = countPosExamples(nodeAtt,exampleList);
int numNegExamples = exampleList.length-numPosExamples;
int[] posExampleList = null;
int[] negExampleList = null;
if (numPosExamples>0) posExampleList = new int[numPosExamples];
if (numNegExamples>0) negExampleList = new int[numNegExamples];
createNewExampleLists(nodeAtt,exampleList,posExampleList,
negExampleList);
// Generate positive branch
node.posBranch = new DecisionTreeNode();
generateNextNode2(node.posBranch,posExampleList,attributeList);
// Generate negative branch
// NOTE(review): the original file is truncated at this point by web-scrape
// artifacts (site keyboard-shortcut help text removed). The remainder of
// generateNextNode (negative-branch generation) and the rest of the class
// are missing and must be recovered from the original source.