/* -------------------------------------------------------------------------- */
/*                                                                            */
/*                     DECISION TREE (INFORMATION GAIN)                       */
/*                                                                            */
/*                               Frans Coenen                                 */
/*                                                                            */
/*                          Tuesday 20 November 2007                          */
/*                                                                            */
/*                       Department of Computer Science                       */
/*                        The University of Liverpool                         */
/*                                                                            */
/* -------------------------------------------------------------------------- */

/* Generates a decision tree using information gain as the splitting
criterion. */
//package lucsKDD_ARM;
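/* Note (comment added, not in the original source): this class extends
   DecisionTree, so compiling it requires the DecisionTree superclass from the
   same LUCS-KDD code base on the classpath, e.g. (assuming the superclass
   lives in a file named DecisionTree.java in the same directory):

       javac DecisionTree.java DecTreeInfoGain.java                          */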
/** Decision tree classifier generator (information gain).
@author Frans Coenen
@version 20 November 2007 */
public class DecTreeInfoGain extends DecisionTree {

    /* ------ CONSTRUCTORS ------ */

    /** Constructor with command line arguments to be processed.
    @param args the command line arguments (array of String instances). */
    public DecTreeInfoGain(String[] args) {
        super(args);
    }
    /* ------ METHODS ------ */

    /** Selects the attribute from the attribute list on which to split
    (overrides the superclass stub).
    @param attributeList the current list of attributes.
    @param exampleList the current list of examples.
    @return the index of the selected attribute in attributeList. */
    protected int selectAttribute(short[] attributeList, int[] exampleList) {
        // Calculate the entropy of the example list with respect to the
        // class attribute
        double entropy = calcEntropy(exampleList);
        // Get the index of the attribute that delivers the highest
        // information gain
        int bestIndex = maximiseInfoGain(attributeList, exampleList, entropy);
        //System.out.println("bestIndex = " + bestIndex + "\n----------------------");
        // Return
        return(bestIndex);
    }
    /* ------ CALCULATE ENTROPY ------ */

    /** Calculates the entropy represented by the given set of examples.
    @param exampleList the current list of examples.
    @return the entropy of the exampleList with respect to the class
    attribute. */
    private double calcEntropy(int[] exampleList) {
        // Create class count array
        double[] classArray = new double[numClasses];
        //System.out.println("numAttributes = " + numAttributes);
        for (int i = 0; i < classArray.length; i++) classArray[i] = 0;
        // Loop through the example list counting class occurrences
        for (int i = 0; i < exampleList.length; i++) {
            int lastIndex = dataArray[exampleList[i]].length - 1;
            int classNum = dataArray[exampleList[i]][lastIndex];
            int classIndex = classNum - numAttributes - 1;
            //System.out.println("classIndex = " + classIndex);
            // Increment
            classArray[classIndex] = classArray[classIndex] + 1.0;
        }
        // Entropy: sum -p*log2(p) over the classes, skipping empty classes
        // (otherwise 0*log(0) would produce NaN)
        double size = (double) exampleList.length;
        double entropy = 0;
        for (int i = 0; i < classArray.length; i++) {
            if (classArray[i] == 0.0) continue;
            double quota = classArray[i] / size;
            entropy = entropy - (quota * (Math.log(quota) / Math.log(2)));
            //System.out.println("entropy so far (class " + i + ") = " + entropy);
        }
        // End
        return(entropy);
    }
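    /* For reference (comment added, not in the original source): the entropy
       computed above is the standard Shannon entropy of the class
       distribution,

           H(S) = - sum over classes c of  p_c * log2(p_c),

       where p_c is the fraction of examples in exampleList belonging to
       class c. For example, 6 examples of one class and 2 of another give
       H = -(0.75*log2(0.75) + 0.25*log2(0.25)) ~= 0.811 bits, while an even
       4/4 split gives the maximum of 1.0 bit. */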
    /** Calculates the information gain for each attribute in the given set
    and selects the attribute with the highest gain (reduction in entropy).
    @param attributeList the current list of attributes.
    @param exampleList the current list of examples.
    @param entropy the entropy of the exampleList with respect to the class
    attribute.
    @return the index of the selected attribute. */
    private int maximiseInfoGain(short[] attributeList, int[] exampleList,
                                double entropy) {
        // Create array in which to store the gain associated with each
        // attribute in the attribute set
        double[] attributeGains = new double[attributeList.length];
        // Process the attribute list and calculate the individual gains
        for (int i = 0; i < attributeList.length; i++) {
            attributeGains[i] = calcGain(attributeList[i], exampleList,
                                entropy);
        }
        // Select the attribute with the highest gain (reduction in entropy)
        double bestGain = attributeGains[0];
        int bestIndex = 0;
        for (int i = 1; i < attributeGains.length; i++) {
            if (bestGain < attributeGains[i]) {
                bestGain = attributeGains[i];
                bestIndex = i;
            }
        }
        // Return best index
        return(bestIndex);
    }
    /** Calculates the gain for the given attribute.
    @param attribute the given attribute.
    @param exampleList the current list of example records.
    @param entropy the entropy of the exampleList with respect to the class
    attribute.
    @return the information gain if the given attribute is used as the
    splitting attribute. */
    private double calcGain(short attribute, int[] exampleList,
                                double entropy) {
        double posExamples = 0;
        double negExamples = 0;
        // Loop through the example list counting examples that do and do
        // not contain the attribute
        for (int i = 0; i < exampleList.length; i++) {
            if (memberOf(attribute, dataArray[exampleList[i]]))
                    posExamples = posExamples + 1.0;
            else negExamples = negExamples + 1.0;
        }
        // Convert counts to proportions
        negExamples = negExamples / exampleList.length;
        posExamples = posExamples / exampleList.length;
        //System.out.println("negExamples = " + negExamples + ", posExamples = " +
        //posExamples);
        // Calculate (guarding against log(0))
        double gainPos = 0;
        double gainNeg = 0;
        if (posExamples > 0.0) gainPos = posExamples *
                    (Math.log(posExamples) / Math.log(2));
        if (negExamples > 0.0) gainNeg = negExamples *
                    (Math.log(negExamples) / Math.log(2));
        double gain = entropy - (-gainPos - gainNeg);
        //System.out.println("gainPos = " + gainPos + ", gainNeg = " + gainNeg +
        //"\n\rgain attribute " + attribute + " = " + gain);
        // Return
        return(gain);
    }
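    /* For reference (comment added, not in the original source): as coded,
       calcGain returns

           gain = H(S) - ( -p*log2(p) - q*log2(q) ),

       where p and q are the fractions of examples that do and do not contain
       the attribute. That is, it subtracts the entropy of the
       contains/does-not-contain split itself; the textbook ID3 gain would
       instead subtract the weighted class entropies of the two resulting
       subsets, i.e. p*H(S_pos) + q*H(S_neg). */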
}
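
/* A minimal, self-contained sketch (added for illustration, not part of the
   original LUCS-KDD source): it reproduces the entropy arithmetic used by
   calcEntropy on a toy class-count distribution, without touching
   DecTreeInfoGain or its DecisionTree superclass. The class and method names
   below are hypothetical. */
class InfoGainSketch {

    /** Entropy of a class-count distribution: H = -sum(p_i * log2(p_i)). */
    static double entropy(int[] classCounts) {
        double total = 0.0;
        for (int c : classCounts) total += c;
        double h = 0.0;
        for (int c : classCounts) {
            if (c == 0) continue;                 // treat 0*log2(0) as 0
            double p = c / total;
            h -= p * (Math.log(p) / Math.log(2));
        }
        return h;
    }

    public static void main(String[] args) {
        // 6 examples of one class, 2 of another: approximately 0.811 bits
        System.out.println("entropy {6,2} = " + entropy(new int[] {6, 2}));
        // An even 4/4 split: the maximum of 1.0 bit
        System.out.println("entropy {4,4} = " + entropy(new int[] {4, 4}));
    }
}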