📄 ridor.java
字号:
m_Worth = worthValue[antdsSize-1]; // WorthValues of the last antecedent
m_WorthRate = worthRt[antdsSize-1];
m_CoverP = coverage[antdsSize-1];
Antd last = (Antd)m_Antds.lastElement();
m_CoverG = last.getCover();
m_AccuG = last.getAccu();
}
else{ // Default rule
m_Worth = defAccu; // Default WorthValues
m_WorthRate = defAccuRate;
m_CoverP = total;
}
}
/**
 * Sums the weights of all instances in the given data whose class value
 * equals m_Class.
 *
 * @param data the data in question
 * @return the accumulated weight of the instances labelled with m_Class
 */
private double computeDefAccu(Instances data){
    double matchedWeight = 0;
    final int count = data.numInstances();
    int idx = 0;
    while (idx < count) {
        Instance current = data.instance(idx);
        idx++;
        // Instances of any other class contribute nothing
        if (!Utils.eq(current.classValue(), m_Class)) {
            continue;
        }
        matchedWeight += current.weight();
    }
    return matchedWeight;
}
/*
 * Plain accessors. Per the original note, worthRate and worth are meaningful
 * only after prune() has set them.
 */

/** @return the current value of m_WorthRate */
public double getWorthRate(){
    return m_WorthRate;
}

/** @return the current value of m_Worth */
public double getWorth(){
    return m_Worth;
}

/** @return the current value of m_CoverP */
public double getCoverP(){
    return m_CoverP;
}

/** @return the current value of m_CoverG */
public double getCoverG(){
    return m_CoverG;
}

/** @return the current value of m_AccuG */
public double getAccuG(){
    return m_AccuG;
}
/**
 * Renders this rule as text using the given consequent.
 * The antecedents are written as "(a) and (b) ...", followed by
 * " => att = cl" and two statistic groups: "(m_CoverG/(m_CoverG - m_AccuG))"
 * and "[m_CoverP/(m_CoverP - m_Worth)]".
 *
 * @param att the string standing for attribute in the consequent of this rule
 * @param cl the string standing for value in the consequent of this rule
 * @return a textual description of this rule with the specified class label
 */
public String toString(String att, String cl) {
    StringBuffer out = new StringBuffer();

    // Antecedents, each parenthesised and joined by " and "
    final int antdCount = m_Antds.size();
    for (int k = 0; k < antdCount; k++) {
        if (k > 0) {
            out.append(" and ");
        }
        Antd antd = (Antd) m_Antds.elementAt(k);
        out.append("(" + antd.toString() + ")");
    }

    // Consequent
    out.append(" => " + att + " = " + cl);

    // Statistics: first group from m_CoverG/m_AccuG, second from m_CoverP/m_Worth
    String firstGroup = "(" + m_CoverG + "/" + (m_CoverG - m_AccuG) + ")";
    String secondGroup = "[" + m_CoverP + "/" + (m_CoverP - m_Worth) + "]";
    out.append(" " + firstGroup + " " + secondGroup);

    return out.toString();
}
/**
 * Renders this rule as text, taking the consequent from m_ClassAttribute:
 * its name and the label found at index (int)m_Class.
 *
 * @return a textual description of this rule
 */
public String toString() {
    String attributeName = m_ClassAttribute.name();
    String classLabel = m_ClassAttribute.value((int)m_Class);
    return toString(attributeName, classLabel);
}
}
/**
 * A single antecedent (condition) of a rule, made up of an attribute and a
 * value for it. Two subclasses exist: NumericAntd for numeric attributes and
 * NominalAntd for nominal attributes.
 */
private abstract class Antd implements Serializable {

    /** The attribute tested by this antecedent */
    protected Attribute att;

    /**
     * The attribute value of this antecedent.
     * For a numeric attribute it is either 0 (1st bag) or 1 (2nd bag).
     */
    protected double value;

    /** The maximum infoGain achieved by this antecedent test */
    protected double maxInfoGain;

    /** The accuracy rate of this antecedent test on the growing data */
    protected double accuRate;

    /** The coverage of this antecedent */
    protected double cover;

    /** The accurate data (weight) for this antecedent */
    protected double accu;

    /**
     * Creates an antecedent on the given attribute. The info gain starts at 0;
     * value, accuracy rate, coverage and accurate weight start as NaN.
     *
     * @param a the attribute this antecedent tests
     */
    public Antd(Attribute a){
        this.att = a;
        this.value = Double.NaN;
        this.maxInfoGain = 0;
        this.accuRate = Double.NaN;
        this.cover = Double.NaN;
        this.accu = Double.NaN;
    }

    /* ---- Members every concrete antecedent has to supply ---- */

    public abstract Instances[] splitData(Instances data, double defAcRt, double cla);

    public abstract boolean isCover(Instance inst);

    public abstract String toString();

    /* ---- Read access to the state of this antecedent ---- */

    public Attribute getAttr(){
        return att;
    }

    public double getAttrValue(){
        return value;
    }

    public double getMaxInfoGain(){
        return maxInfoGain;
    }

    public double getAccuRate(){
        return accuRate;
    }

    public double getAccu(){
        return accu;
    }

    public double getCover(){
        return cover;
    }
}
/**
 * The antecedent for a numeric attribute. It compares the attribute value
 * with a split point: when value is 0 the test is "<= splitPoint",
 * otherwise it is "> splitPoint".
 */
private class NumericAntd extends Antd{

    /** The split point for this numeric antecedent */
    private double splitPoint;

    /**
     * Creates a numeric antecedent whose split point is not yet defined (NaN).
     *
     * @param a the attribute this antecedent tests
     */
    public NumericAntd(Attribute a){
        super(a);
        splitPoint = Double.NaN;
    }

    /**
     * Gets the split point of this numeric antecedent.
     *
     * @return the split point
     */
    public double getSplitPoint(){
        return splitPoint;
    }

    /**
     * Implements the splitData function.
     * Sorts the data on the attribute and tries every boundary between two
     * different attribute values as a cut into two bags. For each cut the
     * bag with the higher information gain (ties broken by accuracy rate,
     * first bag preferred) is the candidate; the best candidate over all cuts
     * sets splitPoint, value, accuRate, accu, cover and maxInfoGain.
     * If no cut yields an information gain above 0, those statistics are left
     * as they were, maxInfoGain stays 0, value stays 0, splitPoint is the
     * largest known attribute value and the data is cut after its first
     * instance.
     *
     * @param insts the data to be split
     * @param defAcRt the default accuracy rate for data
     * @param cl the class label to be predicted
     * @return the two bags (instances before the cut, instances from the cut
     *         up to the first missing value), or null if the known-value
     *         instances weigh less than twice the minimum bag weight or
     *         there is no instance before the first missing value
     */
    public Instances[] splitData(Instances insts, double defAcRt, double cl){
        // Sort a separate Instances object built from the input
        Instances sorted = new Instances(insts);
        sorted.sort(att);
        final int size = sorted.numInstances();

        maxInfoGain = 0;
        value = 0;

        // Minimum weight required in each bag: 5% of the total weight,
        // but at least m_MinNo and at most 25
        double minBag = 0.1 * (sorted.sumOfWeights()) / 2.0;
        if (Utils.smOrEq(minBag, m_MinNo)) {
            minBag = m_MinNo;
        } else if (Utils.gr(minBag, 25)) {
            minBag = 25;
        }

        double leftCover = 0, rightCover = 0, leftAccu = 0, rightAccu = 0;

        // Start with everything in the second bag; scanning ends at the first
        // instance whose attribute value is missing
        int known = 0;
        while (known < size) {
            Instance candidate = sorted.instance(known);
            if (candidate.isMissing(att)) {
                break;
            }
            rightCover += candidate.weight();
            if (Utils.eq(candidate.classValue(), cl)) {
                rightAccu += candidate.weight();
            }
            known++;
        }

        // Not enough weight with known values, or no known value at all
        if (Utils.sm(rightCover, (2 * minBag)) || known == 0) {
            return null;
        }

        splitPoint = sorted.instance(known - 1).value(att);

        int bestCut = 1;      // Index where the data is finally cut
        int groupStart = 0;   // First index of the current run of equal values
        for (int cut = 1; cut < known; cut++) {
            boolean sameValue = Utils.eq(sorted.instance(cut).value(att),
                                         sorted.instance(groupStart).value(att));
            if (sameValue) {
                continue;     // Can't split within same value
            }

            // Move the finished run of equal values from the second bag to the first
            for (int k = groupStart; k < cut; k++) {
                Instance moved = sorted.instance(k);
                leftCover += moved.weight();
                rightCover -= moved.weight();
                if (Utils.eq(moved.classValue(), cl)) {
                    leftAccu += moved.weight();
                    rightAccu -= moved.weight();
                }
            }

            // Only evaluate the cut when both bags hold enough data
            if (!Utils.sm(leftCover, minBag) && !Utils.sm(rightCover, minBag)) {
                double leftRate = Utils.eq(leftCover, 0) ? 0 : leftAccu / leftCover;
                double rightRate = Utils.eq(rightCover, 0) ? 0 : rightAccu / rightCover;

                double leftGain = 0;
                if (!Utils.eq(leftRate, 0)) {
                    leftGain = leftAccu * (Utils.log2(leftRate) - Utils.log2(defAcRt));
                }
                double rightGain = 0;
                if (!Utils.eq(rightRate, 0)) {
                    rightGain = rightAccu * (Utils.log2(rightRate) - Utils.log2(defAcRt));
                }

                // Higher gain wins; on equal gain the first bag wins unless
                // its accuracy rate is lower
                boolean leftWins = Utils.gr(leftGain, rightGain)
                    || (Utils.eq(leftGain, rightGain) && Utils.grOrEq(leftRate, rightRate));
                double candidateGain = leftWins ? leftGain : rightGain;

                // Best cut so far?
                if (Utils.gr(candidateGain, maxInfoGain)) {
                    splitPoint = (sorted.instance(cut).value(att) +
                                  sorted.instance(groupStart).value(att)) / 2;
                    if (leftWins) {
                        value = 0;
                        accuRate = leftRate;
                        accu = leftAccu;
                        cover = leftCover;
                    } else {
                        value = 1;
                        accuRate = rightRate;
                        accu = rightAccu;
                        cover = rightCover;
                    }
                    maxInfoGain = candidateGain;
                    bestCut = cut;
                }
            }

            groupStart = cut;
        }

        // Build the two bags; instances from the first missing value on are dropped
        Instances[] bags = new Instances[2];
        bags[0] = new Instances(sorted, 0, bestCut);
        bags[1] = new Instances(sorted, bestCut, known - bestCut);
        return bags;
    }

    /**
     * Whether the instance is covered by this antecedent.
     * An instance with a missing value for the attribute is never covered.
     *
     * @param inst the instance in question
     * @return the boolean value indicating whether the instance is covered
     *         by this antecedent
     */
    public boolean isCover(Instance inst){
        if (inst.isMissing(att)) {
            return false;
        }
        if (Utils.eq(value, 0)) {
            return Utils.smOrEq(inst.value(att), splitPoint);
        }
        return Utils.gr(inst.value(att), splitPoint);
    }

    /**
     * Prints this antecedent as attribute name, comparison symbol and
     * split point.
     *
     * @return a textual description of this antecedent
     */
    public String toString() {
        String symbol;
        if (Utils.eq(value, 0.0)) {
            symbol = " <= ";
        } else {
            symbol = " > ";
        }
        String threshold = Utils.doubleToString(splitPoint, 6);
        return att.name() + symbol + threshold;
    }
}
/**
* The antecedent with nominal attribute
*/
private class NominalAntd extends Antd{
/* The parameters of infoGain calculated for each attribute value */
private double[] accurate;
private double[] coverage;
private double[] infoGain;
/* Constructor*/
public NominalAntd(Attribute a){
super(a);
int bag = att.numValues();
accurate = new double[bag];
coverage = new double[bag];
infoGain = new double[bag];
}
/**
* Implements the splitData function.
* This procedure is to split the data into bags according
* to the nominal attribute value
* The infoGain for each bag is also calculated.
*
* @param data the data to be split
* @param defAcRt the default accuracy rate for data
* @param cl the class label to be predicted
* @return the array of data after split
*/
public Instances[] splitData(Instances data, double defAcRt, double cl){
int bag = att.numValues();
Instances[] splitData = new Instances[bag];
for(int x=0; x<bag; x++){
accurate[x] = coverage[x] = infoGain[x] = 0;
splitData[x] = new Instances(data, data.numInstances());
}
for(int x=0; x<data.numInstances(); x++){
Instance inst=data.instance(x);
if(!inst.isMissing(att)){
int v = (int)inst.value(att);
splitData[v].add(inst);
coverage[v] += inst.weight();
if(Utils.eq(inst.classValue(), cl))
accurate[v] += inst.weight();
}
}
// Check if >=2 splits have more than the minimal data
int count=0;
for(int x=0; x<bag; x++){
double t = coverage[x];
if(Utils.grOrEq(t, m_MinNo)){
double p = accurate[x];
if(!Utils.eq(t, 0.0))
infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt)));
++count;
}
}
if(count < 2) // Don't split
return null;
value = (double)Utils.maxIndex(infoGain);
cover = coverage[(int)value];
accu = accurate[(int)value];
if(!Utils.eq(cover,0))
accuRate = accu / cover;
else accuRate = 0;
maxInfoGain = infoGain [(int)value];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -