📄 ridor.java
字号:
if(!ins.isMissing(attr)){ // Attribute not missing if(antd.isCover(ins)){ // Covered by this antecedent coverage[x] += ins.weight(); data.add(ins); // Add to data for further pruning if(Utils.eq(ins.classValue(), m_Class)) // Accurate prediction worthValue[x] += ins.weight(); } } } if(coverage[x] != 0) worthRt[x] = worthValue[x]/coverage[x]; } /* Prune the antecedents according to the accuracy parameters */ for(int z=(size-1); z > 0; z--) if(Utils.sm(worthRt[z], worthRt[z-1])) m_Antds.removeElementAt(z); else break; /* Check whether this rule is a default rule */ if(m_Antds.size() == 1){ defAccu = computeDefAccu(pruneData); defAccuRate = defAccu/total; // Compute def. accuracy if(Utils.sm(worthRt[0], defAccuRate)){ // Becomes a default rule m_Antds.removeAllElements(); } } /* Update the worth parameters of this rule*/ int antdsSize = m_Antds.size(); if(antdsSize != 0){ // Not a default rule m_Worth = worthValue[antdsSize-1]; // WorthValues of the last antecedent m_WorthRate = worthRt[antdsSize-1]; m_CoverP = coverage[antdsSize-1]; Antd last = (Antd)m_Antds.lastElement(); m_CoverG = last.getCover(); m_AccuG = last.getAccu(); } else{ // Default rule m_Worth = defAccu; // Default WorthValues m_WorthRate = defAccuRate; m_CoverP = total; } } /** * Private function to compute default number of accurate instances * in the specified data for m_Class * * @param data the data in question * @return the default accuracy number */ private double computeDefAccu(Instances data){ double defAccu=0; for(int i=0; i<data.numInstances(); i++){ Instance inst = data.instance(i); if(Utils.eq(inst.classValue(), m_Class)) defAccu += inst.weight(); } return defAccu; } /** The following are get functions after prune() has set the value of worthRate and worth*/ public double getWorthRate(){ return m_WorthRate; } public double getWorth(){ return m_Worth; } public double getCoverP(){ return m_CoverP; } public double getCoverG(){ return m_CoverG; } public double getAccuG(){ return m_AccuG; } /** * Prints this rule with the specified class label * * @param att the string standing for attribute in the consequent of this rule * @param cl the string standing for value in the consequent of this rule * @return a textual description of this rule with the specified class label */ public String toString(String att, String cl) { StringBuffer text = new StringBuffer(); if(m_Antds.size() > 0){ for(int j=0; j< (m_Antds.size()-1); j++) text.append("(" + ((Antd)(m_Antds.elementAt(j))).toString()+ ") and "); text.append("("+((Antd)(m_Antds.lastElement())).toString() + ")"); } text.append(" => " + att + " = " + cl); text.append(" ("+m_CoverG+"/"+(m_CoverG - m_AccuG)+") ["+ m_CoverP+"/"+(m_CoverP - m_Worth)+"]"); return text.toString(); } /** * Prints this rule * * @return a textual description of this rule */ public String toString() { return toString(m_ClassAttribute.name(), m_ClassAttribute.value((int)m_Class)); } } /** * The single antecedent in the rule, which is composed of an attribute and * the corresponding value. There are two inherited classes, namely NumericAntd * and NominalAntd in which the attributes are numeric and nominal respectively. */ private abstract class Antd implements Serializable { /** The attribute of the antecedent */ protected Attribute att; /** The attribute value of the antecedent. For numeric attribute, value is either 0(1st bag) or 1(2nd bag) */ protected double value; /** The maximum infoGain achieved by this antecedent test */ protected double maxInfoGain; /** The accurate rate of this antecedent test on the growing data */ protected double accuRate; /** The coverage of this antecedent */ protected double cover; /** The accurate data for this antecedent */ protected double accu; /** Constructor*/ public Antd(Attribute a){ att=a; value=Double.NaN; maxInfoGain = 0; accuRate = Double.NaN; cover = Double.NaN; accu = Double.NaN; } /* The abstract members for inheritance */ public abstract Instances[] splitData(Instances data, double defAcRt, double cla); public abstract boolean isCover(Instance inst); public abstract String toString(); /* Get functions of this antecedent */ public Attribute getAttr(){ return att; } public double getAttrValue(){ return value; } public double getMaxInfoGain(){ return maxInfoGain; } public double getAccuRate(){ return accuRate; } public double getAccu(){ return accu; } public double getCover(){ return cover; } } /** * The antecedent with numeric attribute */ private class NumericAntd extends Antd { /** for serialization */ static final long serialVersionUID = 1968761518014492214L; /** The split point for this numeric antecedent */ private double splitPoint; /** Constructor*/ public NumericAntd(Attribute a){ super(a); splitPoint = Double.NaN; } /** Get split point of this numeric antecedent */ public double getSplitPoint(){ return splitPoint; } /** * Implements the splitData function. * This procedure is to split the data into two bags according * to the information gain of the numeric attribute value * The maximum infoGain is also calculated. * * @param insts the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances insts, double defAcRt, double cl){ Instances data = new Instances(insts); data.sort(att); int total=data.numInstances();// Total number of instances without // missing value for att int split=1; // Current split position int prev=0; // Previous split position int finalSplit=split; // Final split position maxInfoGain = 0; value = 0; // Compute minimum number of Instances required in each split double minSplit = 0.1 * (data.sumOfWeights()) / 2.0; if (Utils.smOrEq(minSplit,m_MinNo)) minSplit = m_MinNo; else if (Utils.gr(minSplit,25)) minSplit = 25; double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0; for(int x=0; x<data.numInstances(); x++){ Instance inst = data.instance(x); if(inst.isMissing(att)){ total = x; break; } sndCover += inst.weight(); if(Utils.eq(inst.classValue(), cl)) sndAccu += inst.weight(); } // Enough Instances with known values? if (Utils.sm(sndCover,(2*minSplit))) return null; if(total == 0) return null; // Data all missing for the attribute splitPoint = data.instance(total-1).value(att); for(; split < total; split++){ if(!Utils.eq(data.instance(split).value(att), data.instance(prev).value(att))){ // Can't split within same value for(int y=prev; y<split; y++){ Instance inst = data.instance(y); fstCover += inst.weight(); sndCover -= inst.weight(); if(Utils.eq(data.instance(y).classValue(), cl)){ fstAccu += inst.weight(); // First bag positive# ++ sndAccu -= inst.weight(); // Second bag positive# -- } } if(Utils.sm(fstCover, minSplit) || Utils.sm(sndCover, minSplit)){ prev=split; // Cannot split because either continue; // split has not enough data } double fstAccuRate = 0, sndAccuRate = 0; if(!Utils.eq(fstCover,0)) fstAccuRate = fstAccu/fstCover; if(!Utils.eq(sndCover,0)) sndAccuRate = sndAccu/sndCover; /* Which bag has higher information gain? */ boolean isFirst; double fstInfoGain, sndInfoGain; double accRate, infoGain, coverage, accurate; fstInfoGain = Utils.eq(fstAccuRate, 0) ? 0 : (fstAccu*(Utils.log2(fstAccuRate) - Utils.log2(defAcRt))); sndInfoGain = Utils.eq(sndAccuRate, 0) ? 0 : (sndAccu*(Utils.log2(sndAccuRate) - Utils.log2(defAcRt))); if(Utils.gr(fstInfoGain,sndInfoGain) || (Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.grOrEq(fstAccuRate,sndAccuRate)))){ isFirst = true; infoGain = fstInfoGain; accRate = fstAccuRate; accurate = fstAccu; coverage = fstCover; } else{ isFirst = false; infoGain = sndInfoGain; accRate = sndAccuRate; accurate = sndAccu; coverage = sndCover; } boolean isUpdate = Utils.gr(infoGain, maxInfoGain); /* Check whether so far the max infoGain */ if(isUpdate){ splitPoint = (data.instance(split).value(att) + data.instance(prev).value(att))/2; value = ((isFirst) ? 0 : 1); accuRate = accRate; accu = accurate; cover = coverage; maxInfoGain = infoGain; finalSplit = split; } prev=split; } } /* Split the data */ Instances[] splitData = new Instances[2]; splitData[0] = new Instances(data, 0, finalSplit); splitData[1] = new Instances(data, finalSplit, total-finalSplit); return splitData; } /** * Whether the instance is covered by this antecedent * * @param inst the instance in question * @return the boolean value indicating whether the instance is covered * by this antecedent */ public boolean isCover(Instance inst){ boolean isCover=false; if(!inst.isMissing(att)){ if(Utils.eq(value, 0)){ if(Utils.smOrEq(inst.value(att), splitPoint)) isCover=true; } else if(Utils.gr(inst.value(att), splitPoint)) isCover=true; } return isCover; } /** * Prints this antecedent * * @return a textual description of this antecedent */ public String toString() { String symbol = Utils.eq(value, 0.0) ? " <= " : " > "; return (att.name() + symbol + Utils.doubleToString(splitPoint, 6)); } } /** * The antecedent with nominal attribute */ private class NominalAntd extends Antd { /** for serialization */ static final long serialVersionUID = -256386137196078004L; /* The parameters of infoGain calculated for each attribute value */ private double[] accurate; private double[] coverage; private double[] infoGain; /** Constructor*/ public NominalAntd(Attribute a){ super(a); int bag = att.numValues(); accurate = new double[bag]; coverage = new double[bag]; infoGain = new double[bag]; } /** * Implements the splitData function. * This procedure is to split the data into bags according * to the nominal attribute value * The infoGain for each bag is also calculated. * * @param data the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances data, double defAcRt, double cl){ int bag = att.numValues(); Instances[] splitData = new Instances[bag]; for(int x=0; x<bag; x++){ accurate[x] = coverage[x] = infoGain[x] = 0; splitData[x] = new Instances(data, data.numInstances()); } for(int x=0; x<data.numInstances(); x++){ Instance inst=data.instance(x); if(!inst.isMissing(att)){ int v = (int)inst.value(att); splitData[v].add(inst); coverage[v] += inst.weight(); if(Utils.eq(inst.classValue(), cl)) accurate[v] += inst.weight(); } } // Check if >=2 splits have more than the minimal data int count=0; for(int x=0; x<bag; x++){ double t = coverage[x]; if(Utils.grOrEq(t, m_MinNo)){ double p = accurate[x]; if(!Utils.eq(t, 0.0)) infoGain[x] = p *((Utils.log2(p/t)) - (Utils.log2(defAcRt))); ++count; } } if(count < 2) // Don't split return null; value = (double)Utils.maxIndex(infoGain); cover = coverage[(int)value]; accu = accurate[(int)value]; if(!Utils.eq(cover,0)) accuRate = accu / cover; else accuRate = 0; maxInfoGain = infoGain [(int)value];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -