📄 ridor.java
字号:
// NOTE(review): this chunk begins inside findRule()'s rule-selection loop —
// the method signature and the start of the loop lie before the visible text.
// `rule` is the candidate rule, `wr`/`w` its worth rate and worth value.
        // Keep the candidate with the highest worth rate; break ties on worth.
        if(Utils.gr(wr, bestWorthRate) ||
           (Utils.eq(wr, bestWorthRate) && Utils.gr(w, bestWorth))){
          bestRule = rule;
          bestWorthRate = wr;
          bestWorth = w;
        }
      }

      if (bestRule == null)
        throw new Exception("Something wrong here inside findRule()!");

      // Stop when the best rule is no better than chance or is the default rule.
      if(Utils.sm(bestWorthRate, 0.5) || (!bestRule.hasAntds()))
        break; // No more good rules generated

      Instances newData = new Instances(data);
      data = new Instances(newData, 0);// Empty the data
      classCount = 0;
      double cover = 0; // Coverage of this rule on whole data

      // Keep only the data NOT covered by the best rule for the next iteration;
      // accumulate the weight the rule does cover.
      for(int l=0; l<newData.numInstances(); l++){
        Instance datum = newData.instance(l);
        if(!bestRule.isCover(datum)){// Data not covered by the previous rule
          data.add(datum);
          if(Utils.eq(datum.classValue(), 0))
            classCount += datum.weight(); // The predicted class in the data
        }
        else
          cover += datum.weight();
      }

      wAcRt += computeWeightedAcRt(bestWorthRate, cover, total);
      ruleset.addElement(bestRule);
    }

    /* The weighted default accuracy: correct fraction of the leftover data
       handled by the default class. */
    double wDefAcRt = (data.sumOfWeights()-classCount) / total;
    wAcRt += wDefAcRt;
    return wAcRt;
  }

  /**
   * Private function to combine two data sets.
   *
   * @param data1 the data to which data2 is appended
   * @param data2 the data to be appended to data1
   * @return the merged data
   */
  private Instances append(Instances data1, Instances data2){
    Instances data = new Instances(data1);
    for(int i=0; i<data2.numInstances(); i++)
      data.add(data2.instance(i));

    return data;
  }

  /**
   * Compute the weighted average of the accuracy rate of a certain rule.
   * Each rule is weighted by its coverage proportion in the whole data,
   * so the contribution of one rule is
   *
   *   (worth rate) * (coverage proportion)
   *
   *                            coverage of the rule on the whole data
   * where coverage proportion = -----------------------------------------
   *                            the whole data size fed into the ruleset
   *
   * @param worthRt the worth rate
   * @param cover the coverage of the rule on the whole data
   * @param total the total data size fed into the ruleset
   * @return the weighted accuracy rate of this rule
   */
  private double computeWeightedAcRt(double worthRt, double cover, double total){
    return (worthRt * (cover/total));
  }

  /**
   * Builds an array of data according to their true class label.
   * Each bag of data is filtered through the rule specified and
   * is totally covered by this rule.
   * Both the data covered and uncovered by the rule will be returned
   * by the procedure.
   *
   * @param rule the rule covering the data
   * @param dataByClass the array of data to be covered by the rule
   * @return the arrays of data both covered and not covered by the rule
   */
  private Instances[][] divide(RidorRule rule, Instances[] dataByClass){
    int len = dataByClass.length;
    Instances[][] dataBags = new Instances[2][len];

    for(int i=0; i < len; i++){
      Instances[] dvdData = rule.coveredByRule(dataByClass[i]);
      dataBags[0][i] = dvdData[0]; // Covered by the rule
      dataBags[1][i] = dvdData[1]; // Not covered by the rule
    }

    return dataBags;
  }

  /**
   * The size of a certain node of Ridor, i.e. the
   * number of rules generated within and below this node.
   *
   * @return the size of this node
   */
  public int size(){
    int size = 0;
    if(rules != null){
      for(int i=0; i < rules.length; i++)
        size += excepts[i].size(); // The children's size
      size += rules.length;        // This node's size
    }
    return size;
  }

  /**
   * Prints all the rules of one node of Ridor.
   *
   * @return a textual description of one node of Ridor
   */
  public String toString(){
    StringBuffer text = new StringBuffer();

    if(level == 1)
      text.append(m_Class.name() + " = " + m_Class.value((int)getDefClass())+
                  " ("+m_Cover+"/"+m_Err+")\n");

    if(rules != null){
      for(int i=0; i < rules.length; i++){
        // Indent proportionally to the depth of this exception node.
        for(int j=0; j < level; j++)
          text.append(" ");
        String cl = m_Class.value((int)(excepts[i].getDefClass()));
        text.append(" Except " + rules[i].toString(m_Class.name(), cl)+
                    "\n" + excepts[i].toString());
      }
    }

    return text.toString();
  }
}

/**
 * This class implements a single rule that predicts the 2-class distribution.
 *
 * A rule consists of antecedents "AND"ed together and the consequent (class value)
 * for the classification. In this case, the consequent is the distribution of
 * the available classes (always 2 classes) in the dataset.
 * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to select
 * an antecedent and Reduced Error Pruning (REP) is used to prune the rule.
 */
private class RidorRule implements WeightedInstancesHandler, Serializable {

  /** for serialization */
  static final long serialVersionUID = 4375199423973848157L;

  /** The internal representation of the class label to be predicted */
  private double m_Class = -1;

  /** The class attribute of the data */
  private Attribute m_ClassAttribute;

  /** The vector of antecedents of this rule */
  protected FastVector m_Antds = null;

  /** The worth rate of this rule, in this case, accuracy rate in the pruning data */
  private double m_WorthRate = 0;

  /** The worth value of this rule, in this case, accurate # in pruning data */
  private double m_Worth = 0;

  /** The sum of weights of the data covered by this rule in the pruning data */
  private double m_CoverP = 0;

  /** The accurate and covered data of this rule in the growing data */
  private double m_CoverG = 0, m_AccuG = 0;

  /** The access functions for parameters */
  public void setPredictedClass(double cl){ m_Class = cl; }
  public double getPredictedClass(){ return m_Class; }

  /**
   * Builds a single rule learner with REP dealing with 2 classes.
   * This rule learner always tries to predict the class with label
   * m_Class.
   *
   * @param instances the training data
   * @throws Exception if classifier can't be built successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    m_ClassAttribute = instances.classAttribute();
    if (!m_ClassAttribute.isNominal())
      throw new UnsupportedClassTypeException(" Only nominal class, please.");
    if(instances.numClasses() != 2)
      throw new Exception(" Only 2 classes, please.");

    Instances data = new Instances(instances);
    if(Utils.eq(data.sumOfWeights(),0))
      throw new Exception(" No training data.");

    data.deleteWithMissingClass();
    if(Utils.eq(data.sumOfWeights(),0))
      throw new Exception(" The class labels of all the training data are missing.");

    if(data.numInstances() < m_Folds)
      throw new Exception(" Not enough data for REP.");

    m_Antds = new FastVector();

    /* Split data into Grow and Prune: the last CV fold becomes the
       pruning set, the rest the growing set. */
    m_Random = new Random(m_Seed);
    data.randomize(m_Random);
    data.stratify(m_Folds);
    Instances growData=data.trainCV(m_Folds, m_Folds-1, m_Random);
    Instances pruneData=data.testCV(m_Folds, m_Folds-1);

    grow(growData);   // Build this rule
    prune(pruneData); // Prune this rule
  }

  /**
   * Find all the instances in the dataset covered by this rule.
   * The instances not covered will also be deducted from the original data
   * and returned by this procedure.
   *
   * @param insts the dataset to be covered by this rule
   * @return the instances covered and not covered by this rule
   */
  public Instances[] coveredByRule(Instances insts){
    Instances[] data = new Instances[2];
    data[0] = new Instances(insts, insts.numInstances());
    data[1] = new Instances(insts, insts.numInstances());

    for(int i=0; i<insts.numInstances(); i++){
      Instance datum = insts.instance(i);
      if(isCover(datum))
        data[0].add(datum); // Covered by this rule
      else
        data[1].add(datum); // Not covered by this rule
    }

    return data;
  }

  /**
   * Whether the instance is covered by this rule, i.e. whether it
   * satisfies every antecedent of this rule.
   *
   * @param datum the instance in question
   * @return the boolean value indicating whether the instance is covered by this rule
   */
  public boolean isCover(Instance datum){
    boolean isCover=true;

    for(int i=0; i<m_Antds.size(); i++){
      Antd antd = (Antd)m_Antds.elementAt(i);
      if(!antd.isCover(datum)){
        isCover = false;
        break;
      }
    }

    return isCover;
  }

  /**
   * Whether this rule has antecedents, i.e. whether it is a default rule.
   *
   * @return the boolean value indicating whether the rule has antecedents
   */
  public boolean hasAntds(){
    if (m_Antds == null)
      return false;
    else
      return (m_Antds.size() > 0);
  }

  /**
   * Build one rule using the growing data: greedily add the antecedent
   * with the highest information gain until the rule is perfect, runs
   * out of data, or runs out of attributes.
   *
   * @param data the growing data used to build the rule
   */
  private void grow(Instances data){
    Instances growData = new Instances(data);
    m_AccuG = computeDefAccu(growData);
    m_CoverG = growData.sumOfWeights();
    /* Compute the default accurate rate of the growing data */
    double defAcRt= m_AccuG / m_CoverG;

    /* Keep the record of which attributes have already been used */
    boolean[] used=new boolean [growData.numAttributes()];
    for (int k=0; k<used.length; k++)
      used[k]=false;
    int numUnused=used.length;

    double maxInfoGain;
    boolean isContinue = true; // The stopping criterion of this rule

    while (isContinue){
      maxInfoGain = 0; // We require that infoGain be positive

      /* Build a list of antecedents */
      Antd oneAntd=null;
      Instances coverData = null;
      Enumeration enumAttr=growData.enumerateAttributes();
      int index=-1;

      /* Build one condition based on all attributes not used yet */
      while (enumAttr.hasMoreElements()){
        Attribute att= (Attribute)(enumAttr.nextElement());
        index++;

        Antd antd =null;
        if(att.isNumeric())
          antd = new NumericAntd(att);
        else
          antd = new NominalAntd(att);

        if(!used[index]){
          /* Compute the best information gain for each attribute,
             it's stored in the antecedent formed by this attribute.
             This procedure returns the data covered by the antecedent */
          Instances coveredData = computeInfoGain(growData, defAcRt, antd);
          if(coveredData != null){
            double infoGain = antd.getMaxInfoGain();
            if(Utils.gr(infoGain, maxInfoGain)){
              oneAntd=antd;
              coverData = coveredData;
              maxInfoGain = infoGain;
            }
          }
        }
      }

      if(oneAntd == null)
        return; // No positive-gain antecedent found: stop growing

      //Numeric attributes can be used more than once
      if(!oneAntd.getAttr().isNumeric()){
        used[oneAntd.getAttr().index()]=true;
        numUnused--;
      }

      m_Antds.addElement((Object)oneAntd);
      growData = coverData;// Grow data size is shrinking
      defAcRt = oneAntd.getAccuRate();

      /* Stop if no more data, rule perfect, no more attributes */
      if(Utils.eq(growData.sumOfWeights(), 0.0) ||
         Utils.eq(defAcRt, 1.0) ||
         (numUnused == 0))
        isContinue = false;
    }
  }

  /**
   * Compute the best information gain for the specified antecedent.
   *
   * @param instances the data based on which the infoGain is computed
   * @param defAcRt the default accuracy rate of the data
   * @param antd the specific antecedent
   * @return the data covered by the antecedent, or null if the split failed
   */
  private Instances computeInfoGain(Instances instances, double defAcRt, Antd antd){
    Instances data = new Instances(instances);

    /* Split the data into bags.
       The information gain of each bag is also calculated in this procedure */
    Instances[] splitData = antd.splitData(data, defAcRt, m_Class);

    /* Get the bag of data to be used for next antecedents */
    if(splitData != null)
      return splitData[(int)antd.getAttrValue()];
    else
      return null;
  }

  /**
   * Prune the rule using the pruning data and update the worth parameters
   * for this rule. The accuracy rate is used to prune the rule.
   *
   * @param pruneData the pruning data used to prune the rule
   */
  private void prune(Instances pruneData){
    Instances data=new Instances(pruneData);
    double total = data.sumOfWeights();
    /* The default accurate # and the accuracy rate on pruning data */
    double defAccu=0, defAccuRate=0;

    int size=m_Antds.size();
    if(size == 0) return; // Default rule before pruning

    // Per-antecedent accuracy rate, coverage and accurate count on the
    // pruning data, indexed by antecedent position.
    double[] worthRt = new double[size];
    double[] coverage = new double[size];
    double[] worthValue = new double[size];
    for(int w=0; w<size; w++){
      worthRt[w]=coverage[w]=worthValue[w]=0.0;
    }

    /* Calculate accuracy parameters for all the antecedents in this rule */
    for(int x=0; x<size; x++){
      Antd antd=(Antd)m_Antds.elementAt(x);
      Attribute attr= antd.getAttr();
      Instances newData = new Instances(data);
      data = new Instances(newData, newData.numInstances()); // Make data empty

      for(int y=0; y<newData.numInstances(); y++){
        Instance ins=newData.instance(y);
        // NOTE(review): the visible chunk ends here, mid-loop — the remainder
        // of prune() lies beyond this excerpt.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -