📄 jrip.java
字号:
/* Get functions of this antecedent */ public Attribute getAttr(){ return att; } public double getAttrValue(){ return value; } public double getMaxInfoGain(){ return maxInfoGain; } public double getAccuRate(){ return accuRate; } public double getAccu(){ return accu; } public double getCover(){ return cover; } } /** * The antecedent with numeric attribute */ private class NumericAntd extends Antd{ /* The split point for this numeric antecedent */ private double splitPoint; /* Constructor*/ public NumericAntd(Attribute a){ super(a); splitPoint = Double.NaN; } /* Get split point of this numeric antecedent */ public double getSplitPoint(){ return splitPoint; } /** Implements Copyable */ public Object copy(){ NumericAntd na = new NumericAntd(getAttr()); na.value = this.value; na.splitPoint = this.splitPoint; return na; } /** * Implements the splitData function. * This procedure is to split the data into two bags according * to the information gain of the numeric attribute value * The maximum infoGain is also calculated. 
* * @param insts the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances insts, double defAcRt, double cl){ Instances data = insts; int total=data.numInstances();// Total number of instances without // missing value for att int split=1; // Current split position int prev=0; // Previous split position int finalSplit=split; // Final split position maxInfoGain = 0; value = 0; double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0; data.sort(att); // Find the las instance without missing value for(int x=0; x<data.numInstances(); x++){ Instance inst = data.instance(x); if(inst.isMissing(att)){ total = x; break; } sndCover += inst.weight(); if(Utils.eq(inst.classValue(), cl)) sndAccu += inst.weight(); } if(total == 0) return null; // Data all missing for the attribute splitPoint = data.instance(total-1).value(att); for(; split <= total; split++){ if((split == total) || (data.instance(split).value(att) > // Can't split within data.instance(prev).value(att))){ // same value for(int y=prev; y<split; y++){ Instance inst = data.instance(y); fstCover += inst.weight(); if(Utils.eq(data.instance(y).classValue(), cl)){ fstAccu += inst.weight(); // First bag positive# ++ } } double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0), sndAccuRate = (sndAccu+1.0)/(sndCover+1.0); /* Which bag has higher information gain? */ boolean isFirst; double fstInfoGain, sndInfoGain; double accRate, infoGain, coverage, accurate; fstInfoGain = //Utils.eq(defAcRt, 1.0) ? //fstAccu/(double)numConds : fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt)); sndInfoGain = //Utils.eq(defAcRt, 1.0) ? 
//sndAccu/(double)numConds : sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt)); if(fstInfoGain > sndInfoGain){ isFirst = true; infoGain = fstInfoGain; accRate = fstAccuRate; accurate = fstAccu; coverage = fstCover; } else{ isFirst = false; infoGain = sndInfoGain; accRate = sndAccuRate; accurate = sndAccu; coverage = sndCover; } /* Check whether so far the max infoGain */ if(infoGain > maxInfoGain){ splitPoint = data.instance(prev).value(att); value = (isFirst) ? 0 : 1; accuRate = accRate; accu = accurate; cover = coverage; maxInfoGain = infoGain; finalSplit = (isFirst) ? split : prev; } for(int y=prev; y<split; y++){ Instance inst = data.instance(y); sndCover -= inst.weight(); if(Utils.eq(data.instance(y).classValue(), cl)){ sndAccu -= inst.weight(); // Second bag positive# -- } } prev=split; } } /* Split the data */ Instances[] splitData = new Instances[2]; splitData[0] = new Instances(data, 0, finalSplit); splitData[1] = new Instances(data, finalSplit, total-finalSplit); return splitData; } /** * Whether the instance is covered by this antecedent * * @param inst the instance in question * @return the boolean value indicating whether the instance is covered * by this antecedent */ public boolean covers(Instance inst){ boolean isCover=true; if(!inst.isMissing(att)){ if((int)value == 0){ // First bag if(inst.value(att) > splitPoint) isCover=false; } else if(inst.value(att) < splitPoint) // Second bag isCover=false; } else isCover = false; return isCover; } /** * Prints this antecedent * * @return a textual description of this antecedent */ public String toString() { String symbol = ((int)value == 0) ? 
" <= " : " >= "; return (att.name() + symbol + Utils.doubleToString(splitPoint, 6)); } } /** * The antecedent with nominal attribute */ private class NominalAntd extends Antd{ /* The parameters of infoGain calculated for each attribute value * in the growing data */ private double[] accurate; private double[] coverage; /* Constructor*/ public NominalAntd(Attribute a){ super(a); int bag = att.numValues(); accurate = new double[bag]; coverage = new double[bag]; } /** Implements Copyable */ public Object copy(){ Antd antec = new NominalAntd(getAttr()); antec.value = this.value; return antec; } /** * Implements the splitData function. * This procedure is to split the data into bags according * to the nominal attribute value * The infoGain for each bag is also calculated. * * @param data the data to be split * @param defAcRt the default accuracy rate for data * @param cl the class label to be predicted * @return the array of data after split */ public Instances[] splitData(Instances data, double defAcRt, double cl){ int bag = att.numValues(); Instances[] splitData = new Instances[bag]; for(int x=0; x<bag; x++){ splitData[x] = new Instances(data, data.numInstances()); accurate[x] = 0; coverage[x] = 0; } for(int x=0; x<data.numInstances(); x++){ Instance inst=data.instance(x); if(!inst.isMissing(att)){ int v = (int)inst.value(att); splitData[v].add(inst); coverage[v] += inst.weight(); if((int)inst.classValue() == (int)cl) accurate[v] += inst.weight(); } } for(int x=0; x<bag; x++){ double t = coverage[x]+1.0; double p = accurate[x] + 1.0; double infoGain = //Utils.eq(defAcRt, 1.0) ? 
//accurate[x]/(double)numConds : accurate[x]*(Utils.log2(p/t)-Utils.log2(defAcRt)); if(infoGain > maxInfoGain){ maxInfoGain = infoGain; cover = coverage[x]; accu = accurate[x]; accuRate = p/t; value = (double)x; } } return splitData; } /** * Whether the instance is covered by this antecedent * * @param inst the instance in question * @return the boolean value indicating whether the instance is * covered by this antecedent */ public boolean covers(Instance inst){ boolean isCover=false; if(!inst.isMissing(att)){ if((int)inst.value(att) == (int)value) isCover=true; } return isCover; } /** * Prints this antecedent * * @return a textual description of this antecedent */ public String toString() { return (att.name() + " = " +att.value((int)value)); } } /** * This class implements a single rule that predicts specified class. * * A rule consists of antecedents "AND"ed together and the consequent * (class value) for the classification. * In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to * select an antecedent and Reduced Error Prunning (REP) with the metric * of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule. 
*/
  protected class RipperRule extends Rule{

    /** Internal numeric form of the class label this rule predicts (-1 while unset) */
    private double m_Consequent = -1;

    /** The antecedents that make up this rule */
    protected FastVector m_Antds = null;

    /** Sets the class label predicted by this rule. */
    public void setConsequent(double cl){
      m_Consequent = cl;
    }

    /** Returns the class label predicted by this rule. */
    public double getConsequent(){
      return m_Consequent;
    }

    /** Constructor: starts with an empty antecedent vector */
    public RipperRule(){
      m_Antds = new FastVector();
    }

    /**
     * Creates a copy of this rule: the consequent is carried over and the
     * antecedent vector is duplicated through FastVector.copyElements().
     *
     * @return the copy
     */
    public Object copy(){
      RipperRule duplicate = new RipperRule();
      duplicate.setConsequent(getConsequent());
      duplicate.m_Antds = (FastVector)this.m_Antds.copyElements();
      return duplicate;
    }

    /**
     * Tells whether every antecedent of this rule covers the instance.
     * A rule without antecedents covers every instance.
     *
     * @param datum the instance in question
     * @return the boolean value indicating whether the instance
     *         is covered by this rule
     */
    public boolean covers(Instance datum){
      for(int idx=0; idx<m_Antds.size(); idx++){
        Antd condition = (Antd)m_Antds.elementAt(idx);
        if(!condition.covers(datum)){
          return false;
        }
      }
      return true;
    }

    /**
     * Tells whether this rule has antecedents, i.e. whether it is not
     * just a default rule.
     *
     * @return the boolean value indicating whether the rule has antecedents
     */
    public boolean hasAntds(){
      return (m_Antds != null) && (m_Antds.size() > 0);
    }

    /**
     * The number of antecedents of the rule
     *
     * @return the size of this rule
     */
    public double size(){
      return (double)m_Antds.size();
    }

    /**
     * Sums the weights of the instances in the given data whose class
     * value (truncated to int) equals the consequent of this rule.
     *
     * @param data the data in question
     * @return the default accuracy number
     */
    private double computeDefAccu(Instances data){
      double matchedWeight=0;
      final int target = (int)m_Consequent;
      for(int idx=0; idx<data.numInstances(); idx++){
        Instance current = data.instance(idx);
        if((int)current.classValue() == target){
          matchedWeight += current.weight();
        }
      }
      return matchedWeight;
    }

    /**
     * Build one rule using the growing data
     *
     * @param data the growing data used to build the rule
     * @exception Exception if the consequent is not set yet
     */
    public void grow(Instances data) throws Exception {
      if(m_Consequent == -1)
        throw new Exception(" Consequent not set yet.");

      Instances growData = data;
      double sumOfWeights = growData.sumOfWeights();
      // Nothing to do unless Utils.gr reports the total weight as greater than 0
      if(!Utils.gr(sumOfWeights, 0.0))
        return;

      /* Compute the default accurate rate of the growing data */
      double defAccu = computeDefAccu(growData);
      double defAcRt = (defAccu+1.0)/(sumOfWeights+1.0);

      /* Keep the record of which attributes have already been used*/
      boolean[] used=new boolean [growData.numAttributes()];
      for (int k=0; k<used.length; k++)
        used[k]=false;
      int numUnused=used.length;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -