📄 jrip.java
字号:
/**
 * Supplies the tip text describing the debug property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String debugTipText() {
  final String tip = "Whether debug information is output to the console.";
  return tip;
}
/**
 * Sets whether debug information is output to the console.
 *
 * @param d the new value of the debug flag
 */
public void setDebug(boolean d){
  this.m_Debug = d;
}
/**
 * Gets whether debug information is output to the console.
 *
 * @return the current value of the debug flag
 */
public boolean getDebug(){
  return this.m_Debug;
}
/**
 * Supplies the tip text describing the checkErrorRate property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String checkErrorRateTipText() {
  final String tip = "Whether check for error rate >= 1/2 is included" +
    " in stopping criterion.";
  return tip;
}
/**
 * Sets whether the check for error rate >= 1/2 is included in the
 * stopping criterion.
 *
 * @param d the new value of the flag
 */
public void setCheckErrorRate(boolean d){
  this.m_CheckErr = d;
}
/**
 * Gets whether the check for error rate >= 1/2 is included in the
 * stopping criterion.
 *
 * @return the current value of the flag
 */
public boolean getCheckErrorRate(){
  return this.m_CheckErr;
}
/**
 * Supplies the tip text describing the usePruning property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String usePruningTipText() {
  final String tip = "Whether pruning is performed.";
  return tip;
}
/**
 * Sets whether pruning is performed.
 *
 * @param d the new value of the pruning flag
 */
public void setUsePruning(boolean d){
  this.m_UsePruning = d;
}
/**
 * Gets whether pruning is performed.
 *
 * @return the current value of the pruning flag
 */
public boolean getUsePruning(){
  return this.m_UsePruning;
}
/**
 * Gives access to the ruleset generated by Ripper.
 * The vector held by this object is handed out directly, not a copy.
 *
 * @return the ruleset
 */
public FastVector getRuleset(){
  return this.m_Ruleset;
}
/**
 * Fetches the statistics stored at the given position of the
 * ruleset statistics vector.
 *
 * @param pos the position of the stats; it is assumed to be valid,
 * no range check is done here
 * @return the element at that position, cast to RuleStats
 */
public RuleStats getRuleStats(int pos) {
  Object stats = m_RulesetStats.elementAt(pos);
  return (RuleStats) stats;
}
/**
 * One antecedent (condition) of a rule: an attribute together with the
 * value it is tested against. The two concrete subclasses are NumericAntd
 * (numeric attributes) and NominalAntd (nominal attributes).
 */
private abstract class Antd
  implements WeightedInstancesHandler, Copyable, Serializable {

  /** The attribute tested by this antecedent */
  protected Attribute att;

  /**
   * The attribute value of this antecedent. For a numeric attribute it
   * is either 0 (1st bag) or 1 (2nd bag).
   */
  protected double value;

  /** The maximum infoGain achieved by this antecedent test in the growing data */
  protected double maxInfoGain;

  /** The accuracy rate of this antecedent test on the growing data */
  protected double accuRate;

  /** The coverage of this antecedent in the growing data */
  protected double cover;

  /** The accurate data for this antecedent in the growing data */
  protected double accu;

  /**
   * Creates an antecedent on the given attribute. The info gain starts
   * at 0; every other statistic starts out as NaN.
   *
   * @param a the attribute to test
   */
  public Antd(Attribute a){
    this.att = a;
    this.value = Double.NaN;
    this.maxInfoGain = 0;
    this.accuRate = Double.NaN;
    this.cover = Double.NaN;
    this.accu = Double.NaN;
  }

  /* ---- Members every concrete antecedent has to supply ---- */

  /**
   * Splits the data according to this antecedent's attribute.
   *
   * @param data the data to be split
   * @param defAcRt the default accuracy rate for data
   * @param cla the class label to be predicted
   * @return the array of data after split
   */
  public abstract Instances[] splitData(Instances data, double defAcRt,
                                        double cla);

  /**
   * @param inst the instance in question
   * @return whether the instance is covered by this antecedent
   */
  public abstract boolean covers(Instance inst);

  /** @return a textual description of this antecedent */
  public abstract String toString();

  /** Implements Copyable */
  public abstract Object copy();

  /* ---- Read access to the state of this antecedent ---- */

  /** @return the attribute tested by this antecedent */
  public Attribute getAttr(){
    return this.att;
  }

  /** @return the attribute value of this antecedent */
  public double getAttrValue(){
    return this.value;
  }

  /** @return the maximum infoGain recorded for this antecedent */
  public double getMaxInfoGain(){
    return this.maxInfoGain;
  }

  /** @return the accuracy rate recorded for this antecedent */
  public double getAccuRate(){
    return this.accuRate;
  }

  /** @return the accurate data (weight) recorded for this antecedent */
  public double getAccu(){
    return this.accu;
  }

  /** @return the coverage recorded for this antecedent */
  public double getCover(){
    return this.cover;
  }
}
/**
* The antecedent with numeric attribute
*/
private class NumericAntd extends Antd{
/* The split point for this numeric antecedent */
private double splitPoint;
/* Constructor*/
public NumericAntd(Attribute a){
super(a);
splitPoint = Double.NaN;
}
/* Get split point of this numeric antecedent */
public double getSplitPoint(){ return splitPoint; }
/** Implements Copyable */
public Object copy(){
NumericAntd na = new NumericAntd(getAttr());
na.value = this.value;
na.splitPoint = this.splitPoint;
return na;
}
/**
* Implements the splitData function.
* This procedure is to split the data into two bags according
* to the information gain of the numeric attribute value
* The maximum infoGain is also calculated.
*
* @param insts the data to be split
* @param defAcRt the default accuracy rate for data
* @param cl the class label to be predicted
* @return the array of data after split
*/
public Instances[] splitData(Instances insts, double defAcRt,
double cl){
Instances data = insts;
int total=data.numInstances();// Total number of instances without
// missing value for att
int split=1; // Current split position
int prev=0; // Previous split position
int finalSplit=split; // Final split position
maxInfoGain = 0;
value = 0;
double fstCover=0, sndCover=0, fstAccu=0, sndAccu=0;
data.sort(att);
// Find the las instance without missing value
for(int x=0; x<data.numInstances(); x++){
Instance inst = data.instance(x);
if(inst.isMissing(att)){
total = x;
break;
}
sndCover += inst.weight();
if(Utils.eq(inst.classValue(), cl))
sndAccu += inst.weight();
}
if(total == 0) return null; // Data all missing for the attribute
splitPoint = data.instance(total-1).value(att);
for(; split <= total; split++){
if((split == total) ||
(data.instance(split).value(att) > // Can't split within
data.instance(prev).value(att))){ // same value
for(int y=prev; y<split; y++){
Instance inst = data.instance(y);
fstCover += inst.weight();
if(Utils.eq(data.instance(y).classValue(), cl)){
fstAccu += inst.weight(); // First bag positive# ++
}
}
double fstAccuRate = (fstAccu+1.0)/(fstCover+1.0),
sndAccuRate = (sndAccu+1.0)/(sndCover+1.0);
/* Which bag has higher information gain? */
boolean isFirst;
double fstInfoGain, sndInfoGain;
double accRate, infoGain, coverage, accurate;
fstInfoGain =
//Utils.eq(defAcRt, 1.0) ?
//fstAccu/(double)numConds :
fstAccu*(Utils.log2(fstAccuRate)-Utils.log2(defAcRt));
sndInfoGain =
//Utils.eq(defAcRt, 1.0) ?
//sndAccu/(double)numConds :
sndAccu*(Utils.log2(sndAccuRate)-Utils.log2(defAcRt));
if(fstInfoGain > sndInfoGain){
isFirst = true;
infoGain = fstInfoGain;
accRate = fstAccuRate;
accurate = fstAccu;
coverage = fstCover;
}
else{
isFirst = false;
infoGain = sndInfoGain;
accRate = sndAccuRate;
accurate = sndAccu;
coverage = sndCover;
}
/* Check whether so far the max infoGain */
if(infoGain > maxInfoGain){
splitPoint = data.instance(prev).value(att);
value = (isFirst) ? 0 : 1;
accuRate = accRate;
accu = accurate;
cover = coverage;
maxInfoGain = infoGain;
finalSplit = (isFirst) ? split : prev;
}
for(int y=prev; y<split; y++){
Instance inst = data.instance(y);
sndCover -= inst.weight();
if(Utils.eq(data.instance(y).classValue(), cl)){
sndAccu -= inst.weight(); // Second bag positive# --
}
}
prev=split;
}
}
/* Split the data */
Instances[] splitData = new Instances[2];
splitData[0] = new Instances(data, 0, finalSplit);
splitData[1] = new Instances(data, finalSplit, total-finalSplit);
return splitData;
}
/**
* Whether the instance is covered by this antecedent
*
* @param inst the instance in question
* @return the boolean value indicating whether the instance is covered
* by this antecedent
*/
public boolean covers(Instance inst){
boolean isCover=true;
if(!inst.isMissing(att)){
if((int)value == 0){ // First bag
if(inst.value(att) > splitPoint)
isCover=false;
}
else if(inst.value(att) < splitPoint) // Second bag
isCover=false;
}
else
isCover = false;
return isCover;
}
/**
* Prints this antecedent
*
* @return a textual description of this antecedent
*/
public String toString() {
String symbol = ((int)value == 0) ? " <= " : " >= ";
return (att.name() + symbol + Utils.doubleToString(splitPoint, 6));
}
}
/**
 * The antecedent with nominal attribute. The test is "att = value",
 * where value is the index of one of the attribute's nominal values.
 */
private class NominalAntd extends Antd{

  /** Per attribute value: weight of the instances having the target class */
  private double[] accurate;

  /** Per attribute value: weight of all instances having that value */
  private double[] coverage;

  /**
   * Creates a nominal antecedent on the given attribute and allocates
   * one statistics slot per attribute value.
   *
   * @param a the nominal attribute to test
   */
  public NominalAntd(Attribute a){
    super(a);
    final int numBags = att.numValues();
    this.accurate = new double[numBags];
    this.coverage = new double[numBags];
  }

  /**
   * Implements Copyable. Only the attribute and the chosen value are
   * carried over; the statistics of the new object start from scratch.
   *
   * @return the new antecedent
   */
  public Object copy(){
    Antd duplicate = new NominalAntd(getAttr());
    duplicate.value = value;
    return duplicate;
  }

  /**
   * Implements the splitData function.
   * The data is distributed into one bag per attribute value (instances
   * with a missing value go into no bag), the information gain of every
   * bag is computed, and the bag with the highest gain determines
   * value, accuRate, accu, cover and maxInfoGain.
   *
   * NOTE(review): unlike NumericAntd.splitData, maxInfoGain is not reset
   * at the start, so a bag is only recorded if it beats the gain already
   * stored in this object -- confirm callers use a fresh antecedent.
   *
   * @param data the data to be split
   * @param defAcRt the default accuracy rate for data
   * @param cl the class label to be predicted
   * @return the array of data after split, indexed by attribute value
   */
  public Instances[] splitData(Instances data, double defAcRt,
                               double cl){
    final int numBags = att.numValues();
    Instances[] bags = new Instances[numBags];

    // Empty bags and cleared counters
    for (int b = 0; b < numBags; b++) {
      bags[b] = new Instances(data, data.numInstances());
      accurate[b] = 0;
      coverage[b] = 0;
    }

    // Distribute the instances and accumulate the weights
    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      if (current.isMissing(att)) {
        continue;
      }
      int bagIndex = (int)current.value(att);
      bags[bagIndex].add(current);
      coverage[bagIndex] += current.weight();
      if ((int)current.classValue() == (int)cl) {
        accurate[bagIndex] += current.weight();
      }
    }

    // Pick the bag with the highest information gain
    for (int b = 0; b < numBags; b++) {
      double t = coverage[b] + 1.0;
      double p = accurate[b] + 1.0;
      double rate = p / t;
      double gain = accurate[b] * (Utils.log2(rate) - Utils.log2(defAcRt));
      if (gain > maxInfoGain) {
        maxInfoGain = gain;
        cover = coverage[b];
        accu = accurate[b];
        accuRate = rate;
        value = (double)b;
      }
    }

    return bags;
  }

  /**
   * Whether the instance is covered by this antecedent.
   * An instance with a missing value for the attribute is never covered.
   *
   * @param inst the instance in question
   * @return the boolean value indicating whether the instance is
   * covered by this antecedent
   */
  public boolean covers(Instance inst){
    return !inst.isMissing(att)
      && (int)inst.value(att) == (int)value;
  }

  /**
   * Prints this antecedent
   *
   * @return a textual description of this antecedent
   */
  public String toString() {
    String label = att.value((int)value);
    return att.name() + " = " + label;
  }
}
/**
* This class implements a single rule that predicts specified class.
*
* A rule consists of antecedents "AND"ed together and the consequent
* (class value) for the classification.
* In this class, the Information Gain (p*[log(p/t) - log(P/T)]) is used to
* select an antecedent and Reduced Error Prunning (REP) with the metric
* of accuracy rate p/(p+n) or (TP+TN)/(P+N) is used to prune the rule.
*/
protected class RipperRule extends Rule{
/** The internal representation of the class label to be predicted*/
private double m_Consequent = -1;
/** The vector of antecedents of this rule*/
protected FastVector m_Antds = null;
/**
 * Sets the internal representation of the class label to be predicted.
 *
 * @param cl the class label
 */
public void setConsequent(double cl){
  this.m_Consequent = cl;
}
/**
 * Gets the internal representation of the class label to be predicted.
 *
 * @return the class label
 */
public double getConsequent(){
  return this.m_Consequent;
}
/** Constructor: starts the rule off with an empty vector of antecedents */
public RipperRule(){
  this.m_Antds = new FastVector();
}
/**
 * Get a copy of this rule: a new RipperRule with the same consequent
 * and with the antecedent vector returned by copyElements() on this
 * rule's antecedents.
 * NOTE(review): how deep the copy of the antecedents is depends on
 * FastVector.copyElements() -- confirm against its documentation.
 *
 * @return the copy
 */
public Object copy(){
  RipperRule duplicate = new RipperRule();
  double consequent = getConsequent();
  duplicate.setConsequent(consequent);
  Object copiedAntds = this.m_Antds.copyElements();
  duplicate.m_Antds = (FastVector)copiedAntds;
  return duplicate;
}
/**
* Whether the instance covered by this rule
*
* @param inst the instance in question
* @return the boolean value indicating whether the instance
* is covered by this rule
*/
public boolean covers(Instance datum){
boolean isCover=true;
for(int i=0; i<m_Antds.size(); i++){
Antd antd = (Antd)m_Antds.elementAt(i);
if(!antd.covers(datum)){
isCover = false;
break;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -