// NOTE(review): removed web-scrape page header ("conjunctiverule.java" / font-size widget) — not part of the source.
* Set if NOT randomize the data before split to growing and
* pruning data. If NOT set, the seed of randomization is
* specified by the -S option. (Default: randomize) <p>
*
* -S <br>
* Seed of randomization. (Default: 1)<p>
*
* -E <br>
* Set whether consider the exclusive expressions for nominal
* attribute split. (Default: false) <p>
*
* -M number <br>
* Set the minimal weights of instances within a split.
* (Default: 2) <p>
*
* -P number <br>
* Set the number of antecedents allowed in the rule if pre-pruning
* is used. If this value is other than -1, then pre-pruning will be
* used, otherwise the rule uses REP. (Default: -1) <p>
*
* @return an enumeration of all the available options
*/
/**
 * Returns an enumeration describing the available options.
 *
 * @return an enumeration of all the available options
 */
public Enumeration listOptions() {
    Vector result = new Vector(6);

    result.addElement(new Option(
        "\tSet number of folds for REP\n\tOne fold is used as pruning set.\n\t(default 3)",
        "N", 1, "-N <number of folds>"));
    result.addElement(new Option(
        "\tSet if NOT uses randomization\n\t(default:use randomization)",
        "R", 0, "-R"));
    result.addElement(new Option(
        "\tSet whether consider the exclusive\n\texpressions for nominal attributes\n\t(default false)",
        "E", 0, "-E"));
    result.addElement(new Option(
        "\tSet the minimal weights of instances\n\twithin a split.\n\t(default 2.0)",
        "M", 1, "-M <min. weights>"));
    result.addElement(new Option(
        "\tSet number of antecedents for pre-pruning\n\tif -1, then REP is used\n\t(default -1)",
        "P", 1, "-P <number of antecedents>"));
    result.addElement(new Option(
        "\tSet the seed of randomization\n\t(default 1)",
        "S", 1, "-S <seed>"));

    return result.elements();
}
/**
 * Parses a given list of options. Each missing option falls back to its
 * documented default (-N 3, -M 2.0, -S 1, -P -1, -E off).
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
    String foldsStr = Utils.getOption('N', options);
    m_Folds = (foldsStr.length() > 0) ? Integer.parseInt(foldsStr) : 3;

    String minWeightStr = Utils.getOption('M', options);
    m_MinNo = (minWeightStr.length() > 0) ? Double.parseDouble(minWeightStr) : 2.0;

    String seedStr = Utils.getOption('S', options);
    m_Seed = (seedStr.length() > 0) ? Integer.parseInt(seedStr) : 1;

    String antdsStr = Utils.getOption('P', options);
    m_NumAntds = (antdsStr.length() > 0) ? Integer.parseInt(antdsStr) : -1;

    m_IsExclude = Utils.getFlag('E', options);
}
/**
 * Gets the current settings of the Classifier.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String [] getOptions() {
    // 9 slots: four flag/value pairs plus the optional -E flag.
    String[] result = new String[9];
    int pos = 0;

    result[pos++] = "-N";
    result[pos++] = "" + m_Folds;
    result[pos++] = "-M";
    result[pos++] = "" + m_MinNo;
    result[pos++] = "-P";
    result[pos++] = "" + m_NumAntds;
    result[pos++] = "-S";
    result[pos++] = "" + m_Seed;
    if (m_IsExclude)
        result[pos++] = "-E";

    // Pad unused slots so the array contains no nulls.
    while (pos < result.length)
        result[pos++] = "";

    return result;
}
/** The access functions for parameters */
/**
 * Returns the tip text for the folds property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String foldsTipText() {
    return "Determines the amount of data used for pruning. One fold is used for "
        + "pruning, the rest for growing the rules.";
}
/**
 * Sets the number of folds used for reduced-error pruning.
 *
 * @param folds the number of folds
 */
public void setFolds(int folds) {
    m_Folds = folds;
}

/**
 * Gets the number of folds used for reduced-error pruning.
 *
 * @return the number of folds
 */
public int getFolds() {
    return m_Folds;
}
/**
 * Returns the tip text for the seed property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String seedTipText() {
    return "The seed used for randomizing the data.";
}
/**
 * Sets the seed used for randomizing the data.
 *
 * @param s the randomization seed
 */
public void setSeed(long s) {
    m_Seed = s;
}

/**
 * Gets the seed used for randomizing the data.
 *
 * @return the randomization seed
 */
public long getSeed() {
    return m_Seed;
}
/**
 * Returns the tip text for the exclusive property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String exclusiveTipText() {
    return "Set whether to consider exclusive expressions for nominal "
        + "attribute splits.";
}
/**
 * Gets whether exclusive expressions are considered for nominal
 * attribute splits.
 *
 * @return true if exclusive expressions are considered
 */
public boolean getExclusive() {
    return m_IsExclude;
}

/**
 * Sets whether exclusive expressions are considered for nominal
 * attribute splits.
 *
 * @param e true to consider exclusive expressions
 */
public void setExclusive(boolean e) {
    m_IsExclude = e;
}
/**
 * Returns the tip text for the minNo property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String minNoTipText() {
    return "The minimum total weight of the instances in a rule.";
}
/**
 * Sets the minimum total weight of the instances in a rule.
 *
 * @param m the minimum total weight
 */
public void setMinNo(double m) {
    m_MinNo = m;
}

/**
 * Gets the minimum total weight of the instances in a rule.
 *
 * @return the minimum total weight
 */
public double getMinNo() {
    return m_MinNo;
}
/**
 * Returns the tip text for the numAntds property.
 *
 * @return tip text for this property suitable for
 * displaying in the explorer/experimenter gui
 */
public String numAntdsTipText() {
    return "Set the number of antecedents allowed in the rule if "
        + "pre-pruning is used. If this value is other than -1, then "
        + "pre-pruning will be used, otherwise the rule uses reduced-error "
        + "pruning.";
}
/**
 * Sets the number of antecedents allowed when pre-pruning is used
 * (-1 means reduced-error pruning is used instead).
 *
 * @param n the number of antecedents, or -1 for REP
 */
public void setNumAntds(int n) {
    m_NumAntds = n;
}

/**
 * Gets the number of antecedents allowed when pre-pruning is used.
 *
 * @return the number of antecedents, or -1 if REP is used
 */
public int getNumAntds() {
    return m_NumAntds;
}
/**
 * Builds a single rule learner with REP dealing with nominal classes or
 * numeric classes.
 * For nominal classes, this rule learner predicts a distribution on
 * the classes.
 * For numeric classes, this learner predicts a single value.
 *
 * @param instances the training data
 * @exception Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances) throws Exception {
    if (instances.checkForStringAttributes())
        throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");

    // Work on a copy so the caller's dataset is never modified.
    Instances data = new Instances(instances);
    if (data.numInstances() == 0)
        throw new Exception("No training data!");

    data.deleteWithMissingClass();
    if (data.numInstances() == 0)
        // Fixed garbled message (was "Not training data without missing class values.")
        throw new Exception("No training data without missing class values.");
    if (data.numInstances() < m_Folds)
        throw new Exception("Not enough data for REP.");

    m_ClassAttribute = data.classAttribute();
    // One distribution slot per class value for nominal classes; a single
    // slot (the predicted mean) for numeric classes.
    if (m_ClassAttribute.isNominal())
        m_NumClasses = m_ClassAttribute.numValues();
    else
        m_NumClasses = 1;

    m_Antds = new FastVector();
    m_DefDstr = new double[m_NumClasses];
    m_Cnsqt = new double[m_NumClasses];
    m_Targets = new FastVector();
    m_Random = new Random(m_Seed);

    if (m_NumAntds != -1) {
        // Pre-pruning: grow the rule on all the data, no pruning set.
        grow(data);
    } else {
        // REP: randomize and stratify, then hold out one fold for pruning.
        data.randomize(m_Random);
        data.stratify(m_Folds);
        Instances growData = data.trainCV(m_Folds, m_Folds - 1, m_Random);
        Instances pruneData = data.testCV(m_Folds, m_Folds - 1);
        grow(growData);   // Build this rule
        prune(pruneData); // Prune this rule
    }

    if (m_ClassAttribute.isNominal()) {
        Utils.normalize(m_Cnsqt);
        // Guard against an all-zero default distribution before normalizing.
        if (Utils.gr(Utils.sum(m_DefDstr), 0))
            Utils.normalize(m_DefDstr);
    }
}
/**
 * Computes the class distribution for the given instance: the rule's
 * consequent when the rule covers the instance, otherwise the default
 * distribution.
 *
 * @param instance the instance for which distribution is to be computed
 * @return the class distribution for the given instance
 * @exception Exception if the testing instance is null
 */
public double[] distributionForInstance(Instance instance) throws Exception {
    if (instance == null)
        throw new Exception("Testing instance is NULL!");
    return isCover(instance) ? m_Cnsqt : m_DefDstr;
}
/**
 * Whether the instance is covered by this rule, i.e. whether it
 * satisfies every antecedent.
 *
 * @param datum the instance in question
 * @return the boolean value indicating whether the instance is covered by this rule
 */
public boolean isCover(Instance datum) {
    // The rule covers the instance only if every antecedent does.
    for (int i = 0; i < m_Antds.size(); i++) {
        Antd antd = (Antd) m_Antds.elementAt(i);
        if (!antd.isCover(datum))
            return false;
    }
    return true;
}
/**
 * Whether this rule has antecedents, i.e. whether it is not a default rule.
 *
 * @return the boolean value indicating whether the rule has antecedents
 */
public boolean hasAntds() {
    return (m_Antds != null) && (m_Antds.size() > 0);
}
/**
* Build one rule using the growing data
*
* @param data the growing data used to build the rule
*/
private void grow(Instances data){
Instances growData = new Instances(data);
double defInfo;
double whole = data.sumOfWeights();
if(m_NumAntds != 0){
/* Class distribution for data both covered and not covered by one antecedent */
double[][] classDstr = new double[2][m_NumClasses];
/* Compute the default information of the growing data */
for(int j=0; j < m_NumClasses; j++){
classDstr[0][j] = 0;
classDstr[1][j] = 0;
}
if(m_ClassAttribute.isNominal()){
for(int i=0; i < growData.numInstances(); i++){
Instance datum = growData.instance(i);
classDstr[0][(int)datum.classValue()] += datum.weight();
}
defInfo = ContingencyTables.entropy(classDstr[0]);
}
else{
for(int i=0; i < growData.numInstances(); i++){
Instance datum = growData.instance(i);
classDstr[0][0] += datum.weight() * datum.classValue();
}
// No need to be divided by the denomitor because
// it's always the same
double defMean = (classDstr[0][0] / whole);
defInfo = meanSquaredError(growData, defMean) * growData.sumOfWeights();
}
// Store the default class distribution
double[][] tmp = new double[2][m_NumClasses];
for(int y=0; y < m_NumClasses; y++){
if(m_ClassAttribute.isNominal()){
tmp[0][y] = classDstr[0][y];
tmp[1][y] = classDstr[1][y];
}
else{
tmp[0][y] = classDstr[0][y]/whole;
tmp[1][y] = classDstr[1][y];
}
}
m_Targets.addElement(tmp);
/* Keep the record of which attributes have already been used*/
boolean[] used=new boolean[growData.numAttributes()];
for (int k=0; k<used.length; k++)
used[k]=false;
int numUnused=used.length;
double maxInfoGain, uncoveredWtSq=0, uncoveredWtVl=0, uncoveredWts=0;
boolean isContinue = true; // The stopping criterion of this rule
while (isContinue){
maxInfoGain = 0; // We require that infoGain be positive
/* Build a list of antecedents */
Antd oneAntd=null;
Instances coverData = null, uncoverData = null;
Enumeration enumAttr=growData.enumerateAttributes();
int index=-1;
/* Build one condition based on all attributes not used yet*/
while (enumAttr.hasMoreElements()){
Attribute att= (Attribute)(enumAttr.nextElement());
index++;
Antd antd =null;
if(m_ClassAttribute.isNominal()){
if(att.isNumeric())
antd = new NumericAntd(att, classDstr[1]);
else
antd = new NominalAntd(att, classDstr[1]);
}
// NOTE(review): source truncated here — the remainder of grow() and the rest of the
// class are missing. Removed web-scrape footer (keyboard-shortcut help text); recover
// the full file from the original Weka distribution (weka.classifiers.rules.ConjunctiveRule).