📄 conjunctiverule.java
字号:
if(m_ClassAttribute.isNominal()){
fstEntp = entropy(fst, fstCover);
sndEntp = entropy(snd, sndCover);
}
else{
fstEntp = wtMeanSqErr(fstWtSq, fstWtVl, fstCover)/fstCover;
sndEntp = wtMeanSqErr(sndWtSq, sndWtVl, sndCover)/sndCover;
}
/* Which bag has higher information gain? */
boolean isFirst;
double fstInfoGain, sndInfoGain;
double info, infoGain, fstInfo, sndInfo;
if(m_ClassAttribute.isNominal()){
double sum = data.sumOfWeights();
double otherCover, whole = sum + Utils.sum(uncover), otherEntropy;
double[] other = null;
// InfoGain of first bag
other = new double[m_NumClasses];
for(int z=0; z < m_NumClasses; z++)
other[z] = uncover[z] + snd[z] + missing[z];
otherCover = whole - fstCover;
otherEntropy = entropy(other, otherCover);
// Weighted average
fstInfo = (fstEntp*fstCover + otherEntropy*otherCover)/whole;
fstInfoGain = defInfo - fstInfo;
// InfoGain of second bag
other = new double[m_NumClasses];
for(int z=0; z < m_NumClasses; z++)
other[z] = uncover[z] + fst[z] + missing[z];
otherCover = whole - sndCover;
otherEntropy = entropy(other, otherCover);
// Weighted average
sndInfo = (sndEntp*sndCover + otherEntropy*otherCover)/whole;
sndInfoGain = defInfo - sndInfo;
}
else{
double sum = data.sumOfWeights();
double otherWtSq = (sndWtSq + msingWtSq + uncoverWtSq),
otherWtVl = (sndWtVl + msingWtVl + uncoverWtVl),
otherCover = (sum - fstCover + uncoverSum);
fstInfo = Utils.eq(fstCover, 0) ? 0 : (fstEntp * fstCover);
fstInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);
fstInfoGain = defInfo - fstInfo;
otherWtSq = (fstWtSq + msingWtSq + uncoverWtSq);
otherWtVl = (fstWtVl + msingWtVl + uncoverWtVl);
otherCover = sum - sndCover + uncoverSum;
sndInfo = Utils.eq(sndCover, 0) ? 0 : (sndEntp * sndCover);
sndInfo += wtMeanSqErr(otherWtSq, otherWtVl, otherCover);
sndInfoGain = defInfo - sndInfo;
}
if(Utils.gr(fstInfoGain,sndInfoGain) ||
(Utils.eq(fstInfoGain,sndInfoGain)&&(Utils.sm(fstEntp,sndEntp)))){
isFirst = true;
infoGain = fstInfoGain;
info = fstInfo;
}
else{
isFirst = false;
infoGain = sndInfoGain;
info = sndInfo;
}
boolean isUpdate = Utils.gr(infoGain, maxInfoGain);
/* Check whether so far the max infoGain */
if(isUpdate){
splitPoint = ((data.instance(split).value(att)) + (data.instance(prev).value(att)))/2.0;
value = ((isFirst) ? 0 : 1);
inform = info;
maxInfoGain = infoGain;
finalSplit = split;
}
prev=split;
}
}
/* Split the data */
Instances[] splitData = new Instances[3];
splitData[0] = new Instances(data, 0, finalSplit);
splitData[1] = new Instances(data, finalSplit, total-finalSplit);
splitData[2] = new Instances(missingData);
return splitData;
}
/**
 * Tests whether the given instance satisfies this numeric antecedent.
 * An instance whose value for the antecedent's attribute is missing is
 * never covered. Otherwise the instance is covered when its attribute
 * value is at most the split point (if <code>value</code> equals 0) or
 * strictly greater than the split point (for any other <code>value</code>).
 *
 * @param inst the instance in question
 * @return true if the instance is covered by this antecedent,
 *         false otherwise
 */
public boolean isCover(Instance inst){
  // A missing attribute value can never satisfy the condition
  if(inst.isMissing(att)){
    return false;
  }

  boolean lowerSide = Utils.eq(value, 0);
  double observed = inst.value(att);

  return lowerSide
    ? Utils.smOrEq(observed, splitPoint)
    : Utils.gr(observed, splitPoint);
}
/**
 * Builds the textual form of this antecedent: the attribute name, then
 * " <= " (when <code>value</code> equals 0) or " > " (otherwise), then
 * the split point formatted via Utils.doubleToString with 6 as the
 * second argument.
 *
 * @return a textual description of this antecedent
 */
public String toString() {
  String operator;
  if(Utils.eq(value, 0.0)){
    operator = " <= ";
  }
  else{
    operator = " > ";
  }

  String description = att.name() + operator;
  description = description + Utils.doubleToString(splitPoint, 6);
  return description;
}
}
/**
 * The antecedent with nominal attribute.
 * It represents a test of the form "att = value" or, when the exclusive
 * expression is chosen, "att != value".
 */
class NominalAntd extends Antd{

  /* The parameters of infoGain calculated for each attribute value:
     stats[v][c] accumulates the weight of instances with attribute value v
     and class c. Only allocated for a nominal class, null otherwise. */
  private double[][] stats;

  /* coverage[v] accumulates the weight of instances with attribute value v */
  private double[] coverage;

  /* True for the expression "att = value", false for "att != value" */
  private boolean isIn;

  /* Constructor for nominal class */
  public NominalAntd(Attribute a, double[] unc){
    super(a, unc);
    int bag = att.numValues();
    stats = new double[bag][m_NumClasses];
    coverage = new double[bag];
    isIn = true;
  }

  /* Constructor for numeric class */
  public NominalAntd(Attribute a, double sq, double vl, double wts){
    super(a, sq, vl, wts);
    int bag = att.numValues();
    stats = null;
    coverage = new double[bag];
    isIn = true;
  }

  /**
   * Implements the splitData function.
   * This procedure is to split the data into bags according
   * to the nominal attribute value;
   * the data with missing values are stored in the last bag.
   * The infoGain for each bag is also calculated, and the fields
   * <code>value</code>, <code>maxInfoGain</code>, <code>inform</code>
   * and the "=" / "!=" flag are set for the best bag found.
   *
   * If fewer than two attribute values have a coverage of at least
   * m_MinNo, no split is done: <code>maxInfoGain</code> is set to 0,
   * <code>inform</code> to <code>defInfo</code>, <code>value</code>
   * to NaN and null is returned.
   *
   * @param data the data to be split
   * @param defInfo the default information for data
   * @return the array of data after split (one bag per attribute value
   *         plus a last bag for missing values), or null if no split
   *         is done
   */
  public Instances[] splitData(Instances data, double defInfo){
    int bag = att.numValues();
    Instances[] splitData = new Instances[bag+1];

    // Freshly allocated arrays are already zero-filled
    double[] wSq = new double[bag];
    double[] wVl = new double[bag];
    double totalWS=0, totalWV=0, msingWS=0, msingWV=0, sum=data.sumOfWeights();
    double[] all = new double[m_NumClasses];
    double[] missing = new double[m_NumClasses];

    // coverage and stats are fields, so they must be cleared explicitly
    for(int x=0; x<bag; x++){
      coverage[x] = 0;
      if(stats != null)
        for(int y=0; y < m_NumClasses; y++)
          stats[x][y] = 0;
      splitData[x] = new Instances(data, data.numInstances());
    }
    splitData[bag] = new Instances(data, data.numInstances());

    // Record the statistics of data
    for(int x=0; x<data.numInstances(); x++){
      Instance inst=data.instance(x);
      double wt = inst.weight();
      double cl = inst.classValue();

      if(!inst.isMissing(att)){
        int v = (int)inst.value(att);
        splitData[v].add(inst);
        coverage[v] += wt;
        if(m_ClassAttribute.isNominal()){ // Nominal class
          stats[v][(int)cl] += wt;
          all[(int)cl] += wt;
        }
        else{                             // Numeric class
          wSq[v] += wt * cl * cl;
          wVl[v] += wt * cl;
          totalWS += wt * cl * cl;
          totalWV += wt * cl;
        }
      }
      else{
        splitData[bag].add(inst);
        if(m_ClassAttribute.isNominal()){ // Nominal class
          all[(int)cl] += wt;
          missing[(int)cl] += wt;
        }
        else{                             // Numeric class
          totalWS += wt * cl * cl;
          totalWV += wt * cl;
          msingWS += wt * cl * cl;
          msingWV += wt * cl;
        }
      }
    }

    // The total weights of the whole grow data
    double whole;
    if(m_ClassAttribute.isNominal())
      whole = sum + Utils.sum(uncover);
    else
      whole = sum + uncoverSum;

    // Find the split
    double minEntrp=Double.MAX_VALUE;
    maxInfoGain = 0;

    // Check if >=2 splits have more than the minimal data
    int count=0;
    for(int x=0; x<bag; x++)
      if(Utils.grOrEq(coverage[x], m_MinNo))
        ++count;

    if(count < 2){ // Don't split
      maxInfoGain = 0;
      inform = defInfo;
      value = Double.NaN;
      return null;
    }

    for(int x=0; x<bag; x++){
      double t = coverage[x], entrp, infoGain;

      // Bags below the minimal weight are not candidates
      if(Utils.sm(t, m_MinNo))
        continue;

      if(m_ClassAttribute.isNominal()){ // Nominal class
        double[] other = new double[m_NumClasses];
        for(int y=0; y < m_NumClasses; y++)
          other[y] = all[y] - stats[x][y] + uncover[y];
        double otherCover = whole - t;

        // Entropies of data covered and uncovered
        entrp = entropy(stats[x], t);
        double uncEntp = entropy(other, otherCover);

        // Weighted average
        infoGain = defInfo - (entrp*t + uncEntp*otherCover)/whole;
      }
      else{                             // Numeric class
        double weight = (whole - t);
        entrp = wtMeanSqErr(wSq[x], wVl[x], t)/t;
        infoGain = defInfo - (entrp * t) -
          wtMeanSqErr((totalWS-wSq[x]+uncoverWtSq),
                      (totalWV-wVl[x]+uncoverWtVl),
                      weight);
      }

      // Test the exclusive expression
      boolean isWithin =true;
      if(m_IsExclude){
        double infoGain2, entrp2;
        if(m_ClassAttribute.isNominal()){ // Nominal class
          double[] other2 = new double[m_NumClasses];
          double[] notIn = new double[m_NumClasses];
          for(int y=0; y < m_NumClasses; y++){
            other2[y] = stats[x][y] + missing[y] + uncover[y];
            notIn[y] = all[y] - stats[x][y] - missing[y];
          }

          double msSum = Utils.sum(missing);
          double otherCover2 = t + msSum + Utils.sum(uncover);
          // Weight of the data with a non-missing value different from x
          double notInCover = sum-t-msSum;

          entrp2 = entropy(notIn, notInCover);
          double uncEntp2 = entropy(other2, otherCover2);
          infoGain2 = defInfo -
            (entrp2*notInCover + uncEntp2*otherCover2)/whole;
        }
        else{                             // Numeric class
          double msWts = splitData[bag].sumOfWeights();
          double weight2 = t + uncoverSum + msWts;
          // Weight of the data with a non-missing value different from x
          double notInCover = sum-t-msWts;

          entrp2 = wtMeanSqErr((totalWS-wSq[x]-msingWS),
                               (totalWV-wVl[x]-msingWV),notInCover)
            /notInCover;
          infoGain2 = defInfo - entrp2 * notInCover -
            wtMeanSqErr((wSq[x]+uncoverWtSq+msingWS),
                        (wVl[x]+uncoverWtVl+msingWV),
                        weight2);
        }

        // Use the exclusive expression?
        if (Utils.gr(infoGain2, infoGain) ||
            (Utils.eq(infoGain2, infoGain) && Utils.sm(entrp2, entrp))){
          infoGain = infoGain2;
          entrp = entrp2;
          isWithin =false;
        }
      }

      // Test this split
      if (Utils.gr(infoGain, maxInfoGain) ||
          (Utils.eq(infoGain, maxInfoGain) && Utils.sm(entrp, minEntrp))){
        value = (double)x;
        maxInfoGain = infoGain;
        // infoGain is computed as (defInfo - info), so the information
        // left after this split is (defInfo - infoGain). This matches the
        // "don't split" case above, where zero gain yields inform = defInfo.
        inform = defInfo - maxInfoGain;
        minEntrp = entrp;
        isIn = isWithin;
      }
    }

    return splitData;
  }

  /**
   * Whether the instance is covered by this antecedent.
   * An instance with a missing value for the attribute is never covered.
   *
   * @param inst the instance in question
   * @return the boolean value indicating whether the instance is covered
   *         by this antecedent
   */
  public boolean isCover(Instance inst){
    boolean isCover=false;
    if(!inst.isMissing(att)){
      if(isIn){
        if(Utils.eq(inst.value(att), value))
          isCover=true;
      }
      else if(!Utils.eq(inst.value(att), value))
        isCover=true;
    }
    return isCover;
  }

  /**
   * Whether the expression is "att = value" or "att != value"
   * for this nominal attribute. True if in the former expression,
   * otherwise the latter
   *
   * @return the boolean value
   */
  public boolean isIn(){
    return isIn;
  }

  /**
   * Prints this antecedent
   *
   * @return a textual description of this antecedent
   */
  public String toString() {
    String symbol = isIn ? " = " : " != ";
    return (att.name() + symbol + att.value((int)value));
  }
}
/**
* Returns an enumeration describing the available options
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -