📄 conjunctiverule.java
字号:
private boolean isIn; /* Constructor for nominal class */ public NominalAntd(Attribute a, double[] unc){ super(a, unc); int bag = att.numValues(); stats = new double[bag][m_NumClasses]; coverage = new double[bag]; isIn = true; } /* Constructor for numeric class */ public NominalAntd(Attribute a, double sq, double vl, double wts){ super(a, sq, vl, wts); int bag = att.numValues(); stats = null; coverage = new double[bag]; isIn = true; } /** * Implements the splitData function. * This procedure is to split the data into bags according * to the nominal attribute value * the data with missing values are stored in the last bag. * The infoGain for each bag is also calculated. * * @param data the data to be split * @param defInfo the default information for data * @return the array of data after split */ public Instances[] splitData(Instances data, double defInfo){ int bag = att.numValues(); Instances[] splitData = new Instances[bag+1]; double[] wSq = new double[bag]; double[] wVl = new double[bag]; double totalWS=0, totalWV=0, msingWS=0, msingWV=0, sum=data.sumOfWeights(); double[] all = new double[m_NumClasses]; double[] missing = new double[m_NumClasses]; for(int w=0; w < m_NumClasses; w++) all[w] = missing[w] = 0; for(int x=0; x<bag; x++){ coverage[x] = wSq[x] = wVl[x] = 0; if(stats != null) for(int y=0; y < m_NumClasses; y++) stats[x][y] = 0; splitData[x] = new Instances(data, data.numInstances()); } splitData[bag] = new Instances(data, data.numInstances()); // Record the statistics of data for(int x=0; x<data.numInstances(); x++){ Instance inst=data.instance(x); if(!inst.isMissing(att)){ int v = (int)inst.value(att); splitData[v].add(inst); coverage[v] += inst.weight(); if(m_ClassAttribute.isNominal()){ // Nominal class stats[v][(int)inst.classValue()] += inst.weight(); all[(int)inst.classValue()] += inst.weight(); } else{ // Numeric class wSq[v] += inst.weight() * inst.classValue() * inst.classValue(); wVl[v] += inst.weight() * inst.classValue(); totalWS += inst.weight() * inst.classValue() * inst.classValue(); totalWV += inst.weight() * inst.classValue(); } } else{ splitData[bag].add(inst); if(m_ClassAttribute.isNominal()){ // Nominal class all[(int)inst.classValue()] += inst.weight(); missing[(int)inst.classValue()] += inst.weight(); } else{ // Numeric class totalWS += inst.weight() * inst.classValue() * inst.classValue(); totalWV += inst.weight() * inst.classValue(); msingWS += inst.weight() * inst.classValue() * inst.classValue(); msingWV += inst.weight() * inst.classValue(); } } } // The total weights of the whole grow data double whole; if(m_ClassAttribute.isNominal()) whole = sum + Utils.sum(uncover); else whole = sum + uncoverSum; // Find the split double minEntrp=Double.MAX_VALUE; maxInfoGain = 0; // Check if >=2 splits have more than the minimal data int count=0; for(int x=0; x<bag; x++) if(Utils.grOrEq(coverage[x], m_MinNo)) ++count; if(count < 2){ // Don't split maxInfoGain = 0; inform = defInfo; value = Double.NaN; return null; } for(int x=0; x<bag; x++){ double t = coverage[x], entrp, infoGain; if(Utils.sm(t, m_MinNo)) continue; if(m_ClassAttribute.isNominal()){ // Nominal class double[] other = new double[m_NumClasses]; for(int y=0; y < m_NumClasses; y++) other[y] = all[y] - stats[x][y] + uncover[y]; double otherCover = whole - t; // Entropies of data covered and uncovered entrp = entropy(stats[x], t); double uncEntp = entropy(other, otherCover); // Weighted average infoGain = defInfo - (entrp*t + uncEntp*otherCover)/whole; } else{ // Numeric class double weight = (whole - t); entrp = wtMeanSqErr(wSq[x], wVl[x], t)/t; infoGain = defInfo - (entrp * t) - wtMeanSqErr((totalWS-wSq[x]+uncoverWtSq), (totalWV-wVl[x]+uncoverWtVl), weight); } // Test the exclusive expression boolean isWithin =true; if(m_IsExclude){ double infoGain2, entrp2; if(m_ClassAttribute.isNominal()){ // Nominal class double[] other2 = new double[m_NumClasses]; double[] notIn = new double[m_NumClasses]; for(int y=0; y < m_NumClasses; y++){ other2[y] = stats[x][y] + missing[y] + uncover[y]; notIn[y] = all[y] - stats[x][y] - missing[y]; } double msSum = Utils.sum(missing); double otherCover2 = t + msSum + Utils.sum(uncover); entrp2 = entropy(notIn, (sum-t-msSum)); double uncEntp2 = entropy(other2, otherCover2); infoGain2 = defInfo - (entrp2*(sum-t-msSum) + uncEntp2*otherCover2)/whole; } else{ // Numeric class double msWts = splitData[bag].sumOfWeights(); double weight2 = t + uncoverSum + msWts; entrp2 = wtMeanSqErr((totalWS-wSq[x]-msingWS), (totalWV-wVl[x]-msingWV),(sum-t-msWts)) /(sum-t-msWts); infoGain2 = defInfo - entrp2 * (sum-t-msWts) - wtMeanSqErr((wSq[x]+uncoverWtSq+msingWS), (wVl[x]+uncoverWtVl+msingWV), weight2); } // Use the exclusive expression? if (Utils.gr(infoGain2, infoGain) || (Utils.eq(infoGain2, infoGain) && Utils.sm(entrp2, entrp))){ infoGain = infoGain2; entrp = entrp2; isWithin =false; } } // Test this split if (Utils.gr(infoGain, maxInfoGain) || (Utils.eq(infoGain, maxInfoGain) && Utils.sm(entrp, minEntrp))){ value = (double)x; maxInfoGain = infoGain; inform = maxInfoGain - defInfo; minEntrp = entrp; isIn = isWithin; } } return splitData; } /** * Whether the instance is covered by this antecedent * * @param inst the instance in question * @return the boolean value indicating whether the instance is covered * by this antecedent */ public boolean isCover(Instance inst){ boolean isCover=false; if(!inst.isMissing(att)){ if(isIn){ if(Utils.eq(inst.value(att), value)) isCover=true; } else if(!Utils.eq(inst.value(att), value)) isCover=true; } return isCover; } /** * Whether the expression is "att = value" or att != value" * for this nominal attribute. True if in the former expression, * otherwise the latter * * @return the boolean value */ public boolean isIn(){ return isIn; } /** * Prints this antecedent * * @return a textual description of this antecedent */ public String toString() { String symbol = isIn ? " = " : " != "; return (att.name() + symbol + att.value((int)value)); } } /** * Returns an enumeration describing the available options * Valid options are: <p> * * -N number <br> * Set number of folds for REP. One fold is * used as the pruning set. (Default: 3) <p> * * -R <br> * Set if NOT randomize the data before split to growing and * pruning data. If NOT set, the seed of randomization is * specified by the -S option. (Default: randomize) <p> * * -S <br> * Seed of randomization. (Default: 1)<p> * * -E <br> * Set whether consider the exclusive expressions for nominal * attribute split. (Default: false) <p> * * -M number <br> * Set the minimal weights of instances within a split. * (Default: 2) <p> * * -P number <br> * Set the number of antecedents allowed in the rule if pre-pruning * is used. If this value is other than -1, then pre-pruning will be * used, otherwise the rule uses REP. (Default: -1) <p> * * @return an enumeration of all the available options */ public Enumeration listOptions() { Vector newVector = new Vector(6); newVector.addElement(new Option("\tSet number of folds for REP\n" + "\tOne fold is used as pruning set.\n" + "\t(default 3)","N", 1, "-N <number of folds>")); newVector.addElement(new Option("\tSet if NOT uses randomization\n" + "\t(default:use randomization)","R", 0, "-R")); newVector.addElement(new Option("\tSet whether consider the exclusive\n" + "\texpressions for nominal attributes\n"+ "\t(default false)","E", 0, "-E")); newVector.addElement(new Option("\tSet the minimal weights of instances\n" + "\twithin a split.\n" + "\t(default 2.0)","M", 1, "-M <min. weights>")); newVector.addElement(new Option("\tSet number of antecedents for pre-pruning\n" + "\tif -1, then REP is used\n" + "\t(default -1)","P", 1, "-P <number of antecedents>")); newVector.addElement(new Option("\tSet the seed of randomization\n" + "\t(default 1)","S", 1, "-S <seed>")); return newVector.elements(); } /** * Parses a given list of options. * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String numFoldsString = Utils.getOption('N', options); if (numFoldsString.length() != 0) m_Folds = Integer.parseInt(numFoldsString); else m_Folds = 3; String minNoString = Utils.getOption('M', options); if (minNoString.length() != 0) m_MinNo = Double.parseDouble(minNoString); else m_MinNo = 2.0; String seedString = Utils.getOption('S', options); if (seedString.length() != 0) m_Seed = Integer.parseInt(seedString); else m_Seed = 1; String numAntdsString = Utils.getOption('P', options); if (numAntdsString.length() != 0) m_NumAntds = Integer.parseInt(numAntdsString); else m_NumAntds = -1; m_IsRandomized = (!Utils.getFlag('R', options)); m_IsExclude = Utils.getFlag('E', options); } /** * Gets the current settings of the Classifier. * * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [10]; int current = 0; options[current++] = "-N"; options[current++] = "" + m_Folds; options[current++] = "-M"; options[current++] = "" + m_MinNo; options[current++] = "-P"; options[current++] = "" + m_NumAntds; options[current++] = "-S"; options[current++] = "" + m_Seed; if(!m_IsRandomized) options[current++] = "-R"; if(m_IsExclude) options[current++] = "-E"; while (current < options.length) options[current++] = ""; return options; } /** The access functions for parameters */ public void setFolds(int folds){ m_Folds = folds; } public int getFolds(){ return m_Folds; } public void setSeed(long s){ m_Seed = s; } public long getSeed(){ return m_Seed; } public boolean getRandomized(){ return m_IsRandomized;} public void setRandomized(boolean r){ m_IsRandomized = r;} public boolean getExclusive(){ return m_IsExclude;} public void setExclusive(boolean e){ m_IsExclude = e;} public void setMinNo(double m){ m_MinNo = m; } public double getMinNo(){ return m_MinNo; } public void setNumAntds(int n){ m_NumAntds = n; } public int getNumAntds(){ return m_NumAntds; } /** * Builds a single rule learner with REP dealing with nominal classes or * numeric classes. * For nominal classes, this rule learner predicts a distribution on * the classes. * For numeric classes, this learner predicts a single value. * * @param instances the training data * @exception Exception if classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { if (instances.checkForStringAttributes()) throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); Instances data = new Instances(instances); if(data.numInstances() == 0) throw new Exception("No training data!"); data.deleteWithMissingClass(); if(data.numInstances() == 0) throw new Exception("Not training data without missing class values."); if(data.numInstances() < m_Folds) throw new Exception("Not enough data for REP."); m_ClassAttribute = data.classAttribute(); if(m_ClassAttribute.isNominal()) m_NumClasses = m_ClassAttribute.numValues(); else m_NumClasses = 1; m_Antds = new FastVector(); m_DefDstr = new double[m_NumClasses]; m_Cnsqt = new double[m_NumClasses]; m_Targets = new FastVector(); m_Random = new Random(m_Seed); if(m_IsRandomized){ // Randomize the data data.randomize(m_Random); } if(m_NumAntds != -1){ grow(data); } else{ // Split data into Grow and Prune data.stratify(m_Folds); Instances growData=data.trainCV(m_Folds, m_Folds-1); Instances pruneData=data.testCV(m_Folds, m_Folds-1); grow(growData); // Build this rule prune(pruneData); // Prune this rule } if(m_ClassAttribute.isNominal()){ Utils.normalize(m_Cnsqt); if(Utils.gr(Utils.sum(m_DefDstr), 0)) Utils.normalize(m_DefDstr); } } /** * Computes class distribution for the given instance. * * @param instance the instance for which distribution is to be computed * @return the class distribution for the given instance */ public double[] distributionForInstance(Instance instance) throws Exception { if(instance == null) throw new Exception("Testing instance is NULL!"); if (isCover(instance)) return m_Cnsqt; else return m_DefDstr; } /** * Whether the instance covered by this rule * * @param inst the instance in question * @return the boolean value indicating whether the instance is covered by this rule */ public boolean isCover(Instance datum){ boolean isCover=true; for(int i=0; i<m_Antds.size(); i++){ Antd antd = (Antd)m_Antds.elementAt(i); if(!antd.isCover(datum)){ isCover = false; break; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -