📄 semisupdecorate.java
字号:
} if (getUseArtificial()) { options[current++] = "-A"; } options[current++] = "-T"; options[current++] = "" + getThreshold(); options[current++] = "-Z"; options[current++] = "" + getUnlabeledMethod(); options[current++] = "-S"; options[current++] = "" + getSeed(); options[current++] = "-I"; options[current++] = "" + getDesiredSize(); options[current++] = "-M"; options[current++] = "" + getNumIterations(); options[current++] = "-R"; options[current++] = "" + getArtificialSize(); options[current++] = "-L"; options[current++] = "" + getLambda(); if (getClassifier() != null) { options[current++] = "-W"; options[current++] = getClassifier().getClass().getName(); } options[current++] = "--"; System.arraycopy(classifierOptions, 0, options, current, classifierOptions.length); current += classifierOptions.length; while (current < options.length) { options[current++] = ""; } return options; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String desiredSizeTipText() { return "the desired number of member classifiers in the SemiSupDecorate ensemble. SemiSupDecorate may terminate " +"before this size is reached (depending on the value of numIterations). " +"Larger ensemble sizes usually lead to more accurate models, but increases " +"training time and model complexity."; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String numIterationsTipText() { return "the maximum number of SemiSupDecorate iterations to run. Each iteration generates a classifier, " +"but does not necessarily add it to the ensemble. SemiSupDecorate stops when the desired ensemble " +"size is reached. This parameter should be greater than " +"equal to the desiredSize. If the desiredSize is not being reached it may help to " +"increase this value."; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String artificialSizeTipText() { return "determines the number of artificial examples to use during training. Specified as " +"a proportion of the training data. Higher values can increase ensemble diversity."; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String seedTipText() { return "seed for random number generator used for creating artificial data." +" Set to -1 to use a random seed."; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter guib */ public String classifierTipText() { return "the desired base learner for the ensemble."; } /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "DECORATE is a meta-learner for building diverse ensembles of " +"classifiers by using specially constructed artificial training " +"examples. Comprehensive experiments have demonstrated that this " +"technique is consistently more accurate than the base classifier, Bagging and Random Forests." +"SemiSupDecorate also obtains higher accuracy than Boosting on small training sets, and achieves " +"comparable performance on larger training sets. " +"For more details see: P. Melville & R. J. Mooney. Constructing diverse classifier ensembles " +"using artificial training examples (IJCAI 2003).\n" +"P. Melville & R. J. Mooney. Creating diversity in ensembles using artificial data (submitted)."; } /** * Set debugging mode * * @param debug true if debug output should be printed */ public void setDebug(boolean debug) { m_Debug = debug; } /** * Get whether debugging is turned on * * @return true if debugging output is on */ public boolean getDebug() { return m_Debug; } /** * Set the base classifier for SemiSupDecorate. * * @param newClassifier the Classifier to use. */ public void setClassifier(Classifier newClassifier) { m_Classifier = newClassifier; } /** * Get the classifier used as the base classifier * * @return the classifier used as the classifier */ public Classifier getClassifier() { return m_Classifier; } /** * Factor that determines number of artificial examples to generate. * * @return factor that determines number of artificial examples to generate */ public double getArtificialSize() { return m_ArtSize; } /** * Sets factor that determines number of artificial examples to generate. * * @param newwArtSize factor that determines number of artificial examples to generate */ public void setArtificialSize(double newArtSize) { m_ArtSize = newArtSize; } /** * Gets the desired size of the committee. * * @return the desired size of the committee */ public int getDesiredSize() { return m_DesiredSize; } /** * Sets the desired size of the committee. * * @param newDesiredSize the desired size of the committee */ public void setDesiredSize(int newDesiredSize) { m_DesiredSize = newDesiredSize; } /** * Sets the max number of SemiSupDecorate iterations to run. * * @param numIterations max number of SemiSupDecorate iterations to run */ public void setNumIterations(int numIterations) { m_NumIterations = numIterations; } /** * Gets the max number of SemiSupDecorate iterations to run. * * @return the max number of SemiSupDecorate iterations to run */ public int getNumIterations() { return m_NumIterations; } /** * Set the seed for random number generator. * * @param seed the random number seed */ public void setSeed(int seed) { m_Seed = seed; } /** * Gets the seed for the random number generator. * * @return the seed for the random number generator */ public int getSeed() { return m_Seed; } /** * Provide unlabeled data to the classifier. * @unlabeled the unlabeled Instances */ public void setUnlabeled(Instances unlabeled){ m_Unlabeled = new Instances(unlabeled);//make local copy of unlabeled data //Reweight unlabeled instances if necessary if (m_Lambda != 1.0) weightInstances(m_Unlabeled, m_Lambda); } /** Weighted all given instances with given weight */ protected void weightInstances (Instances insts, double weight) { Enumeration enumInsts = insts.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = (Instance) enumInsts.nextElement(); instance.setWeight(weight); } } /** * Build SemiSupDecorate classifier * * @param data the training data to be used for generating the classifier * @exception Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if(m_Classifier == null) { throw new Exception("A base classifier has not been specified!"); } if(data.checkForStringAttributes()) { throw new UnsupportedAttributeTypeException("Cannot handle string attributes!"); } if(data.classAttribute().isNumeric()) { throw new UnsupportedClassTypeException("SemiSupDecorate can't handle a numeric class!"); } if(m_NumIterations < m_DesiredSize) throw new Exception("Max number of iterations must be >= desired ensemble size!"); //initialize random number generator if(m_Seed==-1) m_Random = new Random(); else m_Random = new Random(m_Seed); //initialize ensemble wts to be equal m_EnsembleWts = new double [m_DesiredSize]; for(int j=0; j<m_DesiredSize; j++) m_EnsembleWts[j] = 1.0; initMeasures(); int numUnlabeledUsed = 0; int i = 1;//current committee size int numTrials = 1;//number of SemiSupDecorate iterations Instances divData = new Instances(data);//local copy of data - diversity data divData.deleteWithMissingClass(); Instances artData = null;//artificial data //compute number of artficial instances to add at each iteration int artSize = (int) (Math.abs(m_ArtSize)*divData.numInstances()); if(artSize==0) artSize=1;//atleast add one random example computeStats(data);//Compute training data stats for creating artificial examples //initialize new committee m_Committee = new Vector(); Classifier newClassifier = m_Classifier; newClassifier.buildClassifier(divData); m_Committee.add(newClassifier); double eComm = computeError(divData);//compute ensemble error if(m_Debug) System.out.println("Initialize:\tClassifier "+i+" added to ensemble. Ensemble error = "+eComm); //repeat till desired committee size is reached OR the max number of iterations is exceeded while(i<m_DesiredSize && numTrials<m_NumIterations){ if(m_UseArtificial){ //Generate artificial training examples artData = generateArtificialData(artSize, data); //Label artificial examples labelData(artData); //Add new artificial data addInstances(divData, artData); } //Add unlabeled data if(m_UseUnlabeled) numUnlabeledUsed = addUnlabeled(divData); //Build new classifier Classifier tmp[] = Classifier.makeCopies(m_Classifier,1); newClassifier = tmp[0]; newClassifier.buildClassifier(divData); //Remove unlabeled data if(m_UseUnlabeled) removeInstances(divData, numUnlabeledUsed); //Remove all the artificial data if(m_UseArtificial) removeInstances(divData, artSize); assert (divData.numInstances()==data.numInstances()) : "Diversity data error!"; //Test if the new classifier should be added to the ensemble m_Committee.add(newClassifier);//add new classifier to current committee double currError = computeError(divData); if(currError <= eComm){//adding the new member did not increase the error i++; eComm = currError; if(m_Debug) System.out.println("Iteration: "+(1+numTrials)+"\tClassifier "+i+" added to ensemble. Ensemble error = "+eComm); }else{//reject the current classifier because it increased the ensemble error m_Committee.removeElementAt(m_Committee.size()-1);//pop the last member } numTrials++; } } /** * Add unlabeled data to training set. * @param divData diversity data to add to. * @return number of unlabeled examples used. */ protected int addUnlabeled(Instances divData) throws Exception{ //set of unlabeled examples eventually used (some may be ignored). Instances unlabeledUsed = new Instances(divData); switch (m_UnlabeledMethod){ case ALL: unlabeledUsed = labelAll(m_Unlabeled); break; case IGNORE_LOW: unlabeledUsed = labelIgnoreLow(m_Unlabeled, unlabeledUsed); break; case IGNORE_HIGH: unlabeledUsed = labelIgnoreHigh(m_Unlabeled, unlabeledUsed); break; case FLIP_LOW: unlabeledUsed = labelFlipLow(m_Unlabeled); break; default: System.err.println("Unrecognized unlabeled method selected!"); } System.out.println("Unlabeled size = "+m_Unlabeled.numInstances()+" Used size = "+unlabeledUsed.numInstances()); addInstances(divData, unlabeledUsed); return unlabeledUsed.numInstances(); } /** Label examples as predicted by the current ensemble. */ protected Instances labelAll(Instances instances) throws Exception { Instance curr; for(int i=0; i<instances.numInstances(); i++){ curr = instances.instance(i); curr.setClassValue(classifyInstance(curr)); } return m_Unlabeled; } /** * Label high-confidence examples with the ensemble's prediction.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -