⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 semisupdecorate.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
	}	if (getUseArtificial()) {	    options[current++] = "-A";	}	options[current++] = "-T"; options[current++] = "" + getThreshold();	options[current++] = "-Z"; options[current++] = "" + getUnlabeledMethod();	options[current++] = "-S"; options[current++] = "" + getSeed();	options[current++] = "-I"; options[current++] = "" + getDesiredSize();	options[current++] = "-M"; options[current++] = "" + getNumIterations();	options[current++] = "-R"; options[current++] = "" + getArtificialSize();	options[current++] = "-L"; options[current++] = "" + getLambda();	if (getClassifier() != null) {	    options[current++] = "-W";	    options[current++] = getClassifier().getClass().getName();	}	options[current++] = "--";	System.arraycopy(classifierOptions, 0, options, current, 			 classifierOptions.length);	current += classifierOptions.length;	while (current < options.length) {	    options[current++] = "";	}	return options;    }      /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String desiredSizeTipText() {      return "the desired number of member classifiers in the SemiSupDecorate ensemble. SemiSupDecorate may terminate "	+"before this size is reached (depending on the value of numIterations). "	+"Larger ensemble sizes usually lead to more accurate models, but increases "	+"training time and model complexity.";  }      /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String numIterationsTipText() {    return "the maximum number of SemiSupDecorate iterations to run. Each iteration generates a classifier, "	+"but does not necessarily add it to the ensemble. SemiSupDecorate stops when the desired ensemble "	+"size is reached. This parameter should be greater than "	+"equal to the desiredSize. 
If the desiredSize is not being reached it may help to "	+"increase this value.";  }      /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String artificialSizeTipText() {    return "determines the number of artificial examples to use during training. Specified as "	+"a proportion of the training data. Higher values can increase ensemble diversity.";  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String seedTipText() {    return "seed for random number generator used for creating artificial data."	+" Set to -1 to use a random seed.";  }  /**   * Returns the tip text for this property   * @return tip text for this property suitable for   * displaying in the explorer/experimenter guib   */  public String classifierTipText() {    return "the desired base learner for the ensemble.";  }  /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {      return "DECORATE is a meta-learner for building diverse ensembles of "	  +"classifiers by using specially constructed artificial training "	  +"examples. Comprehensive experiments have demonstrated that this "	  +"technique is consistently more accurate than the base classifier, Bagging and Random Forests."	  +"SemiSupDecorate also obtains higher accuracy than Boosting on small training sets, and achieves "	  +"comparable performance on larger training sets. "	  +"For more details see: P. Melville & R. J. Mooney. Constructing diverse classifier ensembles "	  +"using artificial training examples (IJCAI 2003).\n"	  +"P. Melville & R. J. Mooney. 
Creating diversity in ensembles using artificial data (submitted).";  }    /**     * Set debugging mode     *     * @param debug true if debug output should be printed     */    public void setDebug(boolean debug) {	m_Debug = debug;    }        /**     * Get whether debugging is turned on     *     * @return true if debugging output is on     */    public boolean getDebug() {	return m_Debug;    }        /**     * Set the base classifier for SemiSupDecorate.     *     * @param newClassifier the Classifier to use.     */    public void setClassifier(Classifier newClassifier) {	m_Classifier = newClassifier;    }    /**     * Get the classifier used as the base classifier     *     * @return the classifier used as the classifier     */    public Classifier getClassifier() {	return m_Classifier;    }    /**     * Factor that determines number of artificial examples to generate.     *     * @return factor that determines number of artificial examples to generate     */    public double getArtificialSize() {	return m_ArtSize;    }      /**     * Sets factor that determines number of artificial examples to generate.     *     * @param newwArtSize factor that determines number of artificial examples to generate     */    public void setArtificialSize(double newArtSize) {	m_ArtSize = newArtSize;    }        /**     * Gets the desired size of the committee.     *     * @return the desired size of the committee     */    public int getDesiredSize() {	return m_DesiredSize;    }        /**     * Sets the desired size of the committee.     *     * @param newDesiredSize the desired size of the committee     */    public void setDesiredSize(int newDesiredSize) {	m_DesiredSize = newDesiredSize;    }        /**     * Sets the max number of SemiSupDecorate iterations to run.     
*     * @param numIterations  max number of SemiSupDecorate iterations to run     */    public void setNumIterations(int numIterations) {	m_NumIterations = numIterations;    }    /**     * Gets the max number of SemiSupDecorate iterations to run.     *     * @return the  max number of SemiSupDecorate iterations to run     */    public int getNumIterations() {        return m_NumIterations;    }        /**     * Set the seed for random number generator.     *     * @param seed the random number seed      */    public void setSeed(int seed) {	m_Seed = seed;    }        /**     * Gets the seed for the random number generator.     *     * @return the seed for the random number generator     */    public int getSeed() {        return m_Seed;    }    /**      * Provide unlabeled data to the classifier.     * @unlabeled the unlabeled Instances     */    public void setUnlabeled(Instances unlabeled){	m_Unlabeled = new Instances(unlabeled);//make local copy of unlabeled data	//Reweight unlabeled instances if necessary	if (m_Lambda != 1.0) 	    weightInstances(m_Unlabeled, m_Lambda);    }    /** Weighted all given instances with given weight */    protected void weightInstances (Instances insts, double weight) {	Enumeration enumInsts = insts.enumerateInstances();	while (enumInsts.hasMoreElements()) {	    Instance instance = (Instance) enumInsts.nextElement();	    instance.setWeight(weight);	}    }        /**     * Build SemiSupDecorate classifier     *     * @param data the training data to be used for generating the classifier     * @exception Exception if the classifier could not be built successfully     */    public void buildClassifier(Instances data) throws Exception {	if(m_Classifier == null) {	    throw new Exception("A base classifier has not been specified!");	}	if(data.checkForStringAttributes()) {	    throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");	}	if(data.classAttribute().isNumeric()) {	    throw new 
UnsupportedClassTypeException("SemiSupDecorate can't handle a numeric class!");	}	if(m_NumIterations < m_DesiredSize)	    throw new Exception("Max number of iterations must be >= desired ensemble size!");		//initialize random number generator	if(m_Seed==-1) m_Random = new Random();	else m_Random = new Random(m_Seed);		//initialize ensemble wts to be equal 	m_EnsembleWts = new double [m_DesiredSize];	for(int j=0; j<m_DesiredSize; j++)	    m_EnsembleWts[j] = 1.0;	initMeasures();		int numUnlabeledUsed = 0;	int i = 1;//current committee size	int numTrials = 1;//number of SemiSupDecorate iterations 	Instances divData = new Instances(data);//local copy of data - diversity data	divData.deleteWithMissingClass();	Instances artData = null;//artificial data	//compute number of artficial instances to add at each iteration	int artSize = (int) (Math.abs(m_ArtSize)*divData.numInstances());	if(artSize==0) artSize=1;//atleast add one random example	computeStats(data);//Compute training data stats for creating artificial examples		//initialize new committee	m_Committee = new Vector();	Classifier newClassifier = m_Classifier;	newClassifier.buildClassifier(divData);	m_Committee.add(newClassifier);	double eComm = computeError(divData);//compute ensemble error	if(m_Debug) System.out.println("Initialize:\tClassifier "+i+" added to ensemble. 
Ensemble error = "+eComm);		//repeat till desired committee size is reached OR the max number of iterations is exceeded 	while(i<m_DesiredSize && numTrials<m_NumIterations){	    	    if(m_UseArtificial){		//Generate artificial training examples		artData = generateArtificialData(artSize, data);		//Label artificial examples		labelData(artData);		//Add new artificial data		addInstances(divData, artData);	    }	    	    //Add unlabeled data	    if(m_UseUnlabeled) numUnlabeledUsed = addUnlabeled(divData);	    	    //Build new classifier	    Classifier tmp[] = Classifier.makeCopies(m_Classifier,1);	    newClassifier = tmp[0]; 	    newClassifier.buildClassifier(divData);	    	    //Remove unlabeled data	    if(m_UseUnlabeled) removeInstances(divData, numUnlabeledUsed);	    	    //Remove all the artificial data	    if(m_UseArtificial) removeInstances(divData, artSize);	    	    assert (divData.numInstances()==data.numInstances()) : "Diversity data error!";	    //Test if the new classifier should be added to the ensemble	    m_Committee.add(newClassifier);//add new classifier to current committee	    double currError = computeError(divData);	    if(currError <= eComm){//adding the new member did not increase the error		i++;		eComm = currError;		if(m_Debug) System.out.println("Iteration: "+(1+numTrials)+"\tClassifier "+i+" added to ensemble. Ensemble error = "+eComm);	    }else{//reject the current classifier because it increased the ensemble error 		m_Committee.removeElementAt(m_Committee.size()-1);//pop the last member	    }	    numTrials++;	}    }    /**      * Add unlabeled data to training set.      * @param divData diversity data to add to.     * @return number of unlabeled examples used.      */    protected int addUnlabeled(Instances divData) throws Exception{	//set of unlabeled examples eventually used (some may be ignored). 	
Instances unlabeledUsed = new Instances(divData);	switch (m_UnlabeledMethod){	case ALL:	    unlabeledUsed = labelAll(m_Unlabeled);	    break;	case IGNORE_LOW:	    unlabeledUsed = labelIgnoreLow(m_Unlabeled, unlabeledUsed);	    break;	case IGNORE_HIGH:	    unlabeledUsed = labelIgnoreHigh(m_Unlabeled, unlabeledUsed);	    break;	case FLIP_LOW:	    unlabeledUsed = labelFlipLow(m_Unlabeled);	    break;	default:	    System.err.println("Unrecognized unlabeled method selected!");	}	System.out.println("Unlabeled size = "+m_Unlabeled.numInstances()+" Used size = "+unlabeledUsed.numInstances());	addInstances(divData, unlabeledUsed);	return unlabeledUsed.numInstances();    }        /** Label examples as predicted by the current ensemble. */    protected Instances labelAll(Instances instances) throws Exception {	Instance curr;	for(int i=0; i<instances.numInstances(); i++){	    curr = instances.instance(i);	    curr.setClassValue(classifyInstance(curr));	}	return m_Unlabeled;    }    /**      * Label high-confidence examples with the ensemble's prediction.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -