⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 activedecorate.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 weka 的半监督学习（semi-supervised learning）分类器
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
     * @return true if debugging output is on     */    public boolean getDebug() {	return m_Debug;    }        /**     * Set the base classifier for Decorate.     *     * @param newClassifier the Classifier to use.     */    public void setClassifier(Classifier newClassifier) {	m_Classifier = newClassifier;    }    /**     * Get the classifier used as the base classifier     *     * @return the classifier used as the classifier     */    public Classifier getClassifier() {	return m_Classifier;    }    /**     * Factor that determines number of artificial examples to generate.     *     * @return factor that determines number of artificial examples to generate     */    public double getArtificialSize() {	return m_ArtSize;    }      /**     * Sets factor that determines number of artificial examples to generate.     *     * @param newwArtSize factor that determines number of artificial examples to generate     */    public void setArtificialSize(double newArtSize) {	m_ArtSize = newArtSize;    }        /**     * Gets the desired size of the committee.     *     * @return the desired size of the committee     */    public int getDesiredSize() {	return m_DesiredSize;    }        /**     * Sets the desired size of the committee.     *     * @param newDesiredSize the desired size of the committee     */    public void setDesiredSize(int newDesiredSize) {	m_DesiredSize = newDesiredSize;    }        /**     * Sets the max number of Decorate iterations to run.     *     * @param numIterations  max number of Decorate iterations to run     */    public void setNumIterations(int numIterations) {	m_NumIterations = numIterations;    }    /**     * Gets the max number of Decorate iterations to run.     *     * @return the  max number of Decorate iterations to run     */    public int getNumIterations() {        return m_NumIterations;    }        /**     * Set the seed for random number generator.     
*     * @param seed the random number seed      */    public void setSeed(int seed) {	m_Seed = seed;	if(m_Seed==-1){	    m_Random = new Random();	}else{	    m_Random = new Random(m_Seed);	}    }        /**     * Gets the seed for the random number generator.     *     * @return the seed for the random number generator     */    public int getSeed() {        return m_Seed;    }    /**     * Build Decorate classifier     *     * @param data the training data to be used for generating the classifier     * @exception Exception if the classifier could not be built successfully     */    public void buildClassifier(Instances data) throws Exception {	if(m_Classifier == null) {	    throw new Exception("A base classifier has not been specified!");	}	if(data.checkForStringAttributes()) {	    throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");	}	if(data.classAttribute().isNumeric()) {	    throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");	}	if(m_NumIterations < m_DesiredSize)	    throw new Exception("Max number of iterations must be >= desired ensemble size!");		trainSelectionCommittee(data);	int i = 1;//current committee size	int numTrials = 1;//number of Decorate iterations 	Instances divData = new Instances(data);//local copy of data - diversity data	Instances artData = null;//artificial data	//compute number of artficial instances to add at each iteration	int artSize = (int) (Math.abs(m_ArtSize)*data.numInstances());	if(artSize==0) artSize=1;//atleast add one random example	computeStats(data);//Compute training data stats for creating artificial examples		//initialize new committee	m_Committee = new Vector();	Classifier newClassifier = m_Classifier;	newClassifier.buildClassifier(divData);	m_Committee.add(newClassifier);	double eComm = computeError(divData);//compute ensemble error	//if(m_Debug) System.out.println("Initialize:\tClassifier "+i+" added to ensemble. 
Ensemble error = "+eComm);		//repeat till desired committee size is reached OR the max number of iterations is exceeded 	while(i<m_DesiredSize && numTrials<m_NumIterations){	    //Generate artificial training examples	    artData = generateArtificialData(artSize, data);	    	    //Label artificial examples	    labelData(artData);	    	    //Remove all the artificial data from the previous step (if any)	    if(divData.numInstances() > data.numInstances()) {		removeInstances(divData, artSize);	    }	    addInstances(divData, artData);//Add new artificial data	    	    //Build new classifier	    Classifier tmp[] = Classifier.makeCopies(m_Classifier,1);	    newClassifier = tmp[0]; 	    newClassifier.buildClassifier(divData);	    	    //Test if the new classifier should be added to the ensemble	    m_Committee.add(newClassifier);//add new classifier to current committee	    double currError = computeError(data);	    if(currError <= eComm){//adding the new member did not increase the error		i++;		eComm = currError;		//if(m_Debug) System.out.println("Iteration: "+(1+numTrials)+"\tClassifier "+i+" added to ensemble. 
Ensemble error = "+eComm);	    }else{//reject the current classifier because it increased the ensemble error 		m_Committee.removeElementAt(m_Committee.size()-1);//pop the last member	    }	    numTrials++;	}    }    //Train alternate ensemble method for use in selection    protected void trainSelectionCommittee(Instances data) throws Exception{    	if(m_SelectionScheme==BAGGING){	    if(m_SelectionCommittee==null){//initialize Bagging		System.out.println("Initializing Bagging...");		m_SelectionCommittee = new Bagging();		((Bagging)m_SelectionCommittee).setClassifier(getClassifier());		((Bagging)m_SelectionCommittee).setSeed(getSeed());		((Bagging)m_SelectionCommittee).setNumIterations(getDesiredSize());		((Bagging)m_SelectionCommittee).setBagSizePercent(100);	    }	    m_SelectionCommittee.buildClassifier(data);	}else if(m_SelectionScheme==BOOSTING){	    if(m_SelectionCommittee==null){//initialize Boosting		System.out.println("Initializing AdaBoost...");		m_SelectionCommittee = new AdaBoostM1();		((AdaBoostM1)m_SelectionCommittee).setClassifier(getClassifier());		((AdaBoostM1)m_SelectionCommittee).setSeed(getSeed());		((AdaBoostM1)m_SelectionCommittee).setMaxIterations(getDesiredSize());	    }	    m_SelectionCommittee.buildClassifier(data);	}    }        /**      * Compute and store statistics required for generating artificial data.     
*     * @param data training instances     * @exception Exception if statistics could not be calculated successfully     */    protected void computeStats(Instances data) throws Exception{	int numAttributes = data.numAttributes();	m_AttributeStats = new Vector(numAttributes);//use to map attributes to their stats		for(int j=0; j<numAttributes; j++){	    if(data.attribute(j).isNominal()){		//Compute the probability of occurence of each distinct value 		int []nomCounts = (data.attributeStats(j)).nominalCounts;		double []counts = new double[nomCounts.length];		if(counts.length < 2) throw new Exception("Nominal attribute has less than two distinct values!"); 		//Perform Laplace smoothing		for(int i=0; i<counts.length; i++)		    counts[i] = nomCounts[i] + 1;		Utils.normalize(counts);		double []stats = new double[counts.length - 1];		stats[0] = counts[0];		//Calculate cumulative probabilities		for(int i=1; i<stats.length; i++)		    stats[i] = stats[i-1] + counts[i];		m_AttributeStats.add(j,stats);	    }else if(data.attribute(j).isNumeric()){		//Get mean and standard deviation from the training data		double []stats = new double[2];		stats[0] = data.meanOrMode(j);		stats[1] = Math.sqrt(data.variance(j));		m_AttributeStats.add(j,stats);	    }else System.err.println("Decorate can only handle numeric and nominal values.");	}    }    /**     * Generate artificial training examples.     
* @param artSize size of examples set to create     * @param data training data     * @return the set of unlabeled artificial examples     */    protected Instances generateArtificialData(int artSize, Instances data){	int numAttributes = data.numAttributes();	Instances artData = new Instances(data, artSize);	double []att; 	Instance artInstance;		for(int i=0; i<artSize; i++){	    att = new double[numAttributes];	    for(int j=0; j<numAttributes; j++){		if(data.attribute(j).isNominal()){		    //Select nominal value based on the frequency of occurence in the training data  		    double []stats = (double [])m_AttributeStats.get(j);		    att[j] =  (double) selectIndexProbabilistically(stats);		}		else if(data.attribute(j).isNumeric()){		    //Generate numeric value from the Guassian distribution 		    //defined by the mean and std dev of the attribute		    double []stats = (double [])m_AttributeStats.get(j);		    att[j] = (m_Random.nextGaussian()*stats[1])+stats[0];		}else System.err.println("Decorate can only handle numeric and nominal values.");	    }	    artInstance = new Instance(1.0, att);	    artData.add(artInstance);	}	return artData;    }            /**      * Labels the artificially generated data.     *     * @param artData the artificially generated instances     * @exception Exception if instances cannot be labeled successfully      */    protected void labelData(Instances artData) throws Exception {	Instance curr;	double []probs;		for(int i=0; i<artData.numInstances(); i++){	    curr = artData.instance(i);	    //compute the class membership probs predicted by the current ensemble 	    probs = distributionForInstance(curr);	    //select class label inversely proportional to the ensemble predictions	    curr.setClassValue(inverseLabel(probs));	}	    }        /**      * Select class label such that the probability of selection is     * inversely proportional to the ensemble's predictions.     
*     * @param probs class membership probabilities of instance     * @return index of class label selected     * @exception Exception if instances cannot be labeled successfully      */    protected int inverseLabel(double []probs) throws Exception{	double []invProbs = new double[probs.length];	//Produce probability distribution inversely proportional to the given	for(int i=0; i<probs.length; i++){	    if(probs[i]==0){		invProbs[i] = Double.MAX_VALUE/probs.length; 		//Account for probability values of 0 - to avoid divide-by-zero errors		//Divide by probs.length to make sure normalizing works properly	    }else{		invProbs[i] = 1.0 / probs[i];	    }	}	Utils.normalize(invProbs);	double []cdf = new double[invProbs.length];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -