⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crate.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 Weka 的半指导学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    public double getArtificialSize() {
        return this.m_ArtSize;
    }

    /**
     * Sets the factor that determines the number of artificial examples to generate.
     *
     * @param newArtSize factor that determines the number of artificial examples to generate
     */
    public void setArtificialSize(double newArtSize) {
        this.m_ArtSize = newArtSize;
    }

    /**
     * Returns the committee size this ensemble aims for.
     *
     * @return the desired size of the committee
     */
    public int getDesiredSize() {
        return this.m_DesiredSize;
    }

    /**
     * Changes the committee size this ensemble aims for.
     *
     * @param newDesiredSize the desired size of the committee
     */
    public void setDesiredSize(int newDesiredSize) {
        this.m_DesiredSize = newDesiredSize;
    }

    /**
     * Changes the upper bound on the number of Crate iterations.
     *
     * @param numIterations max number of Crate iterations to run
     */
    public void setNumIterations(int numIterations) {
        this.m_NumIterations = numIterations;
    }

    /**
     * Returns the upper bound on the number of Crate iterations.
     *
     * @return the max number of Crate iterations to run
     */
    public int getNumIterations() {
        return this.m_NumIterations;
    }

    /**
     * Stores the seed and replaces the random number generator accordingly.
     * A seed of -1 creates the generator without an explicit seed; any other
     * value is passed to the generator as its seed.
     *
     * @param seed the random number seed
     */
    public void setSeed(int seed) {
        m_Seed = seed;
        m_Random = (m_Seed == -1) ? new Random() : new Random(m_Seed);
    }

    /**
     * Gets the seed for the random number generator.
*     * @return the seed for the random number generator     */    public int getSeed() {        return m_Seed;    }    /**     * Build Crate classifier     *     * @param data the training data to be used for generating the classifier     * @exception Exception if the classifier could not be built successfully     */    public void buildClassifier(Instances data) throws Exception {	if(m_Classifier == null) {	    throw new Exception("A base classifier has not been specified!");	}	if(data.checkForStringAttributes()) {	    throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");	}	if(!(data.classAttribute().isNumeric())) {	    throw new UnsupportedClassTypeException("Crate must be applied to numeric classes!");	}	if(m_NumIterations < m_DesiredSize)	    throw new Exception("Max number of iterations must be >= desired ensemble size!");		int i = 1;//current committee size	int numTrials = 1;//number of Crate iterations 	Instances divData = new Instances(data);//local copy of data - diversity data	divData.deleteWithMissingClass();	//m_Evaluation = new Evaluation(divData);	Instances artData = null;//artificial data	//compute number of artficial instances to add at each iteration	int artSize = (int) (Math.abs(m_ArtSize)*divData.numInstances());	if(artSize==0) artSize=1;//atleast add one random example	computeStats(data);//Compute training data stats for creating artificial examples		//initialize new committee	m_Committee = new Vector();	Classifier copiesOfClassifier[] = Classifier.makeCopies(m_Classifier,m_DesiredSize);	//All these copies may not be used	//Classifier newClassifier = m_Classifier;	Classifier newClassifier = copiesOfClassifier[0];	newClassifier.buildClassifier(divData);	m_Committee.add(newClassifier);	double eComm = computeError(divData);//compute ensemble error	if(m_Debug) System.out.println("Initialize:\tClassifier "+i+" added to ensemble. 
Ensemble error = "+eComm);		//repeat till desired committee size is reached OR the max number of iterations is exceeded 	while(i<m_DesiredSize && numTrials<m_NumIterations){	    //Generate artificial training examples	    artData = generateArtificialData(artSize, divData);	    //Label artificial examples	    labelData(artData);	    addInstances(divData, artData);//Add new artificial data	    	    //Build new classifier	    newClassifier = copiesOfClassifier[i]; 	    newClassifier.buildClassifier(divData);	    //Remove all the artificial data	    removeInstances(divData, artSize);	    	    //Test if the new classifier should be added to the ensemble	    m_Committee.add(newClassifier);//add new classifier to current committee	    double currError = computeError(divData);	    if(currError <= eComm){//adding the new member did not increase the error		i++;		eComm = currError;		if(m_Debug) System.out.println("Iteration: "+(1+numTrials)+"\tClassifier "+i+" added to ensemble. Ensemble error = "+eComm);	    }else{//reject the current classifier because it increased the ensemble error 		m_Committee.removeElementAt(m_Committee.size()-1);//pop the last member	    }	    numTrials++;	}    }        /**      * Compute and store statistics required for generating artificial data.     
*     * @param data training instances     * @exception Exception if statistics could not be calculated successfully     */    protected void computeStats(Instances data) throws Exception{	int numAttributes = data.numAttributes();	m_AttributeStats = new Vector(numAttributes);//use to map attributes to their stats		for(int j=0; j<numAttributes; j++){	    if(data.attribute(j).isNominal()){		//Compute the probability of occurence of each distinct value 		int []nomCounts = (data.attributeStats(j)).nominalCounts;		double []counts = new double[nomCounts.length];		if(counts.length < 2) throw new Exception("Nominal attribute has less than two distinct values!"); 		//Perform Laplace smoothing		for(int i=0; i<counts.length; i++)		    counts[i] = nomCounts[i] + 1;		Utils.normalize(counts);		double []stats = new double[counts.length - 1];		stats[0] = counts[0];		//Calculate cumulative probabilities		for(int i=1; i<stats.length; i++)		    stats[i] = stats[i-1] + counts[i];		m_AttributeStats.add(j,stats);	    }else if(data.attribute(j).isNumeric()){		//Get mean and standard deviation from the training data		double []stats = new double[2];		stats[0] = data.meanOrMode(j);		stats[1] = Math.sqrt(data.variance(j));		m_AttributeStats.add(j,stats);	    }else System.err.println("Crate can only handle numeric and nominal values.");	}    }    /**     * Generate artificial training examples.     
* @param artSize size of examples set to create     * @param data training data     * @return the set of unlabeled artificial examples     */    protected Instances generateArtificialData(int artSize, Instances data){	int numAttributes = data.numAttributes();	Instances artData = new Instances(data, artSize);	double []att; 	Instance artInstance;		for(int i=0; i<artSize; i++){	    att = new double[numAttributes];	    for(int j=0; j<numAttributes; j++){		if(data.attribute(j).isNominal()){		    //Select nominal value based on the frequency of occurence in the training data  		    double []stats = (double [])m_AttributeStats.get(j);		    att[j] =  (double) selectIndexProbabilistically(stats);		}		else if(data.attribute(j).isNumeric()){		    //Generate numeric value from the Guassian distribution 		    //defined by the mean and std dev of the attribute		    double []stats = (double [])m_AttributeStats.get(j);		    att[j] = (m_Random.nextGaussian()*stats[1])+stats[0];		}else System.err.println("Crate can only handle numeric and nominal values.");	    }	    artInstance = new Instance(1.0, att);	    artData.add(artInstance);	}	return artData;    }    /**      * Given cumulative probabilities select a nominal attribute value index      *     * @param cdf array of cumulative probabilities     * @return index of attribute selected based on the probability distribution      */    protected int selectIndexProbabilistically(double []cdf){	double rnd = m_Random.nextDouble();	int index = 0;	while(index < cdf.length && rnd > cdf[index]){	    index++;	}	return index;    }            /**      * Labels the artificially generated data.     
*     * @param artData the artificially generated instances     * @exception Exception if instances cannot be labeled successfully      */    protected void labelData(Instances artData) throws Exception {	Instance curr;	double []preds = new double[m_Committee.size()];	double mean,stdDev;		for(int i=0; i<artData.numInstances(); i++){	    curr = artData.instance(i);	    //find the mean and std dev of predictions of committee members	    for(int j=0; j<m_Committee.size(); j++)		preds[j] = ((Classifier)m_Committee.get(j)).classifyInstance(curr);	    	    mean = Utils.mean(preds);	    stdDev = Math.sqrt(Utils.variance(preds));	    //select target value to be perturbed from the mean of the current committee's prediction by the alpha factor	    //curr.setClassValue(m_Random.nextGaussian()*mean + m_Alpha*stdDev); 	    if(m_Random.nextDouble()>0.5) 	    	curr.setClassValue(mean + m_Alpha*stdDev); 	    else curr.setClassValue(mean - m_Alpha*stdDev); 	}	    }        /**     * Removes a specified number of instances from the given set of instances.     *     * @param data given instances     * @param numRemove number of instances to delete from the given instances     */    protected void removeInstances(Instances data, int numRemove){	int num = data.numInstances();	for(int i=num - 1; i>num - 1 - numRemove;i--){	    data.delete(i);	}    }        /**     * Add new instances to the given set of instances.     *     * @param data given instances     * @param newData set of instances to add to given instances     */    protected void addInstances(Instances data, Instances newData){	for(int i=0; i<newData.numInstances(); i++)	    data.add(newData.instance(i));    }        /**      * Computes the error in prediction on the given data.     
*     * @param data the instances to be classified     * @return mean absolute error     * @exception Exception if error can not be computed successfully     */    protected double computeError(Instances data) throws Exception {	double error;	m_Evaluation = new Evaluation(data); //reset the counter in Evaluation	m_Evaluation.evaluateModel(this, data);		switch(m_ErrorMeasure){	case MAE:	    error = m_Evaluation.meanAbsoluteError();	    break;	case RMS:	    error = m_Evaluation.rootMeanSquaredError();	    break;	case ROOT_RELATIVE_SQUARED:	    error = m_Evaluation.rootRelativeSquaredError();	    break;	default:	    error = m_Evaluation.meanAbsoluteError();	}	 	return error;    }     /**   * Classifies a given instance.   *   * @param instance the instance to be classified   * @return the predicted value   * @exception Exception if instance could not be predicted successfully   */    public double classifyInstance(Instance instance) throws Exception{	if (!instance.classAttribute().isNumeric())	    throw new UnsupportedClassTypeException("Crate is for numeric classes!");	double pred = 0.0;	Classifier curr;	for (int i = 0; i < m_Committee.size(); i++) {	    curr = (Classifier) m_Committee.get(i);	    pred += curr.classifyInstance(instance);	}	pred /= m_Committee.size();	return pred;    }        /**     * Returns description of the Crate classifier.     *     * @return description of the Crate classifier as a string     */    public String toString() {		if (m_Committee == null) {	    return "Crate: No model built yet.";	}	StringBuffer text = new StringBuffer();	text.append("Crate base classifiers: \n\n");	for (int i = 0; i < m_Committee.size(); i++)	    text.append(((Classifier) m_Committee.get(i)).toString() + "\n\n");	text.append("Number of classifier in the ensemble: "+m_Committee.size()+"\n");	return text.toString();    }        /**     * Main method for testing this class.     
*     * @param argv the options     */    public static void main(String [] argv) {		try {	    System.out.println(Evaluation.evaluateModel(new Crate(), argv));	} catch (Exception e) {	    System.err.println(e.getMessage());	}    }}    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -