📄 logisticbase.java
字号:
*/ protected void performBoosting(int numIterations) throws Exception{ //initialize Ys/Fs/ps double[][] trainYs = getYs(m_train); double[][] trainFs = getFs(m_numericData); double[][] probs = getProbs(trainFs); int iteration = 0; //run iterations while (iteration < numIterations) { boolean foundAttribute = performIteration(iteration, trainYs, trainFs, probs, m_numericData); if (foundAttribute) iteration++; else break; } m_numRegressions = iteration; } /** * Runs LogitBoost using the stopping criterion on the training set. * The number of iterations is used that gives the lowest error on the training set, either misclassification * or error on probabilities (depending on the errorOnProbabilities option). * @throws Exception if something goes wrong */ protected void performBoosting() throws Exception{ //initialize Ys/Fs/ps double[][] trainYs = getYs(m_train); double[][] trainFs = getFs(m_numericData); double[][] probs = getProbs(trainFs); int iteration = 0; double[] trainErrors = new double[m_maxIterations+1]; trainErrors[0] = getErrorRate(m_train); int noMin = 0; double lastMin = Double.MAX_VALUE; while (iteration < m_maxIterations) { boolean foundAttribute = performIteration(iteration, trainYs, trainFs, probs, m_numericData); if (foundAttribute) { iteration++; m_numRegressions = iteration; } else { //could not fit simple regression break; } trainErrors[iteration] = getErrorRate(m_train); //heuristic: stop LogitBoost if the current minimum has not changed for <m_heuristicStop> iterations if (noMin > m_heuristicStop) break; if (trainErrors[iteration] < lastMin) { lastMin = trainErrors[iteration]; noMin = 0; } else { noMin++; } } //find iteration with best error m_numRegressions = getBestIteration(trainErrors, iteration); } /** * Returns the misclassification error of the current model on a set of instances. 
* @param data the set of instances * @return the error rate * @throws Exception if something goes wrong */ protected double getErrorRate(Instances data) throws Exception { Evaluation eval = new Evaluation(data); eval.evaluateModel(this,data); return eval.errorRate(); } /** * Returns the error of the probability estimates for the current model on a set of instances. * @param data the set of instances * @return the error * @throws Exception if something goes wrong */ protected double getMeanAbsoluteError(Instances data) throws Exception { Evaluation eval = new Evaluation(data); eval.evaluateModel(this,data); return eval.meanAbsoluteError(); } /** * Helper function to find the minimum in an array of error values. * * @param errors an array containing errors * @param maxIteration the maximum of iterations * @return the minimum */ protected int getBestIteration(double[] errors, int maxIteration) { double bestError = errors[0]; int bestIteration = 0; for (int i = 1; i <= maxIteration; i++) { if (errors[i] < bestError) { bestError = errors[i]; bestIteration = i; } } return bestIteration; } /** * Performs a single iteration of LogitBoost, and updates the model accordingly. * A simple regression function is fit to the response and added to the m_regressions array. * @param iteration the current iteration * @param trainYs the y-values (see description of LogitBoost) for the model trained so far * @param trainFs the F-values (see description of LogitBoost) for the model trained so far * @param probs the p-values (see description of LogitBoost) for the model trained so far * @param trainNumeric numeric version of the training data * @return returns true if iteration performed successfully, false if no simple regression function * could be fitted. 
* @throws Exception if something goes wrong */ protected boolean performIteration(int iteration, double[][] trainYs, double[][] trainFs, double[][] probs, Instances trainNumeric) throws Exception { for (int j = 0; j < m_numClasses; j++) { // Keep track of sum of weights double[] weights = new double[trainNumeric.numInstances()]; double weightSum = 0.0; //make copy of data (need to save the weights) Instances boostData = new Instances(trainNumeric); for (int i = 0; i < trainNumeric.numInstances(); i++) { //compute response and weight double p = probs[i][j]; double actual = trainYs[i][j]; double z = getZ(actual, p); double w = (actual - p) / z; //set values for instance Instance current = boostData.instance(i); current.setValue(boostData.classIndex(), z); current.setWeight(current.weight() * w); weights[i] = current.weight(); weightSum += current.weight(); } Instances instancesCopy = new Instances(boostData); if (weightSum > 0) { // Only the (1-beta)th quantile of instances are sent to the base classifier if (m_weightTrimBeta > 0) { double weightPercentage = 0.0; int[] weightsOrder = new int[trainNumeric.numInstances()]; weightsOrder = Utils.sort(weights); instancesCopy.delete(); for (int i = weightsOrder.length-1; (i >= 0) && (weightPercentage < (1-m_weightTrimBeta)); i--) { instancesCopy.add(boostData.instance(weightsOrder[i])); weightPercentage += (weights[weightsOrder[i]] / weightSum); } } //Scale the weights weightSum = instancesCopy.sumOfWeights(); for (int i = 0; i < instancesCopy.numInstances(); i++) { Instance current = instancesCopy.instance(i); current.setWeight(current.weight() * (double)instancesCopy.numInstances() / weightSum); } } //fit simple regression function m_regressions[j][iteration].buildClassifier(instancesCopy); boolean foundAttribute = m_regressions[j][iteration].foundUsefulAttribute(); if (!foundAttribute) { //could not fit simple regression function return false; } } // Evaluate / increment trainFs from the classifier for (int i = 0; i < 
trainFs.length; i++) { double [] pred = new double [m_numClasses]; double predSum = 0; for (int j = 0; j < m_numClasses; j++) { pred[j] = m_regressions[j][iteration] .classifyInstance(trainNumeric.instance(i)); predSum += pred[j]; } predSum /= m_numClasses; for (int j = 0; j < m_numClasses; j++) { trainFs[i][j] += (pred[j] - predSum) * (m_numClasses - 1) / m_numClasses; } } // Compute the current probability estimates for (int i = 0; i < trainYs.length; i++) { probs[i] = probs(trainFs[i]); } return true; } /** * Helper function to initialize m_regressions. * * @return the generated classifiers */ protected SimpleLinearRegression[][] initRegressions(){ SimpleLinearRegression[][] classifiers = new SimpleLinearRegression[m_numClasses][m_maxIterations]; for (int j = 0; j < m_numClasses; j++) { for (int i = 0; i < m_maxIterations; i++) { classifiers[j][i] = new SimpleLinearRegression(); classifiers[j][i].setSuppressErrorMessage(true); } } return classifiers; } /** * Converts training data to numeric version. The class variable is replaced by a pseudo-class * used by LogitBoost. * * @param data the data to convert * @return the converted data * @throws Exception if something goes wrong */ protected Instances getNumericData(Instances data) throws Exception{ Instances numericData = new Instances(data); int classIndex = numericData.classIndex(); numericData.setClassIndex(-1); numericData.deleteAttributeAt(classIndex); numericData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex); numericData.setClassIndex(classIndex); return numericData; } /** * Helper function for cutting back m_regressions to the set of classifiers * (corresponsing to the number of LogitBoost iterations) that gave the * smallest error. 
* * @param classifiers the original set of classifiers * @return the cut back set of classifiers */ protected SimpleLinearRegression[][] selectRegressions(SimpleLinearRegression[][] classifiers){ SimpleLinearRegression[][] goodClassifiers = new SimpleLinearRegression[m_numClasses][m_numRegressions]; for (int j = 0; j < m_numClasses; j++) { for (int i = 0; i < m_numRegressions; i++) { goodClassifiers[j][i] = classifiers[j][i]; } } return goodClassifiers; } /** * Computes the LogitBoost response variable from y/p values * (actual/estimated class probabilities). * * @param actual the actual class probability * @param p the estimated class probability * @return the LogitBoost response */ protected double getZ(double actual, double p) { double z; if (actual == 1) { z = 1.0 / p; if (z > Z_MAX) { // threshold z = Z_MAX; } } else { z = -1.0 / (1.0 - p); if (z < -Z_MAX) { // threshold z = -Z_MAX; } } return z; } /** * Computes the LogitBoost response for an array of y/p values * (actual/estimated class probabilities). * * @param dataYs the actual class probabilities * @param probs the estimated class probabilities * @return the LogitBoost response */ protected double[][] getZs(double[][] probs, double[][] dataYs) { double[][] dataZs = new double[probs.length][m_numClasses]; for (int j = 0; j < m_numClasses; j++) for (int i = 0; i < probs.length; i++) dataZs[i][j] = getZ(dataYs[i][j], probs[i][j]); return dataZs; } /** * Computes the LogitBoost weights from an array of y/p values * (actual/estimated class probabilities). 
 *
 * @param dataYs the actual class probabilities
 * @param probs the estimated class probabilities
 * @return the LogitBoost weights
 */
protected double[][] getWs(double[][] probs, double[][] dataYs) {
    double[][] dataWs = new double[probs.length][m_numClasses];
    for (int j = 0; j < m_numClasses; j++)
        for (int i = 0; i < probs.length; i++) {
            // w = (y - p) / z, where z is the (thresholded) LogitBoost
            // response computed by getZ.
            double z = getZ(dataYs[i][j], probs[i][j]);
            dataWs[i][j] = (dataYs[i][j] - probs[i][j]) / z;
        }
    return dataWs;
}

/**
 * Computes the p-values (probabilities for the classes) from the F-values
 * of the logistic model.
 *
 * @param Fs the F-values
 * @return the p-values
 */
protected double[] probs(double[] Fs) {
    // Find the largest F-value first (presumably for a numerically stable
    // softmax — NOTE(review): this method is truncated in the visible source,
    // so the remainder of its body cannot be confirmed from here).
    double maxF = -Double.MAX_VALUE;
    for (int i = 0; i < Fs.length; i++) {
        if (Fs[i] > maxF) {
            maxF = Fs[i];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -