
📄 logisticbase.java

📁 Data mining code extracted from the Weka toolkit, which is Java-based
💻 JAVA
📖 Page 1 of 2
        // Evaluate / increment trainFs from the classifier
        for (int i = 0; i < trainFs.length; i++) {
            double[] pred = new double[m_numClasses];
            double predSum = 0;
            for (int j = 0; j < m_numClasses; j++) {
                pred[j] = m_regressions[j][iteration]
                    .classifyInstance(trainNumeric.instance(i));
                predSum += pred[j];
            }
            predSum /= m_numClasses;
            for (int j = 0; j < m_numClasses; j++) {
                trainFs[i][j] += (pred[j] - predSum) * (m_numClasses - 1)
                    / m_numClasses;
            }
        }

        // Compute the current probability estimates
        for (int i = 0; i < trainYs.length; i++) {
            probs[i] = probs(trainFs[i]);
        }
        return true;
    }

    /**
     * Helper function to initialize m_regressions.
     */
    protected SimpleLinearRegression[][] initRegressions() {
        SimpleLinearRegression[][] classifiers =
            new SimpleLinearRegression[m_numClasses][m_maxIterations];
        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 0; i < m_maxIterations; i++) {
                classifiers[j][i] = new SimpleLinearRegression();
                classifiers[j][i].setSuppressErrorMessage(true);
            }
        }
        return classifiers;
    }

    /**
     * Converts training data to a numeric version. The class variable is
     * replaced by a pseudo-class used by LogitBoost.
     */
    protected Instances getNumericData(Instances data) throws Exception {
        Instances numericData = new Instances(data);

        int classIndex = numericData.classIndex();
        numericData.setClassIndex(-1);
        numericData.deleteAttributeAt(classIndex);
        numericData.insertAttributeAt(new Attribute("'pseudo class'"), classIndex);
        numericData.setClassIndex(classIndex);
        return numericData;
    }

    /**
     * Helper function for cutting back m_regressions to the set of classifiers
     * (corresponding to the number of LogitBoost iterations) that gave the
     * smallest error.
     */
    protected SimpleLinearRegression[][] selectRegressions(SimpleLinearRegression[][] classifiers) {
        SimpleLinearRegression[][] goodClassifiers =
            new SimpleLinearRegression[m_numClasses][m_numRegressions];

        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 0; i < m_numRegressions; i++) {
                goodClassifiers[j][i] = classifiers[j][i];
            }
        }
        return goodClassifiers;
    }

    /**
     * Computes the LogitBoost response variable from y/p values
     * (actual/estimated class probabilities).
     */
    protected double getZ(double actual, double p) {
        double z;
        if (actual == 1) {
            z = 1.0 / p;
            if (z > Z_MAX) { // threshold
                z = Z_MAX;
            }
        } else {
            z = -1.0 / (1.0 - p);
            if (z < -Z_MAX) { // threshold
                z = -Z_MAX;
            }
        }
        return z;
    }

    /**
     * Computes the LogitBoost response for an array of y/p values
     * (actual/estimated class probabilities).
     */
    protected double[][] getZs(double[][] probs, double[][] dataYs) {
        double[][] dataZs = new double[probs.length][m_numClasses];
        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 0; i < probs.length; i++) {
                dataZs[i][j] = getZ(dataYs[i][j], probs[i][j]);
            }
        }
        return dataZs;
    }

    /**
     * Computes the LogitBoost weights from an array of y/p values
     * (actual/estimated class probabilities).
     */
    protected double[][] getWs(double[][] probs, double[][] dataYs) {
        double[][] dataWs = new double[probs.length][m_numClasses];
        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 0; i < probs.length; i++) {
                double z = getZ(dataYs[i][j], probs[i][j]);
                dataWs[i][j] = (dataYs[i][j] - probs[i][j]) / z;
            }
        }
        return dataWs;
    }

    /**
     * Computes the p-values (probabilities for the classes) from the F-values
     * of the logistic model.
     */
    protected double[] probs(double[] Fs) {
        double maxF = -Double.MAX_VALUE;
        for (int i = 0; i < Fs.length; i++) {
            if (Fs[i] > maxF) {
                maxF = Fs[i];
            }
        }
        double sum = 0;
        double[] probs = new double[Fs.length];
        for (int i = 0; i < Fs.length; i++) {
            probs[i] = Math.exp(Fs[i] - maxF);
            sum += probs[i];
        }

        Utils.normalize(probs, sum);
        return probs;
    }

    /**
     * Computes the Y-values (actual class probabilities) for a set of instances.
     */
    protected double[][] getYs(Instances data) {
        double[][] dataYs = new double[data.numInstances()][m_numClasses];
        for (int j = 0; j < m_numClasses; j++) {
            for (int k = 0; k < data.numInstances(); k++) {
                dataYs[k][j] = (data.instance(k).classValue() == j) ? 1.0 : 0.0;
            }
        }
        return dataYs;
    }

    /**
     * Computes the F-values for a single instance.
     */
    protected double[] getFs(Instance instance) throws Exception {
        double[] pred = new double[m_numClasses];
        double[] instanceFs = new double[m_numClasses];

        // add up the predictions from the simple regression functions
        for (int i = 0; i < m_numRegressions; i++) {
            double predSum = 0;
            for (int j = 0; j < m_numClasses; j++) {
                pred[j] = m_regressions[j][i].classifyInstance(instance);
                predSum += pred[j];
            }
            predSum /= m_numClasses;
            for (int j = 0; j < m_numClasses; j++) {
                instanceFs[j] += (pred[j] - predSum) * (m_numClasses - 1)
                    / m_numClasses;
            }
        }
        return instanceFs;
    }

    /**
     * Computes the F-values for a set of instances.
     */
    protected double[][] getFs(Instances data) throws Exception {
        double[][] dataFs = new double[data.numInstances()][];
        for (int k = 0; k < data.numInstances(); k++) {
            dataFs[k] = getFs(data.instance(k));
        }
        return dataFs;
    }

    /**
     * Computes the p-values (probabilities for the different classes) from the
     * F-values for a set of instances.
     */
    protected double[][] getProbs(double[][] dataFs) {
        int numInstances = dataFs.length;
        double[][] probs = new double[numInstances][];

        for (int k = 0; k < numInstances; k++) {
            probs[k] = probs(dataFs[k]);
        }
        return probs;
    }

    /**
     * Returns the likelihood of the Y-values (actual class probabilities)
     * given the p-values (current probability estimates).
     */
    protected double logLikelihood(double[][] dataYs, double[][] probs) {
        double logLikelihood = 0;
        for (int i = 0; i < dataYs.length; i++) {
            for (int j = 0; j < m_numClasses; j++) {
                if (dataYs[i][j] == 1.0) {
                    logLikelihood -= Math.log(probs[i][j]);
                }
            }
        }
        return logLikelihood / (double) dataYs.length;
    }

    /**
     * Returns an array of the indices of the attributes used in the logistic
     * model. The first dimension is the class, the second dimension holds a
     * list of attribute indices. Attribute indices start at zero.
     *
     * @return the array of attribute indices
     */
    public int[][] getUsedAttributes() {
        int[][] usedAttributes = new int[m_numClasses][];

        // first extract coefficients
        double[][] coefficients = getCoefficients();

        for (int j = 0; j < m_numClasses; j++) {

            // boolean array indicating if attribute used
            boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];
            for (int i = 0; i < attributes.length; i++) {
                // attribute is used if its coefficient is nonzero
                if (!Utils.eq(coefficients[j][i + 1], 0)) attributes[i] = true;
            }

            int numAttributes = 0;
            for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) {
                if (attributes[i]) numAttributes++;
            }

            // "collect" all attributes into array of indices
            int[] usedAttributesClass = new int[numAttributes];
            int count = 0;
            for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) {
                if (attributes[i]) {
                    usedAttributesClass[count] = i;
                    count++;
                }
            }

            usedAttributes[j] = usedAttributesClass;
        }

        return usedAttributes;
    }

    /**
     * The number of LogitBoost iterations performed (= the number of simple
     * regression functions fit).
     */
    public int getNumRegressions() {
        return m_numRegressions;
    }

    /**
     * Sets the parameter "maxIterations".
     */
    public void setMaxIterations(int maxIterations) {
        m_maxIterations = maxIterations;
    }

    /**
     * Sets the option "heuristicStop".
     */
    public void setHeuristicStop(int heuristicStop) {
        m_heuristicStop = heuristicStop;
    }

    /**
     * Returns the maxIterations parameter.
     */
    public int getMaxIterations() {
        return m_maxIterations;
    }

    /**
     * Returns an array holding the coefficients of the logistic model.
     * The first dimension is the class, the second one holds a list of
     * coefficients. At position zero, the constant term of the model is
     * stored, then the coefficients for the attributes in ascending order.
     *
     * @return the array of coefficients
     */
    protected double[][] getCoefficients() {
        double[][] coefficients = new double[m_numClasses][m_numericDataHeader.numAttributes() + 1];
        for (int j = 0; j < m_numClasses; j++) {
            // go through the simple regression functions and add each one's
            // coefficient to the coefficient of the attribute it is built on.
            for (int i = 0; i < m_numRegressions; i++) {
                double slope = m_regressions[j][i].getSlope();
                double intercept = m_regressions[j][i].getIntercept();
                int attribute = m_regressions[j][i].getAttributeIndex();

                coefficients[j][0] += intercept;
                coefficients[j][attribute + 1] += slope;
            }
        }
        return coefficients;
    }

    /**
     * Returns the fraction of all attributes in the data that are used in the
     * logistic model (in percent). An attribute is used in the model if it is
     * used in any of the models for the different classes.
     */
    public double percentAttributesUsed() {
        boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];

        double[][] coefficients = getCoefficients();
        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 1; i < m_numericDataHeader.numAttributes() + 1; i++) {
                // attribute is used if it is used in any class; note the
                // coefficients are shifted by one (because of the constant term).
                if (!Utils.eq(coefficients[j][i], 0)) attributes[i - 1] = true;
            }
        }

        // count number of used attributes (without the class attribute)
        double count = 0;
        for (int i = 0; i < attributes.length; i++) {
            if (attributes[i]) count++;
        }
        return count / (double) (m_numericDataHeader.numAttributes() - 1) * 100.0;
    }

    /**
     * Returns a description of the logistic model (i.e., attributes and coefficients).
     */
    public String toString() {
        StringBuffer s = new StringBuffer();

        // get used attributes
        int[][] attributes = getUsedAttributes();

        // get coefficients
        double[][] coefficients = getCoefficients();

        for (int j = 0; j < m_numClasses; j++) {
            s.append("\nClass " + j + " :\n");
            // constant term
            s.append(Utils.doubleToString(coefficients[j][0], 4, 2) + " + \n");
            for (int i = 0; i < attributes[j].length; i++) {
                // attribute/coefficient pairs
                s.append("[" + m_numericDataHeader.attribute(attributes[j][i]).name() + "]");
                s.append(" * " + Utils.doubleToString(coefficients[j][attributes[j][i] + 1], 4, 2));
                if (i != attributes[j].length - 1) s.append(" +");
                s.append("\n");
            }
        }

        return new String(s);
    }

    /**
     * Returns class probabilities for an instance.
     *
     * @exception Exception if the distribution can't be computed successfully
     */
    public double[] distributionForInstance(Instance instance) throws Exception {
        instance = (Instance) instance.copy();

        // set to numeric pseudo-class
        instance.setDataset(m_numericDataHeader);

        // calculate probs via Fs
        return probs(getFs(instance));
    }

    /**
     * Cleanup in order to save memory.
     */
    public void cleanup() {
        // save just header info
        m_train = new Instances(m_train, 0);
        m_numericData = null;
    }
}
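The probs(double[]) helper above is a numerically stable softmax: it subtracts the largest F-value before exponentiating, so Math.exp cannot overflow, and the common factor exp(-maxF) cancels when the values are normalized. Below is a minimal self-contained sketch of the same computation; the class name SoftmaxSketch and the hard-coded F-values are illustrative only, not part of Weka.

// Minimal sketch of the softmax step in probs(double[]); illustrative, not Weka code.
public class SoftmaxSketch {

    static double[] probsFromFs(double[] Fs) {
        // Shift by the maximum F-value so Math.exp cannot overflow;
        // the common factor exp(-maxF) cancels during normalization.
        double maxF = -Double.MAX_VALUE;
        for (double f : Fs) {
            if (f > maxF) maxF = f;
        }
        double sum = 0;
        double[] probs = new double[Fs.length];
        for (int i = 0; i < Fs.length; i++) {
            probs[i] = Math.exp(Fs[i] - maxF);
            sum += probs[i];
        }
        // Same effect as Utils.normalize(probs, sum) in the listing above.
        for (int i = 0; i < probs.length; i++) {
            probs[i] /= sum;
        }
        return probs;
    }

    public static void main(String[] args) {
        // F-values for a hypothetical three-class problem.
        double[] Fs = {2.0, 0.5, -1.0};
        System.out.println(java.util.Arrays.toString(probsFromFs(Fs)));
        // Prints approximately [0.786, 0.175, 0.039]
    }
}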
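getZ and getWs together implement the working response and weights of a LogitBoost iteration: for y = 1 the response is 1/p, for y = 0 it is -1/(1 - p), capped at ±Z_MAX for near-degenerate probabilities, and the weight (y - p)/z then equals p(1 - p) whenever the cap does not bite. Here is a small standalone check of that identity; the Z_MAX value and class name below are assumptions for illustration, not Weka's constants.

// Checks that the LogitBoost weight (y - p) / z equals p * (1 - p)
// when the cap is not hit. Z_MAX = 3.0 is an assumed value, not Weka's constant.
public class LogitBoostResponseSketch {

    static final double Z_MAX = 3.0; // assumed cap on the working response

    static double getZ(double actual, double p) {
        // Same case split as the listing: 1/p for y = 1, -1/(1 - p) for y = 0.
        double z = (actual == 1) ? 1.0 / p : -1.0 / (1.0 - p);
        // Clip extreme responses that occur when p approaches 0 or 1.
        if (z > Z_MAX) z = Z_MAX;
        if (z < -Z_MAX) z = -Z_MAX;
        return z;
    }

    public static void main(String[] args) {
        double y = 1.0, p = 0.8;
        double z = getZ(y, p);   // 1 / 0.8 = 1.25, below the cap
        double w = (y - p) / z;  // 0.2 / 1.25 = 0.16 = p * (1 - p)
        System.out.println("z = " + z + ", w = " + w);
    }
}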
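In Weka, this class is the logistic-model base used by the SimpleLogistic classifier (and by the LMT tree), so the natural way to exercise the listing above is through that public classifier, whose distributionForInstance ultimately evaluates probs(getFs(instance)). A hedged usage sketch, assuming Weka 3.x on the classpath and a local ARFF file iris.arff (a placeholder path) whose last attribute is the class:

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.functions.SimpleLogistic;
import weka.core.Instances;

public class SimpleLogisticDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "iris.arff" is a placeholder path.
        Instances data = new Instances(new BufferedReader(new FileReader("iris.arff")));
        data.setClassIndex(data.numAttributes() - 1);

        // Fit the logistic model via LogitBoost with simple regression functions.
        SimpleLogistic model = new SimpleLogistic();
        model.buildClassifier(data);

        // Class probabilities for the first instance.
        double[] dist = model.distributionForInstance(data.instance(0));
        System.out.println(java.util.Arrays.toString(dist));
    }
}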
