📄 logisticbase.java

📁 Java 编写的多种数据挖掘算法包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
	    }	}   	double sum = 0;	double[] probs = new double[Fs.length];	for (int i = 0; i < Fs.length; i++) {	    probs[i] = Math.exp(Fs[i] - maxF);   	    sum += probs[i];	}		Utils.normalize(probs, sum);	return probs;    }    /**     * Computes the Y-values (actual class probabilities) for a set of instances.     *      * @param data the data to compute the Y-values from     * @return the Y-values     */    protected double[][] getYs(Instances data){		double [][] dataYs = new double [data.numInstances()][m_numClasses];	for (int j = 0; j < m_numClasses; j++) {	    for (int k = 0; k < data.numInstances(); k++) {		dataYs[k][j] = (data.instance(k).classValue() == j) ? 		    1.0: 0.0;	    }	}	return dataYs;    }    /**     * Computes the F-values for a single instance.     *      * @param instance the instance to compute the F-values for     * @return the F-values     * @throws Exception if something goes wrong     */    protected double[] getFs(Instance instance) throws Exception{		double [] pred = new double [m_numClasses];	double [] instanceFs = new double [m_numClasses]; 		//add up the predictions from the simple regression functions	for (int i = 0; i < m_numRegressions; i++) {	    double predSum = 0;	    for (int j = 0; j < m_numClasses; j++) {		pred[j] = m_regressions[j][i].classifyInstance(instance);		predSum += pred[j];	    }	    predSum /= m_numClasses;	    for (int j = 0; j < m_numClasses; j++) {		instanceFs[j] += (pred[j] - predSum) * (m_numClasses - 1) 		    / m_numClasses;	    }	}			return instanceFs;     }         /**     * Computes the F-values for a set of instances.     
*      * @param data the data to work on     * @return the F-values     * @throws Exception if something goes wrong     */    protected double[][] getFs(Instances data) throws Exception{		double[][] dataFs = new double[data.numInstances()][];       	for (int k = 0; k < data.numInstances(); k++) {	    dataFs[k] = getFs(data.instance(k));	}		return dataFs;	    }       /**     * Computes the p-values (probabilities for the different classes) from      * the F-values for a set of instances.     *      * @param dataFs the F-values     * @return the p-values     */    protected double[][] getProbs(double[][] dataFs){		int numInstances = dataFs.length;	double[][] probs = new double[numInstances][];		for (int k = 0; k < numInstances; k++) {       	    probs[k] = probs(dataFs[k]);	}	return probs;    }        /**     * Returns the negative loglikelihood of the Y-values (actual class probabilities) given the      * p-values (current probability estimates).     *      * @param dataYs the Y-values     * @param probs the p-values     * @return the likelihood     */    protected double negativeLogLikelihood(double[][] dataYs, double[][] probs) {		double logLikelihood = 0;	for (int i = 0; i < dataYs.length; i++) {	    for (int j = 0; j < m_numClasses; j++) {		if (dataYs[i][j] == 1.0) {		    logLikelihood -= Math.log(probs[i][j]);		}	    }	}	return logLikelihood;// / (double)dataYs.length;    }    /**     * Returns an array of the indices of the attributes used in the logistic model.     * The first dimension is the class, the second dimension holds a list of attribute indices.     * Attribute indices start at zero.     
* @return the array of attribute indices     */    public int[][] getUsedAttributes(){		int[][] usedAttributes = new int[m_numClasses][];		//first extract coefficients	double[][] coefficients = getCoefficients();		for (int j = 0; j < m_numClasses; j++){	    	    //boolean array indicating if attribute used	    boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];	    for (int i = 0; i < attributes.length; i++) {		//attribute used if coefficient > 0		if (!Utils.eq(coefficients[j][i + 1],0)) attributes[i] = true;	    }	    	    	    int numAttributes = 0;	    for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) if (attributes[i]) numAttributes++;	    	    //"collect" all attributes into array of indices	    int[] usedAttributesClass = new int[numAttributes];	    int count = 0;	    for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) {		if (attributes[i]) {		usedAttributesClass[count] = i;		count++;		} 	    }	    	    usedAttributes[j] = usedAttributesClass;	}		return usedAttributes;    }    /**     * The number of LogitBoost iterations performed (= the number of simple      * regression functions fit).     *      * @return the number of LogitBoost iterations performed      */    public int getNumRegressions() {	return m_numRegressions;    }        /**     * Get the value of weightTrimBeta.     *     * @return Value of weightTrimBeta.     */    public double getWeightTrimBeta(){        return m_weightTrimBeta;    }        /**     * Get the value of useAIC.     *     * @return Value of useAIC.     */    public boolean getUseAIC(){        return m_useAIC;    }    /**     * Sets the parameter "maxIterations".     *      * @param maxIterations the maximum iterations     */    public void setMaxIterations(int maxIterations) {	m_maxIterations = maxIterations;    }        /**     * Sets the option "heuristicStop".     
*      * @param heuristicStop the heuristic stop to use     */    public void setHeuristicStop(int heuristicStop){	m_heuristicStop = heuristicStop;    }        /**     * Sets the option "weightTrimBeta".     */    public void setWeightTrimBeta(double w){        m_weightTrimBeta = w;    }        /**     * Set the value of useAIC.     *     * @param c Value to assign to useAIC.     */    public void setUseAIC(boolean c){        m_useAIC = c;    }    /**     * Returns the maxIterations parameter.     *      * @return the maximum iteration     */    public int getMaxIterations(){	return m_maxIterations;    }            /**     * Returns an array holding the coefficients of the logistic model.     * First dimension is the class, the second one holds a list of coefficients.     * At position zero, the constant term of the model is stored, then, the coefficients for     * the attributes in ascending order.     * @return the array of coefficients     */    protected double[][] getCoefficients(){	double[][] coefficients = new double[m_numClasses][m_numericDataHeader.numAttributes() + 1];	for (int j = 0; j < m_numClasses; j++) {	    //go through simple regression functions and add their coefficient to the coefficient of	    //the attribute they are built on.	    for (int i = 0; i < m_numRegressions; i++) {				double slope = m_regressions[j][i].getSlope();		double intercept = m_regressions[j][i].getIntercept();		int attribute = m_regressions[j][i].getAttributeIndex();				coefficients[j][0] += intercept;		coefficients[j][attribute + 1] += slope;	    }	}	return coefficients;    }    /**     * Returns the fraction of all attributes in the data that are used in the      * logistic model (in percent).      * An attribute is used in the model if it is used in any of the models for      * the different classes.     
*      * @return the fraction of all attributes that are used     */    public double percentAttributesUsed(){		boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];		double[][] coefficients = getCoefficients();	for (int j = 0; j < m_numClasses; j++){	    for (int i = 1; i < m_numericDataHeader.numAttributes() + 1; i++) {		//attribute used if it is used in any class, note coefficients are shifted by one (because		//of constant term).		if (!Utils.eq(coefficients[j][i],0)) attributes[i - 1] = true;	    }	}		//count number of used attributes (without the class attribute)	double count = 0;	for (int i = 0; i < attributes.length; i++) if (attributes[i]) count++;	return count / (double)(m_numericDataHeader.numAttributes() - 1) * 100.0;    }        /**     * Returns a description of the logistic model (i.e., attributes and      * coefficients).     *      * @return the description of the model     */    public String toString(){		StringBuffer s = new StringBuffer();		//get used attributes	int[][] attributes = getUsedAttributes();		//get coefficients	double[][] coefficients = getCoefficients();		for (int j = 0; j < m_numClasses; j++) {	    s.append("\nClass "+j+" :\n");	    //constant term	    s.append(Utils.doubleToString(coefficients[j][0],4,2)+" + \n");	    for (int i = 0; i < attributes[j].length; i++) {				//attribute/coefficient pairs		s.append("["+m_numericDataHeader.attribute(attributes[j][i]).name()+"]");		s.append(" * " + Utils.doubleToString(coefficients[j][attributes[j][i]+1],4,2));		if (i != attributes[j].length - 1) s.append(" +");		s.append("\n");	    	    }	}		return new String(s);    }    /**      * Returns class probabilities for an instance.     
*
     * @param instance the instance to compute the distribution for
     * @return the class probabilities
     * @throws Exception if distribution can't be computed successfully
     */
    public double[] distributionForInstance(Instance instance) throws Exception {

        // work on a copy so the dataset of the caller's instance is left untouched
        Instance local = (Instance) instance.copy();

        // set to numeric pseudo-class
        local.setDataset(m_numericDataHeader);

        // calculate probs via Fs
        double[] fValues = getFs(local);
        return probs(fValues);
    }

    /**
     * Cleanup in order to save memory.
     */
    public void cleanup() {
        // save just header info
        this.m_train = new Instances(this.m_train, 0);
        this.m_numericData = null;
    }
}
上一页 1 23
💿 文件大小 3872 K
👤 上传用户 fengkuangyidao
📂 所属分类 Applet
🏷️ 相关标签

#Java #编写 #数据挖掘算法 #分类
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -