📄 logisticbase.java
字号:
} }
        // Exponentiate the F-values shifted by maxF and accumulate their sum,
        // then hand both to Utils.normalize to rescale the values by that sum.
        double sum = 0;
        double[] probs = new double[Fs.length];
        for (int i = 0; i < Fs.length; i++) {
            probs[i] = Math.exp(Fs[i] - maxF);
            sum += probs[i];
        }
        Utils.normalize(probs, sum);
        return probs;
    }

    /**
     * Computes the Y-values (actual class probabilities) for a set of instances.
     * Entry [k][j] is 1.0 if the class value of instance k equals j, and 0.0 otherwise.
     *
     * @param data the data to compute the Y-values from
     * @return the Y-values, indexed by instance and then by class
     */
    protected double[][] getYs(Instances data) {
        double[][] dataYs = new double[data.numInstances()][m_numClasses];
        for (int j = 0; j < m_numClasses; j++) {
            for (int k = 0; k < data.numInstances(); k++) {
                dataYs[k][j] = (data.instance(k).classValue() == j) ? 1.0 : 0.0;
            }
        }
        return dataYs;
    }

    /**
     * Computes the F-values for a single instance.
     *
     * @param instance the instance to compute the F-values for
     * @return the F-values, one per class
     * @throws Exception if something goes wrong
     */
    protected double[] getFs(Instance instance) throws Exception {
        double[] pred = new double[m_numClasses];
        double[] instanceFs = new double[m_numClasses];

        // add up the predictions from the simple regression functions
        for (int i = 0; i < m_numRegressions; i++) {
            double predSum = 0;
            for (int j = 0; j < m_numClasses; j++) {
                pred[j] = m_regressions[j][i].classifyInstance(instance);
                predSum += pred[j];
            }
            // predSum now becomes the mean prediction over all classes
            predSum /= m_numClasses;
            for (int j = 0; j < m_numClasses; j++) {
                // centre each prediction on the mean and scale by (J - 1) / J
                instanceFs[j] += (pred[j] - predSum) * (m_numClasses - 1) / m_numClasses;
            }
        }
        return instanceFs;
    }

    /**
     * Computes the F-values for a set of instances.
     *
     * @param data the data to work on
     * @return the F-values, indexed by instance and then by class
     * @throws Exception if something goes wrong
     */
    protected double[][] getFs(Instances data) throws Exception {
        double[][] dataFs = new double[data.numInstances()][];
        for (int k = 0; k < data.numInstances(); k++) {
            dataFs[k] = getFs(data.instance(k));
        }
        return dataFs;
    }

    /**
     * Computes the p-values (probabilities for the different classes) from
     * the F-values for a set of instances.
     *
     * @param dataFs the F-values
     * @return the p-values
     */
    protected double[][] getProbs(double[][] dataFs) {
        int numInstances = dataFs.length;
        double[][] probs = new double[numInstances][];
        for (int k = 0; k < numInstances; k++) {
            probs[k] = probs(dataFs[k]);
        }
        return probs;
    }

    /**
     * Returns the negative loglikelihood of the Y-values (actual class probabilities) given the
     * p-values (current probability estimates).
     *
     * @param dataYs the Y-values
     * @param probs the p-values
     * @return the negative loglikelihood, summed over all instances (not averaged)
     */
    protected double negativeLogLikelihood(double[][] dataYs, double[][] probs) {
        double logLikelihood = 0;
        for (int i = 0; i < dataYs.length; i++) {
            for (int j = 0; j < m_numClasses; j++) {
                // only the entry marked 1.0 (the actual class) contributes
                if (dataYs[i][j] == 1.0) {
                    logLikelihood -= Math.log(probs[i][j]);
                }
            }
        }
        // the sum is returned as is; it is not divided by the number of instances
        return logLikelihood;
    }

    /**
     * Returns an array of the indices of the attributes used in the logistic model.
     * The first dimension is the class, the second dimension holds a list of attribute indices.
     * Attribute indices start at zero.
     *
     * @return the array of attribute indices
     */
    public int[][] getUsedAttributes() {
        int[][] usedAttributes = new int[m_numClasses][];

        // first extract coefficients
        double[][] coefficients = getCoefficients();

        for (int j = 0; j < m_numClasses; j++) {
            // boolean array indicating if attribute used
            boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];
            for (int i = 0; i < attributes.length; i++) {
                // attribute used if its coefficient is non-zero; coefficients are
                // shifted by one because position zero holds the constant term
                if (!Utils.eq(coefficients[j][i + 1], 0)) {
                    attributes[i] = true;
                }
            }

            int numAttributes = 0;
            for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) {
                if (attributes[i]) {
                    numAttributes++;
                }
            }

            // "collect" all attributes into array of indices
            int[] usedAttributesClass = new int[numAttributes];
            int count = 0;
            for (int i = 0; i < m_numericDataHeader.numAttributes(); i++) {
                if (attributes[i]) {
                    usedAttributesClass[count] = i;
                    count++;
                }
            }

            usedAttributes[j] = usedAttributesClass;
        }

        return usedAttributes;
    }

    /**
     * The number of LogitBoost iterations performed (= the number of simple
     * regression functions fit).
     *
     * @return the number of LogitBoost iterations performed
     */
    public int getNumRegressions() {
        return m_numRegressions;
    }

    /**
     * Get the value of weightTrimBeta.
     *
     * @return Value of weightTrimBeta.
     */
    public double getWeightTrimBeta() {
        return m_weightTrimBeta;
    }

    /**
     * Get the value of useAIC.
     *
     * @return Value of useAIC.
     */
    public boolean getUseAIC() {
        return m_useAIC;
    }

    /**
     * Sets the parameter "maxIterations".
     *
     * @param maxIterations the maximum iterations
     */
    public void setMaxIterations(int maxIterations) {
        m_maxIterations = maxIterations;
    }

    /**
     * Sets the option "heuristicStop".
     *
     * @param heuristicStop the heuristic stop to use
     */
    public void setHeuristicStop(int heuristicStop) {
        m_heuristicStop = heuristicStop;
    }

    /**
     * Sets the option "weightTrimBeta".
     *
     * @param w Value to assign to weightTrimBeta.
     */
    public void setWeightTrimBeta(double w) {
        m_weightTrimBeta = w;
    }

    /**
     * Set the value of useAIC.
     *
     * @param c Value to assign to useAIC.
     */
    public void setUseAIC(boolean c) {
        m_useAIC = c;
    }

    /**
     * Returns the maxIterations parameter.
     *
     * @return the maximum iteration
     */
    public int getMaxIterations() {
        return m_maxIterations;
    }

    /**
     * Returns an array holding the coefficients of the logistic model.
     * First dimension is the class, the second one holds a list of coefficients.
     * At position zero, the constant term of the model is stored, then, the coefficients for
     * the attributes in ascending order.
     *
     * @return the array of coefficients
     */
    protected double[][] getCoefficients() {
        double[][] coefficients = new double[m_numClasses][m_numericDataHeader.numAttributes() + 1];
        for (int j = 0; j < m_numClasses; j++) {
            // go through simple regression functions and add their coefficient to the
            // coefficient of the attribute they are built on.
            for (int i = 0; i < m_numRegressions; i++) {
                double slope = m_regressions[j][i].getSlope();
                double intercept = m_regressions[j][i].getIntercept();
                int attribute = m_regressions[j][i].getAttributeIndex();

                coefficients[j][0] += intercept;
                coefficients[j][attribute + 1] += slope;
            }
        }
        return coefficients;
    }

    /**
     * Returns the fraction of all attributes in the data that are used in the
     * logistic model (in percent).
     * An attribute is used in the model if it is used in any of the models for
     * the different classes.
     *
     * @return the fraction of all attributes that are used
     */
    public double percentAttributesUsed() {
        boolean[] attributes = new boolean[m_numericDataHeader.numAttributes()];

        double[][] coefficients = getCoefficients();
        for (int j = 0; j < m_numClasses; j++) {
            for (int i = 1; i < m_numericDataHeader.numAttributes() + 1; i++) {
                // attribute used if it is used in any class, note coefficients are shifted
                // by one (because of constant term).
                if (!Utils.eq(coefficients[j][i], 0)) {
                    attributes[i - 1] = true;
                }
            }
        }

        // count number of used attributes (without the class attribute)
        double count = 0;
        for (int i = 0; i < attributes.length; i++) {
            if (attributes[i]) {
                count++;
            }
        }
        return count / (double) (m_numericDataHeader.numAttributes() - 1) * 100.0;
    }

    /**
     * Returns a description of the logistic model (i.e., attributes and
     * coefficients).
     *
     * @return the description of the model
     */
    public String toString() {
        StringBuffer s = new StringBuffer();

        // get used attributes
        int[][] attributes = getUsedAttributes();

        // get coefficients
        double[][] coefficients = getCoefficients();

        for (int j = 0; j < m_numClasses; j++) {
            s.append("\nClass " + j + " :\n");
            // constant term
            s.append(Utils.doubleToString(coefficients[j][0], 4, 2) + " + \n");
            for (int i = 0; i < attributes[j].length; i++) {
                // attribute/coefficient pairs
                s.append("[" + m_numericDataHeader.attribute(attributes[j][i]).name() + "]");
                s.append(" * " + Utils.doubleToString(coefficients[j][attributes[j][i] + 1], 4, 2));
                // every pair except the last one is followed by a plus sign
                if (i != attributes[j].length - 1) {
                    s.append(" +");
                }
                s.append("\n");
            }
        }
        return s.toString();
    }

    /**
     * Returns class probabilities for an instance.
     *
     * @param instance the instance to compute the distribution for
     * @return the class probabilities
     * @throws Exception if distribution can't be computed successfully
     */
    public double[] distributionForInstance(Instance instance) throws Exception {
        // work on a copy so the caller's instance is not re-assigned to another dataset
        instance = (Instance) instance.copy();

        // set to numeric pseudo-class
        instance.setDataset(m_numericDataHeader);

        // calculate probs via Fs
        return probs(getFs(instance));
    }

    /**
     * Cleanup in order to save memory.
     */
    public void cleanup() {
        // save just header info
        m_train = new Instances(m_train, 0);
        m_numericData = null;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -