contingencytables.java

来自「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」· Java 代码 · 共 648 行 · 第 1/2 页
JAVA
648 行
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    ContingencyTables.java
 *    Copyright (C) 1999 Eibe Frank
 *
 */

package weka.core;

/**
 * Class implementing some statistical routines for contingency tables.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class ContingencyTables {

  /**
   * The natural logarithm of 2. The entropy routines of this class divide
   * by this value; it is computed once and never reassigned.
   */
  private static final double log2 = Math.log(2);

  /**
   * Returns the chi-squared probability for a contingency table.
   * The degrees of freedom passed on are (rows - 1) * (columns - 1),
   * taken from the dimensions of the table exactly as given.
   *
   * @param matrix the contingency table
   * @param yates is Yates' correction to be used?
   * @return the chi-squared probability
   */
  public static double chiSquared(double[][] matrix, boolean yates) {

    final int rowCount = matrix.length;
    final int columnCount = matrix[0].length;
    final int degreesOfFreedom = (rowCount - 1) * (columnCount - 1);

    // The statistic itself is produced by chiVal; this method only
    // converts it into a probability.
    final double statistic = chiVal(matrix, yates);
    return Statistics.chiSquaredProbability(statistic, degreesOfFreedom);
  }

  /**
   * Computes the chi-squared statistic for a contingency table.
   *
   * Yates' correction is only passed on to the per-cell computation when
   * it was requested and the table has at most one degree of freedom.
   * If the correction was requested and the table has no degrees of
   * freedom at all, 0 is returned immediately. Rows and columns whose
   * totals are not greater than zero (according to Utils.gr) do not
   * contribute to the statistic.
   *
   * @param matrix the contingency table
   * @param useYates is Yates' correction to be used?
   * @return the value of the chi-squared statistic
   */
  public static double chiVal(double[][] matrix, boolean useYates) {

    final int numRows = matrix.length;
    final int numCols = matrix[0].length;
    final double[] rowSums = new double[numRows];
    final double[] colSums = new double[numCols];
    double grandTotal = 0;

    // Marginal totals and overall total in one pass.
    for (int r = 0; r < numRows; r++) {
      for (int c = 0; c < numCols; c++) {
        final double cell = matrix[r][c];
        rowSums[r] += cell;
        colSums[c] += cell;
        grandTotal += cell;
      }
    }

    final int degreesOfFreedom = (numRows - 1) * (numCols - 1);
    final boolean applyYates = useYates && (degreesOfFreedom <= 1);
    // Note: this shortcut is only taken when the correction was requested.
    if (applyYates && (degreesOfFreedom <= 0)) {
      return 0;
    }

    double statistic = 0.0;
    for (int r = 0; r < numRows; r++) {
      if (!Utils.gr(rowSums[r], 0)) {
        continue;
      }
      for (int c = 0; c < numCols; c++) {
        if (!Utils.gr(colSums[c], 0)) {
          continue;
        }
        // Expected frequency under independence of rows and columns.
        final double expected = (colSums[c] * rowSums[r]) / grandTotal;
        statistic += chiCell(matrix[r][c], expected, applyYates);
      }
    }
    return statistic;
  }

  /**
   * Tests if Cochran's criterion is fulfilled for the given
   * contingency table. Rows and columns with all zeros are not considered
   * relevant.
   *
   * The table is rejected as soon as one relevant cell has an expected
   * frequency smaller than 1, or as soon as the number of relevant cells
   * with an expected frequency smaller than 5 exceeds one fifth of
   * (non-zero rows * non-zero columns).
   *
   * @param matrix the contingency table to be tested
   * @return true if contingency table is ok, false if not
   */
  public static boolean cochransCriterion(double[][] matrix) {

    final double smallFrequency = 5;
    final int numRows = matrix.length;
    final int numCols = matrix[0].length;
    final double[] rowSums = new double[numRows];
    final double[] colSums = new double[numCols];
    double grandTotal = 0;

    for (int r = 0; r < numRows; r++) {
      for (int c = 0; c < numCols; c++) {
        final double cell = matrix[r][c];
        rowSums[r] += cell;
        colSums[c] += cell;
        grandTotal += cell;
      }
    }

    int populatedRows = 0;
    for (int r = 0; r < numRows; r++) {
      if (Utils.gr(rowSums[r], 0)) {
        populatedRows++;
      }
    }
    int populatedCols = 0;
    for (int c = 0; c < numCols; c++) {
      if (Utils.gr(colSums[c], 0)) {
        populatedCols++;
      }
    }

    // Largest tolerated number of cells with a small expected frequency.
    final double smallCellLimit =
      (populatedRows * populatedCols) / smallFrequency;

    int smallCells = 0;
    for (int r = 0; r < numRows; r++) {
      if (!Utils.gr(rowSums[r], 0)) {
        continue;
      }
      for (int c = 0; c < numCols; c++) {
        if (!Utils.gr(colSums[c], 0)) {
          continue;
        }
        final double expected = (colSums[c] * rowSums[r]) / grandTotal;
        if (!Utils.sm(expected, smallFrequency)) {
          continue;
        }
        if (Utils.sm(expected, 1)) {
          return false;
        }
        smallCells++;
        if (smallCells > smallCellLimit) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Computes Cramer's V for a contingency table, i.e. the square root of
   * the uncorrected chi-squared statistic divided by
   * (total count * (min(rows, columns) - 1)).
   * Returns 0 if the smaller dimension is 1 or the total count equals
   * zero (according to Utils.eq).
   *
   * @param matrix the contingency table
   * @return Cramer's V
   */
  public static double CramersV(double[][] matrix) {

    final int numRows = matrix.length;
    final int numCols = matrix[0].length;

    double total = 0;
    for (int r = 0; r < numRows; r++) {
      for (int c = 0; c < numCols; c++) {
        total += matrix[r][c];
      }
    }

    final int smallerDimMinusOne;
    if (numRows < numCols) {
      smallerDimMinusOne = numRows - 1;
    } else {
      smallerDimMinusOne = numCols - 1;
    }

    if (smallerDimMinusOne == 0) {
      return 0;
    }
    if (Utils.eq(total, 0)) {
      return 0;
    }

    final double chiSquare = chiVal(matrix, false);
    return Math.sqrt(chiSquare / (total * (double) smallerDimMinusOne));
  }

  /**
   * Computes the entropy of the given array. The result is
   * (lnFunc(sum) - sum of lnFunc(element)) / (sum * ln 2), or 0 if the
   * sum of the elements equals zero (according to Utils.eq).
   *
   * @param array the array
   * @return the entropy
   */
  public static double entropy(double[] array) {

    double accumulated = 0;
    double total = 0;

    for (double value : array) {
      accumulated -= lnFunc(value);
      total += value;
    }

    if (Utils.eq(total, 0)) {
      return 0;
    }
    return (accumulated + lnFunc(total)) / (total * log2);
  }

  /**
   * Computes conditional entropy of the rows given
   * the columns. Returns 0 if the total of all cells equals zero
   * (according to Utils.eq).
   *
   * @param matrix the contingency table
   * @return the conditional entropy of the rows given the columns
   */
  public static double entropyConditionedOnColumns(double[][] matrix) {

    double accumulated = 0;
    double grandTotal = 0;

    for (int col = 0; col < matrix[0].length; col++) {
      double columnTotal = 0;
      for (int row = 0; row < matrix.length; row++) {
        final double cell = matrix[row][col];
        accumulated = accumulated + lnFunc(cell);
        columnTotal += cell;
      }
      // Subtract the contribution of the column's marginal total.
      accumulated = accumulated - lnFunc(columnTotal);
      grandTotal += columnTotal;
    }

    if (Utils.eq(grandTotal, 0)) {
      return 0;
    }
    return -accumulated / (grandTotal * log2);
  }

  /**
   * Computes conditional entropy of the columns given
   * the rows. Returns 0 if the total of all cells equals zero
   * (according to Utils.eq).
   *
   * @param matrix the contingency table
   * @return the conditional entropy of the columns given the rows
   */
  public static double entropyConditionedOnRows(double[][] matrix) {

    double accumulated = 0;
    double grandTotal = 0;

    for (int row = 0; row < matrix.length; row++) {
      double rowTotal = 0;
      for (int col = 0; col < matrix[0].length; col++) {
        final double cell = matrix[row][col];
        accumulated = accumulated + lnFunc(cell);
        rowTotal += cell;
      }
      // Subtract the contribution of the row's marginal total.
      accumulated = accumulated - lnFunc(rowTotal);
      grandTotal += rowTotal;
    }

    if (Utils.eq(grandTotal, 0)) {
      return 0;
    }
    return -accumulated / (grandTotal * log2);
  }

  /**
   * Computes conditional entropy of the columns given the rows
   * of the test matrix with respect to the train matrix. Uses a
   * Laplace prior: every train cell count is increased by one and every
   * train row total by numClasses. Does NOT normalize the entropy.
   *
   * NOTE(review): the value returned is divided by (t * ln 2), where t is
   * the test total of the LAST row processed only, not the total over all
   * rows. This looks unintended (the sibling methods accumulate the total
   * with +=) and yields a division by zero when that row's test total
   * is 0. Behaviour is kept as is; confirm the intent before changing it.
   *
   * @param train the train matrix
   * @param test the test matrix
   * @param numClasses the number of symbols for Laplace
   * @return the entropy
   */
  public static double entropyConditionedOnRows(double[][] train,
                                                double[][] test,
                                                double numClasses) {

    double accumulated = 0;
    double lastRowTestTotal = 0;

    for (int row = 0; row < test.length; row++) {
      double trainRowTotal = 0;
      double testRowTotal = 0;
      for (int col = 0; col < test[0].length; col++) {
        final double testCount = test[row][col];
        final double trainCount = train[row][col];
        accumulated -= testCount * Math.log(trainCount + 1);
        trainRowTotal += trainCount;
        testRowTotal += testCount;
      }
      // Overwritten on every row, so only the final row's total survives.
      lastRowTestTotal = testRowTotal;
      accumulated += testRowTotal * Math.log(trainRowTotal + numClasses);
    }
    return accumulated / (lastRowTestTotal * log2);
  }

  /**
   * Computes the rows' entropy for the given contingency table, i.e. the
   * entropy of the row totals. Returns 0 if the total of all cells equals
   * zero (according to Utils.eq).
   *
   * @param matrix the contingency table
   * @return the rows' entropy
   */
  public static double entropyOverRows(double[][] matrix) {

    double accumulated = 0;
    double grandTotal = 0;

    for (int row = 0; row < matrix.length; row++) {
      double rowTotal = 0;
      for (int col = 0; col < matrix[0].length; col++) {
        rowTotal += matrix[row][col];
      }
      accumulated = accumulated - lnFunc(rowTotal);
      grandTotal += rowTotal;
    }

    if (Utils.eq(grandTotal, 0)) {
      return 0;
    }
    return (accumulated + lnFunc(grandTotal)) / (grandTotal * log2);
  }

  /**
   * Computes the columns' entropy for the given contingency table.
   *
   * @param matrix the contingency table
   * @return the columns' entropy
   */
  public static double entropyOverColumns(double[][] matrix){
    
    double returnValue = 0, sumForColumn, total = 0;

    for (int j = 0; j < matrix[0].length; j++){
      sumForColumn = 0;
      for (int i = 0; i < matrix.length; i++) {
	sumForColumn += matrix[i][j];
      }
      returnValue = returnValue - lnFunc(sumForColumn);
      total += sumForColumn;
    }
contingencytables.java - 源码说明

本页面展示了「一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码」中的 contingencytables.java 源码文件，采用 Java 编程语言编写，共 648 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与ALPHAMINERR相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?