📄 contingencytables.java
字号:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.core;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
/**
* Class implementing some statistical routines for contingency tables.
*
*/
public class ContingencyTables {
/** The natural logarithm of 2; dividing a natural-log quantity by it yields base 2. */
private static final double log2 = Math.log(2);
/**
 * Computes the chi-squared probability for a contingency table.
 *
 * <p>The degrees of freedom are (rows - 1) * (columns - 1), where the
 * column count is taken from the first row. The statistic is obtained
 * from {@code chiVal} and handed to
 * {@code Statistics.chiSquaredProbability} together with the degrees of
 * freedom.
 *
 * @param matrix the contingency table
 * @param yates whether Yates' correction is to be used
 * @return the chi-squared probability
 */
public static double chiSquared(double [][] matrix, boolean yates) {
    final int rowCount = matrix.length;
    final int columnCount = matrix[0].length;
    final int degreesOfFreedom = (rowCount - 1) * (columnCount - 1);
    final double statistic = chiVal(matrix, yates);
    return Statistics.chiSquaredProbability(statistic, degreesOfFreedom);
}
/**
 * Computes the chi-squared statistic for a contingency table.
 *
 * <p>Each cell's expected value is (column total * row total) / grand
 * total; the per-cell contribution is delegated to {@code chiCell}. Rows
 * and columns whose totals are not greater than zero (per
 * {@code Utils.gr}) contribute nothing.
 *
 * <p>Yates' correction is only passed on to {@code chiCell} when it was
 * requested and the table has at most one degree of freedom. When the
 * correction was requested and the degrees of freedom are zero or less,
 * the result is 0.
 *
 * @param matrix the contingency table
 * @param useYates whether Yates' correction is to be used
 * @return the value of the chi-squared statistic
 */
public static double chiVal(double [][] matrix, boolean useYates) {
    final int rowCount = matrix.length;
    final int columnCount = matrix[0].length;
    final double[] rowSums = new double[rowCount];
    final double[] columnSums = new double[columnCount];
    double grandTotal = 0;

    // Marginal totals and the grand total in a single pass.
    for (int r = 0; r < rowCount; r++) {
        for (int c = 0; c < columnCount; c++) {
            final double cell = matrix[r][c];
            rowSums[r] += cell;
            columnSums[c] += cell;
            grandTotal += cell;
        }
    }

    final int degreesOfFreedom = (rowCount - 1) * (columnCount - 1);
    final boolean applyYates = useYates && (degreesOfFreedom <= 1);
    // The degenerate-table shortcut is only taken on the Yates path.
    if (applyYates && (degreesOfFreedom <= 0)) {
        return 0;
    }

    double statistic = 0.0;
    for (int r = 0; r < rowCount; r++) {
        if (!Utils.gr(rowSums[r], 0)) {
            continue;
        }
        for (int c = 0; c < columnCount; c++) {
            if (!Utils.gr(columnSums[c], 0)) {
                continue;
            }
            final double expected = (columnSums[c] * rowSums[r]) / grandTotal;
            statistic += chiCell(matrix[r][c], expected, applyYates);
        }
    }
    return statistic;
}
/**
 * Tests whether Cochran's criterion is fulfilled for the given
 * contingency table. Rows and columns whose totals are not greater than
 * zero (per {@code Utils.gr}) are not considered relevant.
 *
 * <p>The table is rejected as soon as an expected frequency is smaller
 * than 1, or as soon as the number of cells with an expected frequency
 * smaller than 5 exceeds (relevant rows * relevant columns) / 5.
 * Comparisons are made with {@code Utils.sm}.
 *
 * @param matrix the contingency table to be tested
 * @return true if the contingency table is ok, false if not
 */
public static boolean cochransCriterion(double[][] matrix) {
    final double smallFrequency = 5;
    final int rowCount = matrix.length;
    final int columnCount = matrix[0].length;
    final double[] rowSums = new double[rowCount];
    final double[] columnSums = new double[columnCount];
    double grandTotal = 0;

    for (int r = 0; r < rowCount; r++) {
        for (int c = 0; c < columnCount; c++) {
            final double cell = matrix[r][c];
            rowSums[r] += cell;
            columnSums[c] += cell;
            grandTotal += cell;
        }
    }

    int relevantRows = 0;
    for (double rowSum : rowSums) {
        if (Utils.gr(rowSum, 0)) {
            relevantRows++;
        }
    }
    int relevantColumns = 0;
    for (double columnSum : columnSums) {
        if (Utils.gr(columnSum, 0)) {
            relevantColumns++;
        }
    }

    int smallCells = 0;
    for (int r = 0; r < rowCount; r++) {
        if (!Utils.gr(rowSums[r], 0)) {
            continue;
        }
        for (int c = 0; c < columnCount; c++) {
            if (!Utils.gr(columnSums[c], 0)) {
                continue;
            }
            final double expected = (columnSums[c] * rowSums[r]) / grandTotal;
            if (!Utils.sm(expected, smallFrequency)) {
                continue;
            }
            // A single expected frequency below 1 fails the criterion outright.
            if (Utils.sm(expected, 1)) {
                return false;
            }
            smallCells++;
            if (smallCells > (relevantRows * relevantColumns) / smallFrequency) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Computes Cramer's V for a contingency table.
 *
 * <p>The value is the square root of the chi-squared statistic (without
 * Yates' correction) divided by the grand total times the smaller of
 * (rows - 1) and (columns - 1). The result is 0 when that smaller
 * dimension is 0 or when the grand total equals 0 (per {@code Utils.eq}).
 *
 * @param matrix the contingency table
 * @return Cramer's V
 */
public static double CramersV(double [][] matrix) {
    final int rowCount = matrix.length;
    final int columnCount = matrix[0].length;

    double grandTotal = 0;
    for (int r = 0; r < rowCount; r++) {
        for (int c = 0; c < columnCount; c++) {
            grandTotal += matrix[r][c];
        }
    }

    final int smallerDimension;
    if (rowCount < columnCount) {
        smallerDimension = rowCount - 1;
    } else {
        smallerDimension = columnCount - 1;
    }

    if ((smallerDimension == 0) || Utils.eq(grandTotal, 0)) {
        return 0;
    }
    final double statistic = chiVal(matrix, false);
    return Math.sqrt(statistic / (grandTotal * (double) smallerDimension));
}
/**
 * Computes the entropy of the given array.
 *
 * <p>The {@code lnFunc} value of every element is subtracted from an
 * accumulator, the {@code lnFunc} value of the element sum is added, and
 * the result is divided by the sum times ln 2. A sum equal to 0 (per
 * {@code Utils.eq}) yields 0.
 *
 * @param array the array
 * @return the entropy
 */
public static double entropy(double[] array) {
    double accumulated = 0;
    double total = 0;
    for (double value : array) {
        accumulated -= lnFunc(value);
        total += value;
    }
    if (Utils.eq(total, 0)) {
        return 0;
    }
    return (accumulated + lnFunc(total)) / (total * log2);
}
/**
 * Computes the conditional entropy of the rows given the columns.
 *
 * <p>Walks the table column by column: the {@code lnFunc} value of every
 * cell is added, the {@code lnFunc} value of each column total is
 * subtracted, and the negated result is divided by the grand total times
 * ln 2. A grand total equal to 0 (per {@code Utils.eq}) yields 0.
 *
 * @param matrix the contingency table
 * @return the conditional entropy of the rows given the columns
 */
public static double entropyConditionedOnColumns(double[][] matrix) {
    double accumulated = 0;
    double grandTotal = 0;
    for (int col = 0; col < matrix[0].length; col++) {
        double columnSum = 0;
        for (int row = 0; row < matrix.length; row++) {
            final double cell = matrix[row][col];
            accumulated = accumulated + lnFunc(cell);
            columnSum += cell;
        }
        accumulated = accumulated - lnFunc(columnSum);
        grandTotal += columnSum;
    }
    if (Utils.eq(grandTotal, 0)) {
        return 0;
    }
    return -accumulated / (grandTotal * log2);
}
/**
 * Computes the conditional entropy of the columns given the rows.
 *
 * <p>Walks the table row by row: the {@code lnFunc} value of every cell
 * is added, the {@code lnFunc} value of each row total is subtracted, and
 * the negated result is divided by the grand total times ln 2. A grand
 * total equal to 0 (per {@code Utils.eq}) yields 0. The column count is
 * taken from the first row.
 *
 * @param matrix the contingency table
 * @return the conditional entropy of the columns given the rows
 */
public static double entropyConditionedOnRows(double[][] matrix) {
    double accumulated = 0;
    double grandTotal = 0;
    for (int row = 0; row < matrix.length; row++) {
        double rowSum = 0;
        // The width is read inside the loop so an empty matrix never touches matrix[0].
        for (int col = 0; col < matrix[0].length; col++) {
            final double cell = matrix[row][col];
            accumulated = accumulated + lnFunc(cell);
            rowSum += cell;
        }
        accumulated = accumulated - lnFunc(rowSum);
        grandTotal += rowSum;
    }
    if (Utils.eq(grandTotal, 0)) {
        return 0;
    }
    return -accumulated / (grandTotal * log2);
}
/**
 * Computes the conditional entropy of the columns given the rows of the
 * test matrix with respect to the train matrix. Uses a Laplace prior.
 *
 * <p>Every test cell is weighted by the negative natural log of
 * (train cell + 1), and every row adds its test total times the natural
 * log of (train row total + numClasses). The accumulated value is divided
 * by the total of all test cells times ln 2.
 *
 * <p>The train matrix is read at the same indices as the test matrix, and
 * the column count is taken from the first test row. No check is made for
 * a zero test total.
 *
 * @param train the train matrix
 * @param test the test matrix
 * @param numClasses the number of symbols for Laplace
 * @return the entropy
 */
public static double entropyConditionedOnRows(double[][] train,
                                              double[][] test,
                                              double numClasses) {
    double accumulated = 0;
    double testTotal = 0;
    for (int i = 0; i < test.length; i++) {
        double trainRowSum = 0;
        double testRowSum = 0;
        for (int j = 0; j < test[0].length; j++) {
            accumulated -= test[i][j] * Math.log(train[i][j] + 1);
            trainRowSum += train[i][j];
            testRowSum += test[i][j];
        }
        // The divisor must cover every row of the test matrix, not just the last one.
        testTotal += testRowSum;
        accumulated += testRowSum * Math.log(trainRowSum + numClasses);
    }
    return accumulated / (testTotal * log2);
}
/**
 * Computes the rows' entropy for the given contingency table.
 *
 * <p>The {@code lnFunc} value of every row total is subtracted from an
 * accumulator, the {@code lnFunc} value of the grand total is added, and
 * the result is divided by the grand total times ln 2. A grand total
 * equal to 0 (per {@code Utils.eq}) yields 0. The column count is taken
 * from the first row.
 *
 * @param matrix the contingency table
 * @return the rows' entropy
 */
public static double entropyOverRows(double[][] matrix) {
    double accumulated = 0;
    double grandTotal = 0;
    for (int row = 0; row < matrix.length; row++) {
        double rowSum = 0;
        // The width is read inside the loop so an empty matrix never touches matrix[0].
        for (int col = 0; col < matrix[0].length; col++) {
            rowSum += matrix[row][col];
        }
        accumulated = accumulated - lnFunc(rowSum);
        grandTotal += rowSum;
    }
    if (Utils.eq(grandTotal, 0)) {
        return 0;
    }
    return (accumulated + lnFunc(grandTotal)) / (grandTotal * log2);
}
/**
* Computes the columns' entropy for the given contingency table.
*
* @param matrix the contingency table
* @return the columns' entropy
*/
public static double entropyOverColumns(double[][] matrix){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -