⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 externalvalidity.cc

📁 一种聚类算法,名字是cocluster
💻 CC
字号:
/*  ExternalVality.cc    Implementation of the ExternalValidity class    Copyright (c) 2005, 2006              by Hyuk Cho    Copyright (c) 2003, 2004    	      by Hyuk Cho, Yuqiang Guan, and Suvrit Sra                {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <algorithm>#include <vector>#include <cmath>#include <stdio.h>#include <assert.h>#include "ExternalValidity.h"using namespace std;ExternalValidity::ExternalValidity(int nClass, int nCluster, int nPoint, int *classLbl, int *clusterLbl){  numClass = nClass;  numCluster = nCluster;  numPoint = nPoint;  classLabel = classLbl;  clusterLabel = clusterLbl;  clusterSize = new int [numCluster];  classSize = new int [numClass];  confusionMatrix = new int * [numCluster];  for (int i = 0; i < numCluster; i++)    confusionMatrix[i] = new int [numClass];  for (int i = 0; i < numCluster; i++)    clusterSize[i] = 0;  for (int i = 0; i < numClass; i++)    classSize[i] = 0;  for (int i = 0; i < numPoint; i++){    clusterSize[clusterLabel[i]]++;    classSize[classLabel[i]]++;  }  for(int i = 0; i < numCluster; i++)    for (int j = 0; j < numClass; j++)      confusionMatrix[i][j] = 0;  for (int i = 0; i < numPoint; i++)    confusionMatrix[clusterLabel[i]][classLabel[i]]++;  isSilent = false;		// not used...  memoryUsed += (numCluster*numClass + numCluster + numClass) * sizeof(int);}ExternalValidity::~ExternalValidity(){  for (int j=0; j< numClass; j++)    delete [] confusionMatrix[j];  delete [] confusionMatrix;  delete [] classSize;  delete [] clusterSize;}void ExternalValidity::setSilent(bool s)	// not used...{  isSilent = s;}void ExternalValidity::printCM(ostream &os){  if ((!os) == 0){    os << "  Confusion Matrix" << endl;    for (int i = 0; i < numCluster; i++){//      os << endl << "\t";      for (int j = 0; j < numClass; j++)        os << "\t" << confusionMatrix[i][j];      os << endl;    }    os << endl;  }}void ExternalValidity::purity_Entropy_MutInfo(bool isShowingEachCluster, ostream &os1, ostream &os2, ostream &os3){  int *sum_row, *sum_col;  double sum, max, mut_info = 0.0, average_purity = 0.0, average_entropy = 0.0;  sum_row = new int [numCluster];  sum_col = new int [numClass];  for(int i = 0;i < numCluster; i++){    sum = 0.0;    max = -1;    for(int j = 0; j < numClass; j++){      if (max < confusionMatrix[i][j])        max = confusionMatrix[i][j];      if (clusterSize[i] != 0 && confusionMatrix[i][j] != 0)        sum += (double)confusionMatrix[i][j] / clusterSize[i] * log((double)clusterSize[i] / confusionMatrix[i][j]) / log((double)numClass);    }    if(clusterSize[i] != 0){        if (isShowingEachCluster){        if ((!os1) == 0){          os1 << "  Purity of cluster  " << i << "        = " << (double)max/clusterSize[i] << endl;          os1 << "  Entropy of cluster " << i << "        = " << sum*log(2.0) << endl;        }        if ((!os2) == 0){          os2 << "  Purity of cluster  " << i << "        = " << (double)max/clusterSize[i] << endl;          os2 << "  Entropy of cluster " << i << "        = " << sum*log(2.0) << endl;        }        if ((!os3) == 0){          os3 << "  Purity of cluster  " << i << "        = " << (double)max/clusterSize[i] << endl;          os3 << "  Entropy of cluster " << i << "        = " << sum*log(2.0) << endl;        }      }      average_purity += (double)max / clusterSize[i];    }     average_entropy += sum;  }  if ((!os1) == 0){    os1 << "  Average Purity of clusters  = " << average_purity / numCluster << endl;    os1 << "  Average Entropy of clusters = " << average_entropy * log(2.0) / numCluster << endl;  }  if ((!os2) == 0){    os2 << "  Average Purity of clusters  = " << average_purity / numCluster << endl;    os2 << "  Average Entropy of clusters = " << average_entropy * log(2.0) / numCluster << endl;  }  if ((!os3) == 0){    os3 << "  Average Purity of clusters  = " << average_purity / numCluster << endl;    os3 << "  Average Entropy of clusters = " << average_entropy * log(2.0) / numCluster << endl;  }  for(int i = 0; i < numCluster; i++){    sum_row[i] = 0;    for(int k = 0; k < numClass; k++)      sum_row[i] += confusionMatrix[i][k];  }  for(int k = 0; k < numClass; k++){    sum_col[k] = 0;    for(int i = 0; i < numCluster; i++)      sum_col[k] += confusionMatrix[i][k];  }  for(int i = 0; i < numCluster; i++)    for(int k = 0; k < numClass; k++)      if (confusionMatrix[i][k] > 0)        mut_info += confusionMatrix[i][k] * log(confusionMatrix[i][k] * numPoint * 1.0 / (sum_row[i] * sum_col[k]));  mut_info /= numPoint;  double hx = 0, hy = 0, min;  for (int i = 0; i < numCluster; i++)    if (sum_row[i] > 0)      hx += sum_row[i] * log(sum_row[i] * 1.0);      hx = log(numPoint * 1.0) - hx / numPoint;  for (int i = 0; i < numClass; i++)    if (sum_col[i] > 0)      hy += sum_col[i] * log(sum_col[i] * 1.0);  hy = log(numPoint * 1.0) - hy / numPoint;  //min = hx<hy?hx:hy;  min = (hx + hy) / 2;  if ((!os1) == 0)    os1 << "  (Normalized) MI of clusters = " << mut_info / min << endl;  if ((!os2) == 0)    os2 << "  (Normalized) MI of clusters = " << mut_info / min << endl;  if ((!os3) == 0)    os3 << "  (Normalized) MI of clusters = " << mut_info / min << endl;  delete [] sum_row;  delete [] sum_col;}void ExternalValidity::F_measure(ostream &os1, ostream &os2, ostream &os3){  double **Recall, **Precision, **F;  double F_value;   Recall = new double *[numCluster];  for (int i = 0; i < numCluster; i++)    Recall[i] = new double[numClass];  Precision = new double *[numCluster];  for (int i = 0; i < numCluster; i++)    Precision[i] = new double[numClass];  F = new (double *)[numCluster];  for (int i = 0; i < numCluster; i++)    F[i] = new double[numClass];  for (int i = 0; i < numCluster; i++)    for (int j = 0; j < numClass; j++)      Recall[i][j] = confusionMatrix[i][j] * 1.0 / classSize[j];  for (int i = 0; i < numCluster; i++)    for (int j = 0; j < numClass; j++)      Precision[i][j] = confusionMatrix[i][j] * 1.0 / clusterSize[i];  for (int i = 0; i < numCluster; i++)    for (int j = 0; j < numClass; j++)      F[i][j] = 2.0 * Recall[i][j] * Precision[i][j] / (Recall[i][j] + Precision[i][j]);  F_value = 0.0;  for (int j = 0; j < numClass; j++){    double temp_max = 0.0;    for (int i = 0; i < numCluster; i++)      if (temp_max < F[i][j])	temp_max = F[i][j];    F_value += temp_max * classSize[j];  }  F_value /= numPoint;  if ((!os1) == 0)    os1 << "  F-measure value of clusters = " << F_value << endl;  if ((!os2) == 0)    os2 << "  F-measure value of clusters = " << F_value << endl;  if ((!os3) == 0)    os3 << "  F-measure value of clusters = " << F_value << endl;  for(int i = 0; i < numCluster; i++){    delete [] Recall[i];    delete [] Precision[i];    delete [] F[i];  }  delete [] F;  delete [] Precision;  delete [] Recall;}void ExternalValidity::micro_avg_precision_recall(double &p_t, double &r_t, ostream &os1, ostream &os2, ostream &os3)  /* for the definition of micro-average precision/recall see paper "Unsupervised document classification     using sequential information maximization" by N. Slonim, N. Friedman and N. Tishby */{  int *uni_label, *alpha, *beta, *gamma;  uni_label = new int[numCluster];  for (int i = 0; i < numCluster; i++){    uni_label[i] = 0;    double temp = confusionMatrix[i][0];    for (int j = 1; j < numClass; j++)      if (temp < confusionMatrix[i][j]){        temp = confusionMatrix[i][j];	uni_label[i] = j;      }  }  alpha = new int[numClass];  beta = new int[numClass];  gamma = new int[numClass];  for (int j = 0; j < numClass; j++){    alpha[j] = 0;    beta[j] = 0;    gamma[j] = 0;  }  for (int i = 0; i < numPoint; i++)    if (uni_label[clusterLabel[i]] == classLabel[i])      alpha[classLabel[i]]++;    else {      beta[uni_label[clusterLabel[i]]]++;      gamma[classLabel[i]]++;    }  double temp = 0, temp1 = 0;  for (int j = 0; j < numClass; j++){    temp += alpha[j];    temp1 += beta[j];  }  temp1 += temp;  p_t = temp * 1.0 / temp1;  temp1 = 0;  for (int j = 0; j < numClass; j++)    temp1 += gamma[j];  temp1 += temp;  r_t = temp * 1.0 / temp1;  if ((!os1) == 0){    os1 << "  Micro-average Precision     = " << p_t << endl;    os1 << "  Micro-average Recall        = " << r_t << endl << endl;  }  if ((!os2) == 0){    os2 << "  Micro-average Precision     = " << p_t << endl;    os2 << "  Micro-average Recall        = " << r_t << endl << endl;  }  if ((!os3) == 0){    os3 << "  Micro-average Precision     = " << p_t << endl;    os3 << "  Micro-average Recall        = " << r_t << endl << endl;  }  delete [] uni_label;  delete [] alpha;  delete [] beta;  delete [] gamma;}    void ExternalValidity::getAccuracy(double &accuracy, ostream &os1, ostream &os2, ostream &os3)  /* This computes the general precision, sometimes called accuracy, which can be defined as:     Accuracy = 1/T (sum_{i=1}^l (t_i)) * 100,      where T denotes the total number of points,            l denotes the number of clusters,	   and t_i denotes the number of the points correctly clustered into a corresponding class i.     Notice that each t_i is a diagonal element of the corresponding confusion matrix whose cluster labels      are permuted so that sum of diagonal elements is maximized.  */ {  assert(numClass == numCluster);  vector<int> v;  // create the data...  for (int i = 0; i < numClass; i++)    v.push_back(i);  // permutate the data...  int maxTrace = 0;  do {    int tempTrace = 0;    for (int i = 0; i < numClass; i++)      tempTrace += confusionMatrix[v[i]][i];    if (tempTrace > maxTrace)      maxTrace = tempTrace;  } while (next_permutation(v.begin(), v.end()));  int tempSum = 0;  for (int i = 0; i < numClass; i++)    for (int j = 0; j < numClass; j++)      tempSum += confusionMatrix[i][j];  accuracy = (double)maxTrace / (double)tempSum;  if ((!os1) == 0)    os1 << "  Accuracy (= Precision)      = " << accuracy << endl << endl;  if ((!os2) == 0)    os2 << "  Accuracy (= Precision)      = " << accuracy << endl << endl;  if ((!os3) == 0)    os3 << "  Accuracy (= Precision)      = " << accuracy << endl << endl;}    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -