⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 coclustering.cc

📁 一种聚类算法,名字是cocluster
💻 CC
📖 第 1 页 / 共 3 页
字号:
  for (int i = 0; i < numCol; i++)    colCL[i] = i;}void Coclustering::doSeedingInitializationI(char * seedingFilename){  int rn, cn, ID;  std::ifstream gpfile(seedingFilename);  if (gpfile.is_open()){    cout << "  Reading cluster labels: " << seedingFilename << endl;    for (int i = 0; i < numRowCluster; i++)      for (int j = 0; j < numColCluster; j++){        gpfile >> rn >> cn;        for (int k = 0; k < rn; k++){          gpfile >> ID;          rowCL[ID] = i;        }        for (int k = 0; k < cn; k++){          gpfile >> ID;          colCL[ID] = j;        }      }  } else {    cout << "  !!! Seeding file open error: " << seedingFilename << " !!!" << endl << endl;    exit(EXIT_FAILURE);  }/*  {     cout << "Seeding file open error: " << seedingFilename << endl << ". So doing random initialization" << endl;    randomInitial(numRowCluster, numRow, rowCL);    randomInitial(numColCluster, numCol, colCL);  }*/}void Coclustering::doSeedingInitializationII(char * seedingFilename){  std::ifstream gpfile(seedingFilename);  if (gpfile.is_open()){    cout << "  Reading cluster labels: " << seedingFilename << endl;    for (int i = 0; i < numRow; i++)      gpfile >> rowCL[i];    for (int j = 0; j < numCol; j++)      gpfile >> colCL[j];  } else {    cout << "  !!! Seeding file open error: " << seedingFilename << " !!!" << endl << endl;    exit(EXIT_FAILURE);  }/*  {     cout << "Seeding file open error: " << seedingFilename << endl << "So doing random initialization" << endl;    randomInitial(numRowCluster, numRow, rowCL);    randomInitial(numColCluster, numCol, colCL);  }*/}void Coclustering::doRowCLVecInitialization(){  if (numRowCluster == 1)    for (int i = 0; i < numRow; i++)      rowCLVec.push_back(0);  else if (numRowCluster == numRow)    for (int i = 0; i < numRow; i++)      rowCLVec.push_back(i);  else    for (int i = 0; i < numRow; i++)      rowCLVec.push_back(i % numRowCluster);}void Coclustering::doColCLVecInitialization(){  if (numColCluster == 1)    for (int i = 0; i < numCol; i++)      colCLVec.push_back(0);  else if (numColCluster == numCol)    for (int i = 0; i < numCol; i++)      colCLVec.push_back(i);  else    for (int i = 0; i < numCol; i++)      colCLVec.push_back(i % numColCluster);}void Coclustering::doRowPermutationInitialization(){  bool has_next_permutation = true;  if ((numRowCluster != 1) && (numRowCluster != numRow))    for (int i = 0; i < numRowPermutation && has_next_permutation; i++)      has_next_permutation = next_permutation(rowCLVec.begin(), rowCLVec.end());  assert(has_next_permutation);  for (int i = 0; i < numRow; i++)    rowCL[i] = rowCLVec[i];}void Coclustering::doColPermutationInitialization(){  bool  has_next_permutation = true;  if ((numColCluster != 1) && (numColCluster != numCol))    for (int i = 0; i < numColPermutation && has_next_permutation; i++)      has_next_permutation = next_permutation(colCLVec.begin(), colCLVec.end());  assert(has_next_permutation);  for (int i = 0; i < numCol; i++)    colCL[i] = colCLVec[i];}void Coclustering::checkHavingReversedRow(){  bool havingReversed = false;  for (int r = 0; r < numRow; r++)    if (rowCL[r] < 0){      havingReversed = true;      break;    }  if (havingReversed){    if (!isTakingReverse){      isTakingReverse = true;      isReversed = new bool[numRow];      memoryUsed += numRow * sizeof(bool);    }      for (int r = 0; r < numRow; r++)      if (rowCL[r] < 0){        isReversed[r] = true;        rowCL[r] *= -1;      } else        isReversed[r] = false;  }}    void Coclustering::updateVariable(double &minDistance, int &minCL, double tempDistance, int tempCL){  if (tempDistance < minDistance){    minDistance = tempDistance;    minCL = tempCL;  }}void Coclustering::updateVariable(double &minDistance, int &minCL, bool &tempIsReversed, double tempDistance, int tempCL, bool trueOrFalse){  if (tempDistance < minDistance){    minDistance = tempDistance;    minCL = tempCL;    tempIsReversed = trueOrFalse;  }}void Coclustering::computeNumReversedRow(){  numReversedRow = 0;  for (int r = 0; r < numRow; r++)    if (isReversed[r])      numReversedRow++;}void Coclustering::adjustClusterLabel(int value){  for (int r = 0; r < numRow; r++)    rowCL[r] += value;  for (int c = 0; c < numCol; c++)    colCL[c] += value;}  void Coclustering::computeRowClusterSize(){  for (int i = 0; i < numRowCluster; i++)    rowCS[i] = 0;  for (int i = 0; i < numRow; i++)    rowCS[rowCL[i]]++;  numEmptyRowCluster = 0;  numSingletonRowCluster = 0;  for (int i = 0; i < numRowCluster; i++){    if (rowCS[i] == 0)      numEmptyRowCluster++;			// count # of empty row cluster(s)    else if (rowCS[i] == 1)      numSingletonRowCluster++;			// count # of singleton row cluster(s)  }  if (numEmptyRowCluster > 0){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)        statisticsFile << "  ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl;        isEmptyRowClusterReported = true;  } else if (isEmptyRowClusterReported){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  !!! Fixing empty row cluster(s) !!!" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  !!! Fixing empty row cluster(s) !!!" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)      statisticsFile << "  !!! Fixing empty row cluster(s) !!!" << endl;    isEmptyRowClusterReported = false;  }  if (numSingletonRowCluster > 0){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)      statisticsFile << "  ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl;  }}void Coclustering::computeColClusterSize(){  for (int i = 0; i < numColCluster; i++)    colCS[i] = 0;  for (int i = 0; i < numCol; i++)    colCS[colCL[i]]++;  numEmptyColCluster = 0;  numSingletonColCluster = 0;  for (int i = 0; i < numColCluster; i++){    if (colCS[i] == 0)      numEmptyColCluster++;			// count # of empty column cluster(s)    else if (colCS[i] == 1)      numSingletonColCluster++;			// count # of singleton row cluster(s)  }  if (numEmptyColCluster > 0){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)      statisticsFile << endl << "  ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl;    isEmptyColClusterReported = true;  } else if (isEmptyColClusterReported){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  !!! Fixing empty col cluster(s) !!!" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  !!! Fixing empty col cluster(s) !!!" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)      statisticsFile << "  !!! Fixing empty col cluster(s) !!!" << endl;    isEmptyColClusterReported = false;  }  if (numSingletonColCluster > 0){    switch (dumpLevel){      case MINIMUM_DUMP_LEVEL:      case BATCH_UPDATE_DUMP_LEVEL:        break;      case LOCAL_SEARCH_DUMP_LEVEL:        cout << "  ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl;        break;      case MAXIMUM_DUMP_LEVEL:        dumpFile << "  ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl;        break;    }    if (statisticsAccessMode != NO_OPEN_MODE)      statisticsFile << "  ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl;  }}void Coclustering::removeEmptyCluster(){  int *rowClusterNewLabel = new int[numRowCluster], *colClusterNewLabel = new int[numColCluster];  int tmp_label = 0;  if (numEmptyRowCluster > 0){    for (int i = 0; i < numRowCluster; i++){      if (rowCS[i] > 0){        rowClusterNewLabel[i]= tmp_label;        tmp_label++;      }    }    for (int i = 0; i < numRow; i++)      rowCL[i] = rowClusterNewLabel[rowCL[i]];  }  tmp_label = 0;  if (numEmptyColCluster > 0){    for (int i = 0; i < numColCluster; i++){      if (colCS[i] > 0){        colClusterNewLabel[i] = tmp_label;        tmp_label++;      }    }    for (int i = 0; i < numCol; i++)      colCL[i] = colClusterNewLabel[colCL[i]];  }  delete [] rowClusterNewLabel;  delete [] colClusterNewLabel;}void Coclustering::writeCocluster(){  removeEmptyCluster();  computeRowClusterSize();  computeColClusterSize();  if (coclusterLabelType == BLOCK_FORMAT){    int *rowChecker = new int[numRow];    int *colChecker = new int[numCol];    int *rowBin = new int[numRowCluster];    int *colBin = new int[numColCluster];    rowBin[0] = 0;    for (int i = 1; i < numRowCluster; i++)      rowBin[i] = rowBin[i-1] + rowCS[i-1];    colBin[0] = 0;    for (int j = 1; j < numColCluster; j++)      colBin[j] = colBin[j-1] + colCS[j-1];    for (int i = 0; i < numRow; i++){      rowChecker[rowBin[rowCL[i]]] = i;      rowBin[rowCL[i]]++;    }    for (int i = 0; i < numCol; i++){      colChecker[colBin[colCL[i]]] = i;      colBin[colCL[i]]++;    }    int rowIndex = 0, colIndex = 0;    for (int i = 0; i < numRowCluster; i++){      colIndex = 0;      if (rowCS[i] > 0){	for (int j = 0; j < numColCluster; j++){	  if (colCS[j] > 0){	    coclusterFile << rowCS[i] << " " << colCS[j] << endl;	    for (int k = rowIndex; k < rowIndex+rowCS[i]; k++){	      if (isTakingReverse){		if (isReversed[rowChecker[k]])		  coclusterFile << "-";        	coclusterFile << (rowChecker[k]+1) << " ";              } else if (coclusterOffsetType == START_FROM_0){        	coclusterFile << rowChecker[k] << " ";	      } else if (coclusterOffsetType == START_FROM_1){        	coclusterFile << (rowChecker[k]+1) << " ";              }            }            coclusterFile << endl;	    for (int k = colIndex; k < colIndex+colCS[j]; k++){	      if (coclusterOffsetType == START_FROM_0)        	coclusterFile << colChecker[k] << " ";              else if (coclusterOffsetType == START_FROM_1)        	coclusterFile << (colChecker[k]+1) << " ";	    }	    coclusterFile << endl;	  }	  colIndex += colCS[j];	}      }      rowIndex += rowCS[i];    }    delete [] rowChecker;    delete [] colChecker;    delete [] rowBin;    delete [] colBin;  } else {//	removeEmptyCluster();    for (int i = 0; i < numRow; i++){      if (isTakingReverse){	if (isReversed[i])	  coclusterFile << "-";        coclusterFile << (rowCL[i]+1) << " ";      } else if (coclusterOffsetType == START_FROM_0){        coclusterFile << rowCL[i] << " ";      } else 	coclusterFile << (rowCL[i]+1) << " ";    }    coclusterFile << endl;    for (int j = 0; j < numCol; j++){      if (coclusterOffsetType == START_FROM_0)	coclusterFile << colCL[j] << " ";      else        coclusterFile << (colCL[j]+1) << " ";    }    coclusterFile << endl;  }}void Coclustering::setSilent(bool s)	// not used...{  isSilent = s;}void Coclustering::setRowSmoothingFactor(double p){  rowSmoothingFactor = p;}void Coclustering::setColSmoothingFactor(double p){  colSmoothingFactor = p;}void Coclustering::computeAcompressed(){  myCCS->condenseMatrix(rowCL, colCL, numRowCluster, numColCluster, Acompressed);}void Coclustering::computeAcompressed(bool *isReversed){  myCCS->condenseMatrix(rowCL, colCL, numRowCluster, numColCluster, Acompressed, isReversed);}void Coclustering::validateRowCluster(int numRowClass, int *rowClassLabel){  ExternalValidity ev(numRowClass, numRowCluster, numRow, rowClassLabel, rowCL);/*//  if (dumpLevel > MINIMUM_DUMP_LEVEL){    cout << endl << "  ### External Row Cluster Validation ###" << endl << endl;    if (isShowingEachCluster)      ev.printCM(cout);    ev.purity_Entropy_MutInfo(cout, isShowingEachCluster);    ev.F_measure(cout);    ev.micro_avg_precision_recall(rowPrecision, rowRecall, cout);//  }  if (statisticsAccessMode != NO_OPEN_MODE){    statisticsFile << endl << "  ### External Row Cluster Validation ###" << endl << endl;    if (isShowingEachCluster)      ev.printCM(statisticsFile);    ev.purity_Entropy_MutInfo(statisticsFile, isShowingEachCluster);    ev.F_measure(statisticsFile);    ev.micro_avg_precision_recall(rowPrecision, rowRecall, statisticsFile);  }  if (dumpAccessMode != NO_OPEN_MODE){    dumpFile << endl << "  ### External Row Cluster Validation ###" << endl << endl;    if (isShowingEachCluster)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -