⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 coclustering.cc

📁 一种聚类算法,名字是cocluster
💻 CC
📖 第 1 页 / 共 3 页
字号:
/*  Coclustering.cc    Implementation of the super class of all co-cluster algorithms    Copyright (c) 2005, 2006              by Hyuk Cho    Copyright (c) 2003, 2004    	      by Hyuk Cho, Yuqiang Guan, and Suvrit Sra                {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <algorithm>#include <assert.h>#include <time.h>#include <stdlib.h>#include "Coclustering.h"#include "MatrixVector.h"Coclustering::Coclustering(Matrix *inputCCS, Matrix *inputCRS, commandLineArgument &myCLA){  assert(inputCCS != NULL && inputCRS != NULL);  assert(myCLA.numRowCluster > 0 && myCLA.numRowCluster <= inputCCS->getNumRow());  assert(myCLA.numColCluster > 0 && myCLA.numColCluster <= inputCCS->getNumCol());   isShowingEachCluster = myCLA.showingEachCluster;  isTakingReverse = myCLA.takingReverse;  isHavingVariation = false;  isComputingOneWayObjective = myCLA.computingOneWayObjective;  dumpLevel = myCLA.dumpLevel;  dumpAccessMode = myCLA.dumpAccessMode;  batchUpdateType = myCLA.batchUpdateType;  rowBatchUpdateThreshold = myCLA.rowBatchUpdateThreshold;  colBatchUpdateThreshold = myCLA.colBatchUpdateThreshold;  localSearchType = myCLA.localSearchType;  rowLocalSearchThreshold = myCLA.rowLocalSearchThreshold;  colLocalSearchThreshold = myCLA.colLocalSearchThreshold;  rowLocalSearchLength = myCLA.rowLocalSearchLength;  colLocalSearchLength = myCLA.colLocalSearchLength;  if (rowLocalSearchLength == RESUME_LOCAL_SEARCH)    isAvoidingEmptyRowCluster = true;  else    isAvoidingEmptyRowCluster = false;  if (colLocalSearchLength == RESUME_LOCAL_SEARCH)    isAvoidingEmptyColCluster = true;  else    isAvoidingEmptyColCluster = false;  hasReadRowSeedingFile = false;  hasReadColSeedingFile = false;    numRowCluster = myCLA.numRowCluster;  numColCluster = myCLA.numColCluster;  smoothingType = myCLA.smoothingType;  rowAnnealingFactor = myCLA.rowAnnealingFactor;		// not used...  colAnnealingFactor = myCLA.colAnnealingFactor;		// not used...  rowSmoothingFactor = myCLA.rowSmoothingFactor;		// not used...  colSmoothingFactor = myCLA.colSmoothingFactor;		// not used...  rowSmoothingFactor = myCLA.smoothingFactor;  colSmoothingFactor = myCLA.smoothingFactor;  perturbationMagnitude = myCLA.perturbationMagnitude;  rowSeedingOffsetType = myCLA.rowSeedingOffsetType;  colSeedingOffsetType = myCLA.colSeedingOffsetType;  numRowSeedingSet = myCLA.numRowSeedingSet;  numColSeedingSet = myCLA.numColSeedingSet;  rowSeedingAccessMode = myCLA.rowSeedingAccessMode;  colSeedingAccessMode = myCLA.colSeedingAccessMode;  numRowClass = myCLA.numRowClass;  numColClass = myCLA.numColClass;  rowClassLabel = myCLA.rowClassLabel;  colClassLabel = myCLA.colClassLabel;  rowInitializationMethod = myCLA.rowInitializationMethod;  colInitializationMethod = myCLA.colInitializationMethod;//  rowSeedingFilename = myCLA.rowSeedingFilename;//  colSeedingFilename = myCLA.colSeedingFilename;//  coclusterFilename = myCLA.coclusterFilename;  strcpy(rowSeedingFilename, myCLA.rowSeedingFilename);  strcpy(colSeedingFilename, myCLA.colSeedingFilename);  strcpy(coclusterFilename, myCLA.coclusterFilename);  strcpy(objectiveFilename, myCLA.objectiveFilename);  strcpy(dumpFilename, myCLA.dumpFilename);  strcpy(statisticsFilename, myCLA.statisticsFilename);  coclusterOffsetType = myCLA.coclusterOffsetType;  coclusterLabelType = myCLA.coclusterLabelType;  coclusterAccessMode = myCLA.coclusterAccessMode;  objectiveAccessMode = myCLA.objectiveAccessMode;  statisticsAccessMode = myCLA.statisticsAccessMode;    numRow = inputCCS->getNumRow();  numCol = inputCCS->getNumCol();  myCCS = inputCCS;  myCRS = inputCRS;  myCRS->setSmoothingFactor(myCLA.smoothingType, myCLA.rowSmoothingFactor);  myCCS->setSmoothingFactor(myCLA.smoothingType, myCLA.colSmoothingFactor);  myCRS->setAnnealingFactor(myCLA.rowAnnealingFactor);  myCCS->setAnnealingFactor(myCLA.colAnnealingFactor);  isSilent = false;		// not used...  isReversed = NULL;  rowV = colV = 0;  numIteration = 0;  numEmptyRowCluster = 0;  numEmptyColCluster = 0;  numSingletonRowCluster = 0;  numSingletonColCluster = 0;  numReversedRow = 0;  isRowMarked = new bool[numRow];  isColMarked = new bool[numCol];  memoryUsed = (numRow + numCol) * sizeof(bool);  Acompressed = new double *[numRowCluster];  for (int i = 0; i < numRowCluster; i++)    Acompressed[i] = new double[numColCluster];  memoryUsed = numRowCluster * numColCluster * sizeof(double);  numRowPermutation = myCLA.numRowPermutation;  numColPermutation = myCLA.numColPermutation;  doRowCLVecInitialization();   doColCLVecInitialization();      rowCL = new int[numRow];  colCL = new int[numCol];  rowCS = new int[numRowCluster];  colCS = new int[numColCluster];  memoryUsed += (numRow + numCol + numRowCluster + numColCluster) * sizeof(int);  if (isComputingOneWayObjective){    twoNormOfEachRow = new double[numRow];    twoNormOfEachCol = new double[numCol];    memoryUsed += (numRow + numCol) * sizeof(double);  }  if (rowSeedingAccessMode != NO_OPEN_MODE && numRowSeedingSet > 1){    if (rowSeedingAccessMode == BOTH_INPUT_MODE || rowSeedingAccessMode == ONE_INPUT_MODE)      rowSeedingFile.open(rowSeedingFilename, ios::in);    else      rowSeedingFile.open(rowSeedingFilename, ios::app);    if (!rowSeedingFile.is_open()){      cout << "  !!! RowSeeding file open error: " << rowSeedingFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  if (colSeedingAccessMode != NO_OPEN_MODE && numColSeedingSet > 1){    if (colSeedingAccessMode == ONE_INPUT_MODE )      colSeedingFile.open(colSeedingFilename, ios::in);    else      colSeedingFile.open(colSeedingFilename, ios::app);    if (!colSeedingFile.is_open()){      cout << "  !!! ColSeeding file open error: " << colSeedingFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  if (coclusterAccessMode != NO_OPEN_MODE){    if (coclusterAccessMode == OUTPUT_MODE)      coclusterFile.open(coclusterFilename, ios::out);    else      coclusterFile.open(coclusterFilename, ios::app);    if (!coclusterFile.is_open()){      cout << "  !!! Cocluster file open error: " << coclusterFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  if (dumpAccessMode != NO_OPEN_MODE){    if (dumpAccessMode == OUTPUT_MODE)      dumpFile.open(dumpFilename, ios::out);    else      dumpFile.open(dumpFilename, ios::app);    if (!dumpFile.is_open()){      cout << "  !!! Dump file open error: " << dumpFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  if (objectiveAccessMode != NO_OPEN_MODE){    if (objectiveAccessMode == OUTPUT_MODE)      objectiveFile.open(objectiveFilename, ios::out);    else      objectiveFile.open(objectiveFilename, ios::app);    if (!objectiveFile.is_open()){      cout << "  !!! Objective file open error: " << objectiveFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  if (statisticsAccessMode != NO_OPEN_MODE){    if (statisticsAccessMode == OUTPUT_MODE)      statisticsFile.open(statisticsFilename, ios::out);    else      statisticsFile.open(statisticsFilename, ios::app);    if (!statisticsFile.is_open()){      cout << "  !!! Statistics file open error: " << statisticsFilename << " !!!" << endl;      exit(EXIT_FAILURE);    }  }  isEmptyRowClusterReported = isEmptyColClusterReported = false ;  randNumGenerator.Set((unsigned)time(NULL));}Coclustering::~Coclustering(){  delete [] isRowMarked;  delete [] isColMarked;  delete [] rowCL;  delete [] colCL;  delete [] rowCS;  delete [] colCS;  for (int i = 0; i < numRowCluster; i++)    delete [] Acompressed[i];  delete [] Acompressed;  if (isComputingOneWayObjective){    delete [] twoNormOfEachRow;    delete [] twoNormOfEachCol;  }  if ((rowSeedingAccessMode == BOTH_INPUT_MODE || rowSeedingAccessMode == ONE_INPUT_MODE) && numRowSeedingSet > 1)    rowSeedingFile.close();   if ((colSeedingAccessMode == ONE_INPUT_MODE) && numColSeedingSet > 1)    colSeedingFile.close();   if (coclusterAccessMode != NO_OPEN_MODE)    coclusterFile.close();   if (dumpAccessMode != NO_OPEN_MODE)    dumpFile.close();   if (objectiveAccessMode != NO_OPEN_MODE)    objectiveFile.close();  if (statisticsAccessMode != NO_OPEN_MODE)    statisticsFile.close(); }int Coclustering::getEmptyRC(){  return numEmptyRowCluster;}int Coclustering::getEmptyCC(){  return numEmptyColCluster;}int Coclustering::getSingletonRC(){  return numSingletonRowCluster;}int Coclustering::getSingletonCC(){  return numSingletonColCluster;}double Coclustering::getObjValue(){  return objValue;}double Coclustering::getObjValue4RowCluster(){  return objValue4RowCluster;}double Coclustering:: getObjValue4ColCluster(){  return objValue4ColCluster;}int Coclustering::getNumIteration(){  return numIteration;}int Coclustering::getNumReversedRow(){  return numReversedRow;}void Coclustering::chooseInitializationMethod(){  int tempRowClass, tempColClass;  if (((rowInitializationMethod == SEEDING_INIT) && (colInitializationMethod == SEEDING_INIT))       && (strcmp(rowSeedingFilename, colSeedingFilename) == 0)){    if (!hasReadRowSeedingFile && (numRowSeedingSet == 1) && (numColSeedingSet == 1)){      readLabel(rowSeedingFilename, numRow, numCol, rowCL, colCL, tempRowClass, tempColClass, rowSeedingOffsetType);         hasReadRowSeedingFile = true;    }    if ((numRowSeedingSet > 1) && (numColSeedingSet > 1)){      readLabel(rowSeedingFile, numRow, numCol, rowCL, colCL, tempRowClass, tempColClass, rowSeedingOffsetType);    }  } else {    if (numRowCluster == 1 || numRowCluster == numRow){      doRowRandomInitializationModified();    } else {      switch (rowInitializationMethod){        case RANDOM_INIT://          doRowRandomInitialization();          doRowRandomInitializationModified();          break;        case RANDOM_PERTURB_INIT:          doRowRandomPerturbInitialization();          break;        case FARTHEST_INIT:          doRowFarthestInitialization();          break;        case SEEDING_INIT://          if (strcmp(rowSeedingFilename, EMPTY_STRING) != 0 && strcmp(colSeedingFilename, EMPTY_STRING) == 0)          if (strcmp(rowSeedingFilename, EMPTY_STRING) != 0){	    if (!hasReadRowSeedingFile && (numRowSeedingSet == 1)){	      tempRowClass = readLabel(rowSeedingFilename, numRow, rowCL, rowSeedingOffsetType);                      hasReadRowSeedingFile = true;            }	    if (numRowSeedingSet > 1){	      readLabel(rowSeedingFile, numRow, rowCL, tempRowClass, rowSeedingOffsetType);             }	  }	  break;	case PERMUTATION_INIT:	  doRowPermutationInitialization();	  break;             default:          doRowRandomInitializationModified();          break;      }    }    if ((numColCluster == 1) || (numColCluster == numCol)){      doColRandomInitializationModified();    } else {      switch (colInitializationMethod){        case RANDOM_INIT://          doColRandomInitialization();          doColRandomInitializationModified();          break;        case RANDOM_PERTURB_INIT:          doColRandomPerturbInitialization();          break;        case FARTHEST_INIT:          doColFarthestInitialization();          break;        case SEEDING_INIT://          if (strcmp(colSeedingFilename, EMPTY_STRING) != 0 && strcmp(rowSeedingFilename, EMPTY_STRING) == 0)          if (strcmp(colSeedingFilename, EMPTY_STRING) != 0){	    if (!hasReadColSeedingFile && (numColSeedingSet == 1)){	      tempColClass = readLabel(colSeedingFilename, numCol, colCL, colSeedingOffsetType);              hasReadColSeedingFile = true;            }	    if (numColSeedingSet > 1){	      readLabel(colSeedingFile, numCol, colCL, tempColClass, colSeedingOffsetType);	    }          }	  break;	case PERMUTATION_INIT:	  doColPermutationInitialization();	  break;        default:          doColRandomInitializationModified();          break;      }    }  }}void Coclustering::doRowRandomInitialization(){  bool *mark = new bool[numRowCluster], isEnough = true;  for (int i = 0; i < numRowCluster; i++)    mark[i] = false;  for (int i = 0; i < numRow; i++){    rowCL[i] = randNumGenerator.GetUniformInt(numRowCluster);    mark[rowCL[i]] = true;  }  for (int i = 0; i < numRowCluster; i++)    if (mark[i] == false){      isEnough = false;      break;    }  if (isEnough == false)    for (int j = 0; j < numRowCluster; j++)      rowCL[j] = j;  delete [] mark;}void Coclustering::doColRandomInitialization(){  bool *mark = new bool[numColCluster], isEnough = true;  for (int i = 0; i < numColCluster; i++)    mark[i] = false;  for (int i = 0; i < numCol; i++){    colCL[i] = randNumGenerator.GetUniformInt(numColCluster);    mark[colCL[i]] = true;  }  for (int i = 0; i < numColCluster; i++)    if (mark[i] == false){      isEnough = false;      break;    }  if (isEnough == false)    for (int j = 0; j < numColCluster; j++)      colCL[j] = j;  delete [] mark;}void Coclustering::doRowRandomInitializationModified(){  if (numRowCluster == 1)    for (int i = 0; i < numRow; i++)      rowCL[i] = 0;  else if (numRowCluster == numRow)    doRowRandomInitializationDirect();  else {    for (int i = 0; i < numRow; i++)      rowCL[i] = i % numRowCluster;    for (int i = 0; i < numRow; i++){      int j = randNumGenerator.GetUniformInt(numRow);      if (i != j){        int temp = rowCL[i];        rowCL[i] = rowCL[j];        rowCL[j] = temp;      }    }  }}void Coclustering::doColRandomInitializationModified(){  if (numColCluster == 1)    for (int i = 0; i < numCol; i++)      colCL[i] = 0;  else if (numColCluster == numCol)    doColRandomInitializationDirect();  else {    for (int i = 0; i < numCol; i++)      colCL[i] = i % numColCluster;    for (int i = 0; i < numCol; i++){      int j = randNumGenerator.GetUniformInt(numCol);      if (i != j){        int temp = colCL[i];        colCL[i] = colCL[j];        colCL[j] = temp;      }    }  }}void Coclustering::doRowRandomInitializationDirect(){  for (int i = 0; i < numRow; i++)    rowCL[i] = i;}void Coclustering::doColRandomInitializationDirect(){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -