📄 coclustering.cc
字号:
/* Coclustering.cc Implementation of the super class of all co-cluster algorithms Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <algorithm>#include <assert.h>#include <time.h>#include <stdlib.h>#include "Coclustering.h"#include "MatrixVector.h"Coclustering::Coclustering(Matrix *inputCCS, Matrix *inputCRS, commandLineArgument &myCLA){ assert(inputCCS != NULL && inputCRS != NULL); assert(myCLA.numRowCluster > 0 && myCLA.numRowCluster <= inputCCS->getNumRow()); assert(myCLA.numColCluster > 0 && myCLA.numColCluster <= inputCCS->getNumCol()); isShowingEachCluster = myCLA.showingEachCluster; isTakingReverse = myCLA.takingReverse; isHavingVariation = false; isComputingOneWayObjective = myCLA.computingOneWayObjective; dumpLevel = myCLA.dumpLevel; dumpAccessMode = myCLA.dumpAccessMode; batchUpdateType = myCLA.batchUpdateType; rowBatchUpdateThreshold = myCLA.rowBatchUpdateThreshold; colBatchUpdateThreshold = myCLA.colBatchUpdateThreshold; localSearchType = myCLA.localSearchType; rowLocalSearchThreshold = myCLA.rowLocalSearchThreshold; colLocalSearchThreshold = myCLA.colLocalSearchThreshold; rowLocalSearchLength = myCLA.rowLocalSearchLength; colLocalSearchLength = myCLA.colLocalSearchLength; if (rowLocalSearchLength == RESUME_LOCAL_SEARCH) isAvoidingEmptyRowCluster = true; else isAvoidingEmptyRowCluster = false; if (colLocalSearchLength == RESUME_LOCAL_SEARCH) isAvoidingEmptyColCluster = true; else isAvoidingEmptyColCluster = false; hasReadRowSeedingFile = false; hasReadColSeedingFile = false; numRowCluster = myCLA.numRowCluster; numColCluster = myCLA.numColCluster; smoothingType = myCLA.smoothingType; rowAnnealingFactor = myCLA.rowAnnealingFactor; // not used... colAnnealingFactor = myCLA.colAnnealingFactor; // not used... rowSmoothingFactor = myCLA.rowSmoothingFactor; // not used... colSmoothingFactor = myCLA.colSmoothingFactor; // not used... rowSmoothingFactor = myCLA.smoothingFactor; colSmoothingFactor = myCLA.smoothingFactor; perturbationMagnitude = myCLA.perturbationMagnitude; rowSeedingOffsetType = myCLA.rowSeedingOffsetType; colSeedingOffsetType = myCLA.colSeedingOffsetType; numRowSeedingSet = myCLA.numRowSeedingSet; numColSeedingSet = myCLA.numColSeedingSet; rowSeedingAccessMode = myCLA.rowSeedingAccessMode; colSeedingAccessMode = myCLA.colSeedingAccessMode; numRowClass = myCLA.numRowClass; numColClass = myCLA.numColClass; rowClassLabel = myCLA.rowClassLabel; colClassLabel = myCLA.colClassLabel; rowInitializationMethod = myCLA.rowInitializationMethod; colInitializationMethod = myCLA.colInitializationMethod;// rowSeedingFilename = myCLA.rowSeedingFilename;// colSeedingFilename = myCLA.colSeedingFilename;// coclusterFilename = myCLA.coclusterFilename; strcpy(rowSeedingFilename, myCLA.rowSeedingFilename); strcpy(colSeedingFilename, myCLA.colSeedingFilename); strcpy(coclusterFilename, myCLA.coclusterFilename); strcpy(objectiveFilename, myCLA.objectiveFilename); strcpy(dumpFilename, myCLA.dumpFilename); strcpy(statisticsFilename, myCLA.statisticsFilename); coclusterOffsetType = myCLA.coclusterOffsetType; coclusterLabelType = myCLA.coclusterLabelType; coclusterAccessMode = myCLA.coclusterAccessMode; objectiveAccessMode = myCLA.objectiveAccessMode; statisticsAccessMode = myCLA.statisticsAccessMode; numRow = inputCCS->getNumRow(); numCol = inputCCS->getNumCol(); myCCS = inputCCS; myCRS = inputCRS; myCRS->setSmoothingFactor(myCLA.smoothingType, myCLA.rowSmoothingFactor); myCCS->setSmoothingFactor(myCLA.smoothingType, myCLA.colSmoothingFactor); myCRS->setAnnealingFactor(myCLA.rowAnnealingFactor); myCCS->setAnnealingFactor(myCLA.colAnnealingFactor); isSilent = false; // not used... isReversed = NULL; rowV = colV = 0; numIteration = 0; numEmptyRowCluster = 0; numEmptyColCluster = 0; numSingletonRowCluster = 0; numSingletonColCluster = 0; numReversedRow = 0; isRowMarked = new bool[numRow]; isColMarked = new bool[numCol]; memoryUsed = (numRow + numCol) * sizeof(bool); Acompressed = new double *[numRowCluster]; for (int i = 0; i < numRowCluster; i++) Acompressed[i] = new double[numColCluster]; memoryUsed = numRowCluster * numColCluster * sizeof(double); numRowPermutation = myCLA.numRowPermutation; numColPermutation = myCLA.numColPermutation; doRowCLVecInitialization(); doColCLVecInitialization(); rowCL = new int[numRow]; colCL = new int[numCol]; rowCS = new int[numRowCluster]; colCS = new int[numColCluster]; memoryUsed += (numRow + numCol + numRowCluster + numColCluster) * sizeof(int); if (isComputingOneWayObjective){ twoNormOfEachRow = new double[numRow]; twoNormOfEachCol = new double[numCol]; memoryUsed += (numRow + numCol) * sizeof(double); } if (rowSeedingAccessMode != NO_OPEN_MODE && numRowSeedingSet > 1){ if (rowSeedingAccessMode == BOTH_INPUT_MODE || rowSeedingAccessMode == ONE_INPUT_MODE) rowSeedingFile.open(rowSeedingFilename, ios::in); else rowSeedingFile.open(rowSeedingFilename, ios::app); if (!rowSeedingFile.is_open()){ cout << " !!! RowSeeding file open error: " << rowSeedingFilename << " !!!" << endl; exit(EXIT_FAILURE); } } if (colSeedingAccessMode != NO_OPEN_MODE && numColSeedingSet > 1){ if (colSeedingAccessMode == ONE_INPUT_MODE ) colSeedingFile.open(colSeedingFilename, ios::in); else colSeedingFile.open(colSeedingFilename, ios::app); if (!colSeedingFile.is_open()){ cout << " !!! ColSeeding file open error: " << colSeedingFilename << " !!!" << endl; exit(EXIT_FAILURE); } } if (coclusterAccessMode != NO_OPEN_MODE){ if (coclusterAccessMode == OUTPUT_MODE) coclusterFile.open(coclusterFilename, ios::out); else coclusterFile.open(coclusterFilename, ios::app); if (!coclusterFile.is_open()){ cout << " !!! Cocluster file open error: " << coclusterFilename << " !!!" << endl; exit(EXIT_FAILURE); } } if (dumpAccessMode != NO_OPEN_MODE){ if (dumpAccessMode == OUTPUT_MODE) dumpFile.open(dumpFilename, ios::out); else dumpFile.open(dumpFilename, ios::app); if (!dumpFile.is_open()){ cout << " !!! Dump file open error: " << dumpFilename << " !!!" << endl; exit(EXIT_FAILURE); } } if (objectiveAccessMode != NO_OPEN_MODE){ if (objectiveAccessMode == OUTPUT_MODE) objectiveFile.open(objectiveFilename, ios::out); else objectiveFile.open(objectiveFilename, ios::app); if (!objectiveFile.is_open()){ cout << " !!! Objective file open error: " << objectiveFilename << " !!!" << endl; exit(EXIT_FAILURE); } } if (statisticsAccessMode != NO_OPEN_MODE){ if (statisticsAccessMode == OUTPUT_MODE) statisticsFile.open(statisticsFilename, ios::out); else statisticsFile.open(statisticsFilename, ios::app); if (!statisticsFile.is_open()){ cout << " !!! Statistics file open error: " << statisticsFilename << " !!!" << endl; exit(EXIT_FAILURE); } } isEmptyRowClusterReported = isEmptyColClusterReported = false ; randNumGenerator.Set((unsigned)time(NULL));}Coclustering::~Coclustering(){ delete [] isRowMarked; delete [] isColMarked; delete [] rowCL; delete [] colCL; delete [] rowCS; delete [] colCS; for (int i = 0; i < numRowCluster; i++) delete [] Acompressed[i]; delete [] Acompressed; if (isComputingOneWayObjective){ delete [] twoNormOfEachRow; delete [] twoNormOfEachCol; } if ((rowSeedingAccessMode == BOTH_INPUT_MODE || rowSeedingAccessMode == ONE_INPUT_MODE) && numRowSeedingSet > 1) rowSeedingFile.close(); if ((colSeedingAccessMode == ONE_INPUT_MODE) && numColSeedingSet > 1) colSeedingFile.close(); if (coclusterAccessMode != NO_OPEN_MODE) coclusterFile.close(); if (dumpAccessMode != NO_OPEN_MODE) dumpFile.close(); if (objectiveAccessMode != NO_OPEN_MODE) objectiveFile.close(); if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile.close(); }int Coclustering::getEmptyRC(){ return numEmptyRowCluster;}int Coclustering::getEmptyCC(){ return numEmptyColCluster;}int Coclustering::getSingletonRC(){ return numSingletonRowCluster;}int Coclustering::getSingletonCC(){ return numSingletonColCluster;}double Coclustering::getObjValue(){ return objValue;}double Coclustering::getObjValue4RowCluster(){ return objValue4RowCluster;}double Coclustering:: getObjValue4ColCluster(){ return objValue4ColCluster;}int Coclustering::getNumIteration(){ return numIteration;}int Coclustering::getNumReversedRow(){ return numReversedRow;}void Coclustering::chooseInitializationMethod(){ int tempRowClass, tempColClass; if (((rowInitializationMethod == SEEDING_INIT) && (colInitializationMethod == SEEDING_INIT)) && (strcmp(rowSeedingFilename, colSeedingFilename) == 0)){ if (!hasReadRowSeedingFile && (numRowSeedingSet == 1) && (numColSeedingSet == 1)){ readLabel(rowSeedingFilename, numRow, numCol, rowCL, colCL, tempRowClass, tempColClass, rowSeedingOffsetType); hasReadRowSeedingFile = true; } if ((numRowSeedingSet > 1) && (numColSeedingSet > 1)){ readLabel(rowSeedingFile, numRow, numCol, rowCL, colCL, tempRowClass, tempColClass, rowSeedingOffsetType); } } else { if (numRowCluster == 1 || numRowCluster == numRow){ doRowRandomInitializationModified(); } else { switch (rowInitializationMethod){ case RANDOM_INIT:// doRowRandomInitialization(); doRowRandomInitializationModified(); break; case RANDOM_PERTURB_INIT: doRowRandomPerturbInitialization(); break; case FARTHEST_INIT: doRowFarthestInitialization(); break; case SEEDING_INIT:// if (strcmp(rowSeedingFilename, EMPTY_STRING) != 0 && strcmp(colSeedingFilename, EMPTY_STRING) == 0) if (strcmp(rowSeedingFilename, EMPTY_STRING) != 0){ if (!hasReadRowSeedingFile && (numRowSeedingSet == 1)){ tempRowClass = readLabel(rowSeedingFilename, numRow, rowCL, rowSeedingOffsetType); hasReadRowSeedingFile = true; } if (numRowSeedingSet > 1){ readLabel(rowSeedingFile, numRow, rowCL, tempRowClass, rowSeedingOffsetType); } } break; case PERMUTATION_INIT: doRowPermutationInitialization(); break; default: doRowRandomInitializationModified(); break; } } if ((numColCluster == 1) || (numColCluster == numCol)){ doColRandomInitializationModified(); } else { switch (colInitializationMethod){ case RANDOM_INIT:// doColRandomInitialization(); doColRandomInitializationModified(); break; case RANDOM_PERTURB_INIT: doColRandomPerturbInitialization(); break; case FARTHEST_INIT: doColFarthestInitialization(); break; case SEEDING_INIT:// if (strcmp(colSeedingFilename, EMPTY_STRING) != 0 && strcmp(rowSeedingFilename, EMPTY_STRING) == 0) if (strcmp(colSeedingFilename, EMPTY_STRING) != 0){ if (!hasReadColSeedingFile && (numColSeedingSet == 1)){ tempColClass = readLabel(colSeedingFilename, numCol, colCL, colSeedingOffsetType); hasReadColSeedingFile = true; } if (numColSeedingSet > 1){ readLabel(colSeedingFile, numCol, colCL, tempColClass, colSeedingOffsetType); } } break; case PERMUTATION_INIT: doColPermutationInitialization(); break; default: doColRandomInitializationModified(); break; } } }}void Coclustering::doRowRandomInitialization(){ bool *mark = new bool[numRowCluster], isEnough = true; for (int i = 0; i < numRowCluster; i++) mark[i] = false; for (int i = 0; i < numRow; i++){ rowCL[i] = randNumGenerator.GetUniformInt(numRowCluster); mark[rowCL[i]] = true; } for (int i = 0; i < numRowCluster; i++) if (mark[i] == false){ isEnough = false; break; } if (isEnough == false) for (int j = 0; j < numRowCluster; j++) rowCL[j] = j; delete [] mark;}void Coclustering::doColRandomInitialization(){ bool *mark = new bool[numColCluster], isEnough = true; for (int i = 0; i < numColCluster; i++) mark[i] = false; for (int i = 0; i < numCol; i++){ colCL[i] = randNumGenerator.GetUniformInt(numColCluster); mark[colCL[i]] = true; } for (int i = 0; i < numColCluster; i++) if (mark[i] == false){ isEnough = false; break; } if (isEnough == false) for (int j = 0; j < numColCluster; j++) colCL[j] = j; delete [] mark;}void Coclustering::doRowRandomInitializationModified(){ if (numRowCluster == 1) for (int i = 0; i < numRow; i++) rowCL[i] = 0; else if (numRowCluster == numRow) doRowRandomInitializationDirect(); else { for (int i = 0; i < numRow; i++) rowCL[i] = i % numRowCluster; for (int i = 0; i < numRow; i++){ int j = randNumGenerator.GetUniformInt(numRow); if (i != j){ int temp = rowCL[i]; rowCL[i] = rowCL[j]; rowCL[j] = temp; } } }}void Coclustering::doColRandomInitializationModified(){ if (numColCluster == 1) for (int i = 0; i < numCol; i++) colCL[i] = 0; else if (numColCluster == numCol) doColRandomInitializationDirect(); else { for (int i = 0; i < numCol; i++) colCL[i] = i % numColCluster; for (int i = 0; i < numCol; i++){ int j = randNumGenerator.GetUniformInt(numCol); if (i != j){ int temp = colCL[i]; colCL[i] = colCL[j]; colCL[j] = temp; } } }}void Coclustering::doRowRandomInitializationDirect(){ for (int i = 0; i < numRow; i++) rowCL[i] = i;}void Coclustering::doColRandomInitializationDirect(){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -