📄 coclustering.cc
字号:
for (int i = 0; i < numCol; i++) colCL[i] = i;}void Coclustering::doSeedingInitializationI(char * seedingFilename){ int rn, cn, ID; std::ifstream gpfile(seedingFilename); if (gpfile.is_open()){ cout << " Reading cluster labels: " << seedingFilename << endl; for (int i = 0; i < numRowCluster; i++) for (int j = 0; j < numColCluster; j++){ gpfile >> rn >> cn; for (int k = 0; k < rn; k++){ gpfile >> ID; rowCL[ID] = i; } for (int k = 0; k < cn; k++){ gpfile >> ID; colCL[ID] = j; } } } else { cout << " !!! Seeding file open error: " << seedingFilename << " !!!" << endl << endl; exit(EXIT_FAILURE); }/* { cout << "Seeding file open error: " << seedingFilename << endl << ". So doing random initialization" << endl; randomInitial(numRowCluster, numRow, rowCL); randomInitial(numColCluster, numCol, colCL); }*/}void Coclustering::doSeedingInitializationII(char * seedingFilename){ std::ifstream gpfile(seedingFilename); if (gpfile.is_open()){ cout << " Reading cluster labels: " << seedingFilename << endl; for (int i = 0; i < numRow; i++) gpfile >> rowCL[i]; for (int j = 0; j < numCol; j++) gpfile >> colCL[j]; } else { cout << " !!! Seeding file open error: " << seedingFilename << " !!!" << endl << endl; exit(EXIT_FAILURE); }/* { cout << "Seeding file open error: " << seedingFilename << endl << "So doing random initialization" << endl; randomInitial(numRowCluster, numRow, rowCL); randomInitial(numColCluster, numCol, colCL); }*/}void Coclustering::doRowCLVecInitialization(){ if (numRowCluster == 1) for (int i = 0; i < numRow; i++) rowCLVec.push_back(0); else if (numRowCluster == numRow) for (int i = 0; i < numRow; i++) rowCLVec.push_back(i); else for (int i = 0; i < numRow; i++) rowCLVec.push_back(i % numRowCluster);}void Coclustering::doColCLVecInitialization(){ if (numColCluster == 1) for (int i = 0; i < numCol; i++) colCLVec.push_back(0); else if (numColCluster == numCol) for (int i = 0; i < numCol; i++) colCLVec.push_back(i); else for (int i = 0; i < numCol; i++) colCLVec.push_back(i % numColCluster);}void Coclustering::doRowPermutationInitialization(){ bool has_next_permutation = true; if ((numRowCluster != 1) && (numRowCluster != numRow)) for (int i = 0; i < numRowPermutation && has_next_permutation; i++) has_next_permutation = next_permutation(rowCLVec.begin(), rowCLVec.end()); assert(has_next_permutation); for (int i = 0; i < numRow; i++) rowCL[i] = rowCLVec[i];}void Coclustering::doColPermutationInitialization(){ bool has_next_permutation = true; if ((numColCluster != 1) && (numColCluster != numCol)) for (int i = 0; i < numColPermutation && has_next_permutation; i++) has_next_permutation = next_permutation(colCLVec.begin(), colCLVec.end()); assert(has_next_permutation); for (int i = 0; i < numCol; i++) colCL[i] = colCLVec[i];}void Coclustering::checkHavingReversedRow(){ bool havingReversed = false; for (int r = 0; r < numRow; r++) if (rowCL[r] < 0){ havingReversed = true; break; } if (havingReversed){ if (!isTakingReverse){ isTakingReverse = true; isReversed = new bool[numRow]; memoryUsed += numRow * sizeof(bool); } for (int r = 0; r < numRow; r++) if (rowCL[r] < 0){ isReversed[r] = true; rowCL[r] *= -1; } else isReversed[r] = false; }} void Coclustering::updateVariable(double &minDistance, int &minCL, double tempDistance, int tempCL){ if (tempDistance < minDistance){ minDistance = tempDistance; minCL = tempCL; }}void Coclustering::updateVariable(double &minDistance, int &minCL, bool &tempIsReversed, double tempDistance, int tempCL, bool trueOrFalse){ if (tempDistance < minDistance){ minDistance = tempDistance; minCL = tempCL; tempIsReversed = trueOrFalse; }}void Coclustering::computeNumReversedRow(){ numReversedRow = 0; for (int r = 0; r < numRow; r++) if (isReversed[r]) numReversedRow++;}void Coclustering::adjustClusterLabel(int value){ for (int r = 0; r < numRow; r++) rowCL[r] += value; for (int c = 0; c < numCol; c++) colCL[c] += value;} void Coclustering::computeRowClusterSize(){ for (int i = 0; i < numRowCluster; i++) rowCS[i] = 0; for (int i = 0; i < numRow; i++) rowCS[rowCL[i]]++; numEmptyRowCluster = 0; numSingletonRowCluster = 0; for (int i = 0; i < numRowCluster; i++){ if (rowCS[i] == 0) numEmptyRowCluster++; // count # of empty row cluster(s) else if (rowCS[i] == 1) numSingletonRowCluster++; // count # of singleton row cluster(s) } if (numEmptyRowCluster > 0){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << " ### " << numEmptyRowCluster << " empty row cluster(s) ###" << endl; isEmptyRowClusterReported = true; } else if (isEmptyRowClusterReported){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " !!! Fixing empty row cluster(s) !!!" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " !!! Fixing empty row cluster(s) !!!" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << " !!! Fixing empty row cluster(s) !!!" << endl; isEmptyRowClusterReported = false; } if (numSingletonRowCluster > 0){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << " ### " << numSingletonRowCluster << " singleton row cluster(s) ###" << endl; }}void Coclustering::computeColClusterSize(){ for (int i = 0; i < numColCluster; i++) colCS[i] = 0; for (int i = 0; i < numCol; i++) colCS[colCL[i]]++; numEmptyColCluster = 0; numSingletonColCluster = 0; for (int i = 0; i < numColCluster; i++){ if (colCS[i] == 0) numEmptyColCluster++; // count # of empty column cluster(s) else if (colCS[i] == 1) numSingletonColCluster++; // count # of singleton row cluster(s) } if (numEmptyColCluster > 0){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << endl << " ### " << numEmptyColCluster << " empty col cluster(s) ###" << endl; isEmptyColClusterReported = true; } else if (isEmptyColClusterReported){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " !!! Fixing empty col cluster(s) !!!" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " !!! Fixing empty col cluster(s) !!!" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << " !!! Fixing empty col cluster(s) !!!" << endl; isEmptyColClusterReported = false; } if (numSingletonColCluster > 0){ switch (dumpLevel){ case MINIMUM_DUMP_LEVEL: case BATCH_UPDATE_DUMP_LEVEL: break; case LOCAL_SEARCH_DUMP_LEVEL: cout << " ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl; break; case MAXIMUM_DUMP_LEVEL: dumpFile << " ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl; break; } if (statisticsAccessMode != NO_OPEN_MODE) statisticsFile << " ### " << numSingletonColCluster << " singleton col cluster(s) ###" << endl; }}void Coclustering::removeEmptyCluster(){ int *rowClusterNewLabel = new int[numRowCluster], *colClusterNewLabel = new int[numColCluster]; int tmp_label = 0; if (numEmptyRowCluster > 0){ for (int i = 0; i < numRowCluster; i++){ if (rowCS[i] > 0){ rowClusterNewLabel[i]= tmp_label; tmp_label++; } } for (int i = 0; i < numRow; i++) rowCL[i] = rowClusterNewLabel[rowCL[i]]; } tmp_label = 0; if (numEmptyColCluster > 0){ for (int i = 0; i < numColCluster; i++){ if (colCS[i] > 0){ colClusterNewLabel[i] = tmp_label; tmp_label++; } } for (int i = 0; i < numCol; i++) colCL[i] = colClusterNewLabel[colCL[i]]; } delete [] rowClusterNewLabel; delete [] colClusterNewLabel;}void Coclustering::writeCocluster(){ removeEmptyCluster(); computeRowClusterSize(); computeColClusterSize(); if (coclusterLabelType == BLOCK_FORMAT){ int *rowChecker = new int[numRow]; int *colChecker = new int[numCol]; int *rowBin = new int[numRowCluster]; int *colBin = new int[numColCluster]; rowBin[0] = 0; for (int i = 1; i < numRowCluster; i++) rowBin[i] = rowBin[i-1] + rowCS[i-1]; colBin[0] = 0; for (int j = 1; j < numColCluster; j++) colBin[j] = colBin[j-1] + colCS[j-1]; for (int i = 0; i < numRow; i++){ rowChecker[rowBin[rowCL[i]]] = i; rowBin[rowCL[i]]++; } for (int i = 0; i < numCol; i++){ colChecker[colBin[colCL[i]]] = i; colBin[colCL[i]]++; } int rowIndex = 0, colIndex = 0; for (int i = 0; i < numRowCluster; i++){ colIndex = 0; if (rowCS[i] > 0){ for (int j = 0; j < numColCluster; j++){ if (colCS[j] > 0){ coclusterFile << rowCS[i] << " " << colCS[j] << endl; for (int k = rowIndex; k < rowIndex+rowCS[i]; k++){ if (isTakingReverse){ if (isReversed[rowChecker[k]]) coclusterFile << "-"; coclusterFile << (rowChecker[k]+1) << " "; } else if (coclusterOffsetType == START_FROM_0){ coclusterFile << rowChecker[k] << " "; } else if (coclusterOffsetType == START_FROM_1){ coclusterFile << (rowChecker[k]+1) << " "; } } coclusterFile << endl; for (int k = colIndex; k < colIndex+colCS[j]; k++){ if (coclusterOffsetType == START_FROM_0) coclusterFile << colChecker[k] << " "; else if (coclusterOffsetType == START_FROM_1) coclusterFile << (colChecker[k]+1) << " "; } coclusterFile << endl; } colIndex += colCS[j]; } } rowIndex += rowCS[i]; } delete [] rowChecker; delete [] colChecker; delete [] rowBin; delete [] colBin; } else {// removeEmptyCluster(); for (int i = 0; i < numRow; i++){ if (isTakingReverse){ if (isReversed[i]) coclusterFile << "-"; coclusterFile << (rowCL[i]+1) << " "; } else if (coclusterOffsetType == START_FROM_0){ coclusterFile << rowCL[i] << " "; } else coclusterFile << (rowCL[i]+1) << " "; } coclusterFile << endl; for (int j = 0; j < numCol; j++){ if (coclusterOffsetType == START_FROM_0) coclusterFile << colCL[j] << " "; else coclusterFile << (colCL[j]+1) << " "; } coclusterFile << endl; }}void Coclustering::setSilent(bool s) // not used...{ isSilent = s;}void Coclustering::setRowSmoothingFactor(double p){ rowSmoothingFactor = p;}void Coclustering::setColSmoothingFactor(double p){ colSmoothingFactor = p;}void Coclustering::computeAcompressed(){ myCCS->condenseMatrix(rowCL, colCL, numRowCluster, numColCluster, Acompressed);}void Coclustering::computeAcompressed(bool *isReversed){ myCCS->condenseMatrix(rowCL, colCL, numRowCluster, numColCluster, Acompressed, isReversed);}void Coclustering::validateRowCluster(int numRowClass, int *rowClassLabel){ ExternalValidity ev(numRowClass, numRowCluster, numRow, rowClassLabel, rowCL);/*// if (dumpLevel > MINIMUM_DUMP_LEVEL){ cout << endl << " ### External Row Cluster Validation ###" << endl << endl; if (isShowingEachCluster) ev.printCM(cout); ev.purity_Entropy_MutInfo(cout, isShowingEachCluster); ev.F_measure(cout); ev.micro_avg_precision_recall(rowPrecision, rowRecall, cout);// } if (statisticsAccessMode != NO_OPEN_MODE){ statisticsFile << endl << " ### External Row Cluster Validation ###" << endl << endl; if (isShowingEachCluster) ev.printCM(statisticsFile); ev.purity_Entropy_MutInfo(statisticsFile, isShowingEachCluster); ev.F_measure(statisticsFile); ev.micro_avg_precision_recall(rowPrecision, rowRecall, statisticsFile); } if (dumpAccessMode != NO_OPEN_MODE){ dumpFile << endl << " ### External Row Cluster Validation ###" << endl << endl; if (isShowingEachCluster)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -