📄 mssricc.cc
字号:
/* MssrIcc.cc implementation of the coclustering algothm for the first problem, H = A - RR'ACC' in the paper, "Minimum Sum-Squared Residue Co-clustering of Gene Expression Data", with smoothing, local search, and variations of batch and local search update. cf. ||A-Ahat||^2 == ||A||^2 - ||R'*A*C||^2 == ||A||^2 - sum_rc (Atilde_rc^2)/(rowCS[r]*colCS[c]) for unnormalized Atilde == ||A||^2 - sum_rc (Atilde_rc^2)*(rowCS[r]*colCS[c]) for normalized Atilde cf. In batch update, we use a normalized compressed matrix. However, in local search (i.e., First Variation), we use an unnormalized compressed matrix in order to directly add and subtract values. "isNormalized" flag is used for cheching normalization. Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include "MatrixVector.h"#include "MssrIcc.h"MssrIcc::MssrIcc(Matrix *inputMatrix_CCS, Matrix *inputMatrix_CRS, commandLineArgument myCLA): Coclustering(inputMatrix_CCS, inputMatrix_CRS, myCLA){// cout << endl << "MssrIcc::MssrIcc()" << endl; if (isTakingReverse){ isReversed = new bool[numRow]; for (int r = 0; r < numRow; r++) isReversed[r] = false; memoryUsed += numRow * sizeof(bool); } squaredFNormA = myCCS->squaredFNorm(); // Sum_ij (A_ij)^2 rowQuality4Compressed = new double[numRowCluster]; // rowQuality4Compressed[i] = sum_j (Acompressed_ij)^2/(rowCS[i]*colCS[j]) colQuality4Compressed = new double[numColCluster]; // colQuality4Compressed[j] = sum_i (Acompressed_ij)^2/(rowCS[i]*colCS[j]) isNormalizedCompressed = isNormalizedRowCentroid = isNormalizedColCentroid = false; memoryUsed += (numRowCluster + numColCluster) * sizeof(double); if (isComputingOneWayObjective){ rowCentroid = new double*[numRowCluster]; for (int rc = 0; rc < numRowCluster; rc++) rowCentroid[rc] = new double[numCol]; colCentroid = new double*[numColCluster]; for (int cc = 0; cc < numColCluster; cc++) colCentroid[cc] = new double[numRow]; memoryUsed += (numRowCluster * numCol + numRow * numColCluster) * sizeof(double); }}MssrIcc::~MssrIcc(){ if (isTakingReverse) delete [] isReversed; delete [] rowQuality4Compressed; delete [] colQuality4Compressed; if (isComputingOneWayObjective){ for (int rc = 0; rc < numRowCluster; rc++) delete [] rowCentroid[rc]; delete [] rowCentroid; for (int cc = 0; cc < numColCluster; cc++) delete [] colCentroid[cc]; delete [] colCentroid; }// cout << endl << "MssrIcc::~MssrIcc()" << endl;}void MssrIcc::doInitialization(){ chooseInitializationMethod(); isEmptyRowClusterReported = isEmptyColClusterReported = false; computeRowClusterSize(); computeColClusterSize(); if (isTakingReverse) computeAcompressed(isReversed); else computeAcompressed(); isNormalizedCompressed = false; computeObjectiveFunction4Unnormalized();// cout << "Initialization done..." << endl; if (isComputingOneWayObjective){ computeRowCentroid(); normalizeRowCentroid(); computeObjectiveFunction4RowCluster(); computeColCentroid(); normalizeColCentroid(); computeObjectiveFunction4ColCluster(); } checkDumpLevel4InitialObjectValue();}void MssrIcc::computeRowCentroid(){ myCRS->computeRowCentroid(numRowCluster, rowCL, rowCentroid); isNormalizedRowCentroid = false;}void MssrIcc::computeColCentroid(){ myCCS->computeColCentroid(numColCluster, colCL, colCentroid); isNormalizedColCentroid = false;}void MssrIcc::normalizeRowCentroid(){// assert(!isNormalizedRowCentroid); for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++) if (rowCentroid[rc][c] != 0) rowCentroid[rc][c] /= rowCS[rc]; isNormalizedRowCentroid = true;}void MssrIcc::normalizeColCentroid(){// assert(!isNormalizedColCentroid); for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++) if (colCentroid[cc][r] != 0) colCentroid[cc][r] /= colCS[cc]; isNormalizedColCentroid = true;}void MssrIcc::normalizeCompressedMatrix(){// assert(!isNormalizedCompressed); for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) Acompressed[rc][cc] /= (rowCS[rc] * colCS[cc]); isNormalizedCompressed = true;}void MssrIcc::computeObjectiveFunction4Unnormalized(){ checkDumpLevel4Cocluster(dumpFile); objValue = squaredFNormA - computeQuality4CompressedUnnormalized();}void MssrIcc::computeObjectiveFunction4Normalized(){ checkDumpLevel4Cocluster(dumpFile); objValue = squaredFNormA - computeQuality4CompressedNormalized();}void MssrIcc::computeObjectiveFunction4Normalized(double **Acompressed){ checkDumpLevel4Cocluster(dumpFile);// objValue = squaredFNormA - computeQuality4CompressedNormalized(tempRowCL, tempColCL); objValue = myCCS->computeObjectiveFunctionValue(rowCL, colCL, Acompressed);}void MssrIcc::computeObjectiveFunction4Normalized(double **Acompressed, bool *isReversed){ checkDumpLevel4Cocluster(dumpFile);// objValue = squaredFNormA - computeQuality4CompressedNormalized(tempRowCL, tempColCL); objValue = myCCS->computeObjectiveFunctionValue(rowCL, colCL, Acompressed, isReversed);}void MssrIcc::computeObjectiveFunction4RowCluster(){ objValue4RowCluster = myCRS->computeObjectiveFunctionValue4RowCluster(rowCL, rowCentroid);}void MssrIcc::computeObjectiveFunction4ColCluster(){ objValue4ColCluster = myCCS->computeObjectiveFunctionValue4ColCluster(colCL, colCentroid);}void MssrIcc::computeRowQuality4Compressed2WayUnnormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Compressed[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) rowQuality4Compressed[rc] += (tempValue * tempValue) / (rowCS[rc] * colCS[cc]); }}void MssrIcc::computeRowQuality4Compressed2WayNormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Compressed[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) rowQuality4Compressed[rc] += (tempValue * tempValue) * (rowCS[rc] * colCS[cc]); }}void MssrIcc::computeRowQuality4Compressed1WayUnnormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Compressed[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) rowQuality4Compressed[rc] += (tempValue * tempValue) / (rowCS[rc] * rowCS[rc] * colCS[cc]); }}void MssrIcc::computeRowQuality4Compressed1WayNormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Compressed[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) rowQuality4Compressed[rc] += (tempValue * tempValue) * colCS[cc]; }}double MssrIcc::computeRowQuality4Compressed2WayUnnormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) / (rowCS[rc] * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed2WayNormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) * (rowCS[rc] * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed1WayUnnormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) / (rowCS[rc] * rowCS[rc] * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed1WayNormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) * colCS[cc]; } } return temp;}double MssrIcc::computeRowQuality4Compressed2WayUnnormalized(double *row2Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = row2Way[cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) / (rowClusterSize * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed2WayNormalized(double *row2Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = row2Way[cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) * (rowClusterSize * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed1WayUnnormalized(double *row1Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = row1Way[cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) / (rowClusterSize * rowClusterSize * colCS[cc]); } } return temp;}double MssrIcc::computeRowQuality4Compressed1WayNormalized(double *row1Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int cc = 0; cc < numColCluster; cc++){ tempValue = row1Way[cc]; if (tempValue != 0 && colCS[cc] > 0) temp += (tempValue * tempValue) * colCS[cc]; } } return temp;}void MssrIcc::computeColQuality4Compressed2WayUnnormalized(){ double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Compressed[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int rc = 0; rc < numRowCluster; rc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && rowCS[rc] > 0) colQuality4Compressed[cc] += (tempValue * tempValue) / (rowCS[rc] * colCS[cc]); }}void MssrIcc::computeColQuality4Compressed2WayNormalized(){ double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Compressed[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int rc = 0; rc < numRowCluster; rc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && rowCS[rc] > 0) colQuality4Compressed[cc] += (tempValue * tempValue) * (rowCS[rc] * colCS[cc]); }}void MssrIcc::computeColQuality4Compressed1WayUnnormalized(){ double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Compressed[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int rc = 0; rc < numRowCluster; rc++){ tempValue = Acompressed[rc][cc]; if (tempValue != 0 && rowCS[rc] > 0)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -