📄 mssriicc.cc
字号:
/* MssrIIcc.cc Implementation of the coclustering algothm for the second problem, H = A - RR'A - ACC' + RR'ACC' in the paper, "Minimum Sum-Squared Residue Co-clustering of Gene Expression Data", with smoothing, local search, and variations of batch and local search update. cf. ||A-Ahat||^2 == ||A||^2 - ||R'A||^2 - ||AC||^2 + ||R'*A*C||^2 where (||R'A||^2)_rc = sum(sum(rowCentroid .* rowCentroid * rowCS[rc])) (||AC||^2)_cc = sum(sum(colCentroid .* colCentroid * colCS[cc])) (||R'AC||^2)_rc_cc = sum(sum(Acompressed_rc_cc .* Acompressed_rc_cc * rowCS[rc] * colCS[cc])) or (||R'A||^2)_rc = sum(sum(rowCentroid .* rowCentroid / rowCS[rc])) (||AC||^2)_cc = sum(sum(colCentroid .* colCentroid / colCS[cc])) (||R'AC||^2)_rc_cc = sum(sum((Acompressed_rc_cc .* Acompressed_rc_cc) / (rowCS[rc] * colCS[cc]))) cf. In batch update, we use a normalized compressed matrix. However, in local search (i.e., First Variation), we use an unnormalized compressed matrix in order to directly add and subtract values. "isNormalizedCompressed", "isNormalizedRowCentroid", and "isNormalizedColCentroid" are used for checking the normalization. Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include "MatrixVector.h"#include "MssrIIcc.h"MssrIIcc::MssrIIcc(Matrix *inputMatrix_CCS, Matrix *inputMatrix_CRS, commandLineArgument myCLA): Coclustering(inputMatrix_CCS, inputMatrix_CRS, myCLA){// cout << endl << "MssrIIcc::MssrIIcc()" << endl; if (isTakingReverse){ isReversed = new bool[numRow]; for (int r = 0; r < numRow; r++) isReversed[r] = false; memoryUsed += numRow * sizeof(bool); } squaredFNormA = myCCS->squaredFNorm(); // Sum_ij (A_ij)^2 rowQuality4Compressed = new double[numRowCluster]; // rowQuality4Compressed[i] = sum_j (Acompressed_ij)^2/(rowCS[i]*colCS[j]) colQuality4Compressed = new double[numColCluster]; // colQuality4Compressed[j] = sum_i (Acompressed_ij)^2/(rowCS[i]*colCS[j]) memoryUsed += (numColCluster + numRowCluster) * sizeof(double); isNormalizedCompressed = false; isNormalizedRowCentroid = false; isNormalizedColCentroid = false; rowCentroid = new double*[numRowCluster]; for (int rc = 0; rc < numRowCluster; rc++) rowCentroid[rc] = new double[numCol]; colCentroid = new double*[numColCluster]; for (int cc = 0; cc < numColCluster; cc++) colCentroid[cc] = new double[numRow]; memoryUsed += (numRowCluster * numCol + numRow * numColCluster) * sizeof(double); rowAR = NULL; colAC = NULL; rowAP = new double[numCol]; colAP = new double[numRow]; memoryUsed += (numCol + numRow) * sizeof(double); rowQuality4Centroid = new double[numRowCluster]; colQuality4Centroid = new double[numColCluster]; memoryUsed += (numRowCluster + numColCluster) * sizeof(double);}MssrIIcc::~MssrIIcc(){ if (isTakingReverse) delete [] isReversed; delete [] rowQuality4Compressed; delete [] colQuality4Compressed; for (int rc = 0; rc < numRowCluster; rc++) delete [] rowCentroid[rc]; delete [] rowCentroid; for (int cc = 0; cc < numColCluster; cc++) delete [] colCentroid[cc]; delete [] colCentroid; delete [] rowAP; delete [] colAP; delete [] rowQuality4Centroid; delete [] colQuality4Centroid;// cout << endl << "MssrIIcc::~MssrIIcc()" << endl;}void MssrIIcc::doInitialization(){ chooseInitializationMethod(); isEmptyRowClusterReported = isEmptyColClusterReported = false; computeRowClusterSize(); computeColClusterSize(); if (isTakingReverse) computeAcompressed(isReversed); else computeAcompressed(); isNormalizedCompressed = false; normalizeCompressedMatrix(); if (isTakingReverse) computeRowCentroid(isReversed); else computeRowCentroid(); normalizeRowCentroid(); if (isTakingReverse) computeColCentroid(isReversed); else computeColCentroid(); normalizeColCentroid(); computeObjectiveFunction4Normalized();// cout << "Initialization done..." << endl; if (isComputingOneWayObjective){ computeRowCentroid(); normalizeRowCentroid(); computeObjectiveFunction4RowCluster(); computeColCentroid(); normalizeColCentroid(); computeObjectiveFunction4ColCluster(); } checkDumpLevel4InitialObjectValue();}void MssrIIcc::computeRowCentroid(){ myCRS->computeRowCentroid(numRowCluster, rowCL, rowCentroid); isNormalizedRowCentroid = false;} void MssrIIcc::computeRowCentroid(bool *isReversed){ myCRS->computeRowCentroid(numRowCluster, rowCL, rowCentroid, isReversed); isNormalizedRowCentroid = false;} void MssrIIcc::computeColCentroid(){ myCCS->computeColCentroid(numColCluster, colCL, colCentroid); isNormalizedColCentroid = false;}void MssrIIcc::computeColCentroid(bool *isReversed){ myCCS->computeColCentroid(numColCluster, colCL, colCentroid, isReversed); isNormalizedColCentroid = false;} void MssrIIcc::normalizeRowCentroid(){// assert(!isNormalizedRowCentroid); for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++) if (rowCentroid[rc][c] != 0) rowCentroid[rc][c] /= rowCS[rc]; isNormalizedRowCentroid = true;}void MssrIIcc::normalizeColCentroid(){// assert(!isNormalizedColCentroid); for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++) if (colCentroid[cc][r] != 0) colCentroid[cc][r] /= colCS[cc]; isNormalizedColCentroid = true;}void MssrIIcc::normalizeCompressedMatrix(){// assert(!isNormalizedCompressed); for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int cc = 0; cc < numColCluster; cc++) if (Acompressed[rc][cc] != 0 && colCS[cc] > 0) Acompressed[rc][cc] /= rowCS[rc] * colCS[cc]; isNormalizedCompressed = true;}void MssrIIcc::computeRowAR(){// assert(isNormalizedCompressed);// assert(isNormalizedRowCentroid); for (int rc = 0; rc < numRowCluster; rc++) for (int c = 0; c < numCol; c++) rowAR[rc][c] = rowCentroid[rc][c] - Acompressed[rc][colCL[c]];}void MssrIIcc::computeColAC(){// assert(isNormalizedCompressed);// assert(isNormalizedColCentroid); for (int cc = 0; cc < numColCluster; cc++) for (int r = 0; r < numRow; r++) colAC[cc][r] = colCentroid[cc][r] - Acompressed[rowCL[r]][cc];}void MssrIIcc::computeQuality4RowAR(){ // rowAR contains mAR = mR'A - mR'ACC'. double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Compressed[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++){ tempValue = rowAR[rc][c]; if (tempValue != 0) rowQuality4Compressed[rc] += tempValue * tempValue; } }void MssrIIcc::computeQuality4ColAC(){ // colAC contains mAC = mAC - mRR'AC. double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Compressed[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++){ tempValue = colAC[cc][r]; if (tempValue != 0) colQuality4Compressed[cc] += tempValue * tempValue; }}double MssrIIcc::computeQuality4RowCentroidUnnormalized(){ double tempValue = 0, temp = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) temp += (tempValue * tempValue) / rowCS[rc]; } return temp;}double MssrIIcc::computeQuality4RowCentroidNormalized(){ double tempValue = 0, temp = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) temp += (tempValue * tempValue) * rowCS[rc]; } return temp;}double MssrIIcc::computeQuality4ColCentroidUnnormalized(){ double tempValue = 0, temp = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++){ tempValue = colCentroid[cc][r]; if (tempValue != 0) temp += (tempValue * tempValue) / colCS[cc]; } return temp;}double MssrIIcc::computeQuality4ColCentroidNormalized(){ double tempValue = 0, temp = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++){ tempValue = colCentroid[cc][r]; if (tempValue != 0) temp += (tempValue * tempValue) * colCS[cc]; } return temp;}void MssrIIcc::computeRowQuality4CentroidUnnormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Centroid[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) rowQuality4Centroid[rc] += (tempValue * tempValue) / rowCS[rc]; }}void MssrIIcc::computeRowQuality4CentroidNormalized(){ double tempValue = 0; for (int rc = 0; rc < numRowCluster; rc++) rowQuality4Centroid[rc] = 0; for (int rc = 0; rc < numRowCluster; rc++) if (rowCS[rc] > 0) for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) rowQuality4Centroid[rc] += (tempValue * tempValue) * rowCS[rc]; }}double MssrIIcc::computeRowQuality4CentroidUnnormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) temp += (tempValue * tempValue) / rowCS[rc]; } } return temp;}double MssrIIcc::computeRowQuality4CentroidNormalized(int rc){ double tempValue = 0, temp = 0; if (rowCS[rc] > 0){ for (int c = 0; c < numCol; c++){ tempValue = rowCentroid[rc][c]; if (tempValue != 0) temp += (tempValue * tempValue) * rowCS[rc]; } } return temp;}double MssrIIcc::computeRowQuality4CentroidUnnormalized(double *row1Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int c = 0; c < numCol; c++){ tempValue = row1Way[c]; if (tempValue != 0) temp += (tempValue * tempValue) / rowClusterSize; } } return temp;}double MssrIIcc::computeRowQuality4CentroidNormalized(double *row1Way, int rowClusterSize){ double tempValue = 0, temp = 0; if (rowClusterSize > 0){ for (int c = 0; c < numCol; c++){ tempValue = row1Way[c]; if (tempValue != 0) temp += (tempValue * tempValue) * rowClusterSize; } } return temp;}void MssrIIcc::computeColQuality4CentroidUnnormalized(){ double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Centroid[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++){ tempValue = colCentroid[cc][r]; if (tempValue != 0) colQuality4Centroid[cc] += (tempValue * tempValue) / colCS[cc]; }}void MssrIIcc::computeColQuality4CentroidNormalized(){ double tempValue = 0; for (int cc = 0; cc < numColCluster; cc++) colQuality4Centroid[cc] = 0; for (int cc = 0; cc < numColCluster; cc++) if (colCS[cc] > 0) for (int r = 0; r < numRow; r++){ tempValue = colCentroid[cc][r]; if (tempValue != 0) colQuality4Centroid[cc] += (tempValue * tempValue) * colCS[cc]; }}double MssrIIcc::computeColQuality4CentroidUnnormalized(int cc){ double tempValue = 0, temp = 0; if (colCS[cc] > 0){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -