📄 itcc.cc
字号:
/* Itcc.cc Implementation of the information theoretic co-cluster algorithm with smoothing, local search, and variations of batch/local search update. Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <stdlib.h>#include "Itcc.h"#include "MatrixVector.h"Itcc::Itcc(Matrix *inputCCS, Matrix *inputCRS, commandLineArgument myCLA): Coclustering(inputCCS, inputCRS, myCLA){// cout << endl << "Itcc::Itcc()" << endl; if (myCCS->isHavingNegative()){ cout << " Invalid matrix for ITCC. Matrix should be non-negative." << endl << endl; exit(EXIT_FAILURE); } PlogP = myCCS->getPlogP(); mutualInfo = myCCS->getMutualInfo(); pX = myCCS->getPX(); pY = myCCS->getPY(); qYxhat = new double *[numRowCluster]; for (int rc = 0; rc < numRowCluster; rc++) qYxhat[rc] = new double[numCol]; qXyhat = new double *[numColCluster]; for (int cc = 0; cc < numColCluster; cc++) qXyhat[cc] = new double[numRow]; pxhat = new double[numRowCluster]; pyhat = new double[numColCluster]; memoryUsed += (numRowCluster * numCol + numColCluster * numRow + numRowCluster + numColCluster) * sizeof(double);}Itcc::~Itcc(){ delete [] pxhat; delete [] pyhat; for (int cc = 0; cc < numColCluster; cc++) delete [] qXyhat[cc]; delete [] qXyhat; for (int rc = 0; rc < numRowCluster; rc++) delete [] qYxhat[rc]; delete [] qYxhat;// cout << endl << "Itcc::~Itcc()" << endl;}void Itcc::computeRowCentroid(){ for (int rc = 0; rc < numRowCluster; rc++) for (int c = 0; c < numCol; c++) qYxhat[rc][c] = 0; for (int r = 0; r < numRow; r++){ int rc = rowCL[r]; double pX_r_over_pxhat_rc = pX[r] / pxhat[rc]; // maybe we need to check pX[r] > 0 for (int c = 0; c < numCol; c++){ int cc = colCL[c]; if (Acompressed[rc][cc] > 0) //qYxhat[rc][c] += Acompressed[rc][cc] * pX[r] / pxhat[rc] * pY[c] / pyhat[cc]; qYxhat[rc][c] += Acompressed[rc][cc] * pX_r_over_pxhat_rc * pY[c] / pyhat[cc]; } } for (int rc = 0; rc < numRowCluster; rc++) normalize_vec_1(qYxhat[rc], numCol); checkDumpLevel4Centroid(qYxhat, numRowCluster, numCol);}void Itcc::computeColCentroid(){ for (int cc = 0; cc < numColCluster; cc++) for (int r = 0; r < numRow; r++) qXyhat[cc][r] = 0; for (int c = 0; c < numCol; c++){ int cc = colCL[c]; double pY_c_over_pyhat_cc = pY[c] / pyhat[cc]; // maybe we need to check pY[c] > 0 for (int r = 0; r < numRow; r++){ int rc = rowCL[r]; if (Acompressed[rc][cc] > 0) //qXyhat[cc][r] += Acompressed[rc][cc] * pX[r] / pxhat[rc] * pY[c] / pyhat[cc]; qXyhat[cc][r] += Acompressed[rc][cc] * pX[r] / pxhat[rc] * pY_c_over_pyhat_cc ; } } for (int cc = 0; cc < numColCluster; cc++) normalize_vec_1(qXyhat[cc], numRow); checkDumpLevel4Centroid(qXyhat, numColCluster, numRow);}void Itcc::computeRowCentroid4RowCluster(){ myCRS->computeRowCentroid(numRowCluster, rowCL, qYxhat); isNormalizedRowCentroid = false;}void Itcc::computeColCentroid4ColCluster(){ myCCS->computeColCentroid(numColCluster, colCL, qXyhat); isNormalizedColCentroid = false;}void Itcc::computeMarginal(){ for (int rc = 0; rc < numRowCluster; rc++) pxhat[rc] = 0; for (int cc = 0; cc < numColCluster; cc++) pyhat[cc] = 0; for (int rc = 0; rc < numRowCluster; rc++) for (int cc = 0; cc < numColCluster; cc++) pxhat[rc] += Acompressed[rc][cc]; for (int rc = 0; rc < numRowCluster; rc++) for (int cc = 0; cc < numColCluster; cc++) pyhat[cc] += Acompressed[rc][cc];}void Itcc::computeObjectiveFunction(){ checkDumpLevel4Cocluster(dumpFile); //objValue = PlogP - myCCS->getPlogQ(Acompressed, rowCL, colCL, pxhat, pyhat); objValue = mutualInfo - computeMutualInfo(Acompressed, numRowCluster, numColCluster, pxhat, pyhat);}void Itcc::computeObjectiveFunction4RowCluster(){ objValue4RowCluster = mutualInfo - computeMutualInfo(qYxhat, numRowCluster, numCol);}void Itcc::computeObjectiveFunction4ColCluster(){ objValue4ColCluster = mutualInfo - computeMutualInfo(qXyhat, numColCluster, numRow);}void Itcc::doRowFarthestInitialization(){ double maxDis = MY_DBL_MIN; double *tempVec = new double[numCol]; double ** simMat; int maxInd = 0; bool *markPicked = new bool[numRow]; for (int i = 0; i < numRow; i++) markPicked[i] = false; for (int i = 0; i < numCol; i++) tempVec[i] = 0; for (int i = 0; i < numRow; i++) myCRS->ith_add_CV(i, tempVec); for (int i = 0; i < numRow; i++){// double temp = myCRS->Kullback_leibler(tempVec, i, NO_SMOOTHING, ROW_DIMENSION);//cout << "SMOOTHING_TYPE_4_ROW = " << smoothingType << endl; double temp = myCRS->Kullback_leibler(tempVec, i, smoothingType, ROW_DIMENSION); if (temp > maxDis){ maxDis = temp; maxInd = i; } } markPicked[maxInd] = true; myCRS->ith_add_CV(maxInd, qYxhat[0]); normalize_vec_1(qYxhat[0], numCol); delete [] tempVec; simMat = new double *[numRowCluster]; for (int i = 0; i < numRowCluster; i++) simMat[i] = new double[numRow]; for (int i = 0; i < numRow; i++) simMat[0][i] = rowDistance(i, 0); for (int i = 1; i < numRowCluster; i++){ double temp; maxDis = MY_DBL_MIN; maxInd = 0; for(int j = 0; j < numRow; j++) if (!markPicked[j]){ temp = 0; for (int k = 0; k < i; k++) temp += simMat[k][j]; if (temp > maxDis){ maxDis = temp; maxInd = j; } } markPicked[maxInd] = true; myCRS->ith_add_CV(maxInd, qYxhat[i]); normalize_vec_1(qYxhat[i], numCol); for(int j = 0; j < numRow; j++) simMat[i][j] = rowDistance(j, i); } for (int i = 0; i < numRow; i++) rowCL[i] = 0; for (int i = 0; i < numRowCluster; i++) rowCS[i] = 1; // just to make sure cluster size is >0 reassignRC(); for (int i = 0; i < numRowCluster; i++) delete [] simMat[i]; delete [] simMat; delete [] markPicked;}void Itcc::doColFarthestInitialization(){ double maxDis = MY_DBL_MIN; double *tempVec = new double[numRow]; double **simMat; int maxInd = 0; bool *markPicked = new bool[numCol]; for (int i = 0; i < numCol; i++) markPicked[i] = false; for (int i = 0; i < numRow; i++) tempVec[i] = 0; for (int i = 0; i < numCol; i++) myCCS->ith_add_CV(i, tempVec); for (int i = 0; i < numCol; i++){// double temp = myCCS->Kullback_leibler(tempVec, i, NO_SMOOTHING, COL_DIMENSION);//cout << "SMOOTHING_TYPE_4_COL = " << smoothingType << endl; double temp = myCCS->Kullback_leibler(tempVec, i, smoothingType, COL_DIMENSION); if (temp > maxDis){ maxDis = temp; maxInd = i; } } markPicked[maxInd] = true; myCCS->ith_add_CV(maxInd, qXyhat[0]); normalize_vec_1(qXyhat[0], numRow); delete [] tempVec; simMat = new double *[numColCluster]; for (int i = 0; i < numColCluster; i++) simMat[i] = new double [numCol]; for (int i = 0; i < numCol; i++) simMat[0][i] = colDistance(i, 0); for (int i = 1; i < numColCluster; i++){ double temp; maxDis = MY_DBL_MIN; maxInd = 0; for(int j = 0; j < numCol; j++) if (!markPicked[j]){ temp = 0; for (int k = 0; k < i; k++) temp += simMat[k][j]; if (temp > maxDis){ maxDis = temp; maxInd = j; } } markPicked[maxInd] = true; myCCS->ith_add_CV(maxInd, qXyhat[i]); normalize_vec_1(qXyhat[i], numRow); for(int j = 0; j < numCol; j++) simMat[i][j] = colDistance(j, i); } for (int i = 0; i < numCol; i++) colCL[i] = 0; for (int i = 0; i < numColCluster; i++) colCS[i] = 1; // just to make sure cluster size is >0 reassignCC(); for (int i = 0; i < numColCluster; i++) delete [] simMat[i]; delete [] simMat; delete [] markPicked;} void Itcc::doRowRandomPerturbInitialization(){ double tempNorm; double *tempVec = new double[numCol]; double *center = new double[numCol]; for (int i=0; i<numCol; i++) tempVec[i] = randNumGenerator.GetUniform() - 0.5; normalize_vec_1(tempVec, numCol); tempNorm = perturbationMagnitude * randNumGenerator.GetUniform(); for (int j = 0; j < numCol; j++) tempVec[j] *= tempNorm; for (int i = 0; i < numCol; i++) center[i] = 0; for (int i = 0; i < numRow; i++) myCRS->ith_add_CV(i, center); for (int i = 0; i < numRowCluster; i++){ for(int j = 0; j < numCol; j++) qYxhat[i][j] = center[j] * fabs(tempVec[j]+1); normalize_vec_1(qYxhat[i], numCol); } for (int i = 0; i < numRow; i++) rowCL[i] = 0; for (int i = 0; i < numRowCluster; i++) rowCS[i] = 1; // just to make sure cluster size is >0 reassignRC(); delete [] tempVec; delete [] center;}void Itcc::doColRandomPerturbInitialization(){ double tempNorm; double *tempVec = new double[numRow]; double *center = new double[numRow]; for (int i=0; i<numRow; i++) tempVec[i] = randNumGenerator.GetUniform() - 0.5; normalize_vec_1(tempVec, numRow); tempNorm = perturbationMagnitude * randNumGenerator.GetUniform(); for (int j = 0; j < numRow; j++) tempVec[j] *= tempNorm; for (int i = 0; i < numRow; i++) center[i] = 0; for (int i = 0; i < numCol; i++) myCCS->ith_add_CV(i, center); for (int i = 0; i < numColCluster; i++){ for(int j = 0; j < numRow; j++) qXyhat[i][j] = center[j] * fabs(tempVec[j]+1); normalize_vec_1(qXyhat[i], numRow); } for (int i = 0; i < numCol; i++) colCL[i] = 0; for (int i = 0; i < numColCluster; i++) colCS[i] = 1; // just to make sure cluster size is >0 reassignCC(); delete [] tempVec; delete [] center;}double Itcc::rowDistance(int r, int rc){ return myCRS->Kullback_leibler(qYxhat[rc], r, smoothingType, ROW_DIMENSION);}double Itcc::colDistance(int c, int cc){ return myCCS->Kullback_leibler(qXyhat[cc], c, smoothingType, COL_DIMENSION);}void Itcc::reassignRC(){ int rowClusterChange = 0; for (int r = 0; r < numRow; r++){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -