📄 sparsematrix.cc
/* SparseMatrix.cc
     Implementation of the SparseMatrix class

   Copyright (c) 2005, 2006 by Hyuk Cho
   Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra
     {hyukcho, yguan, suvrit}@cs.utexas.edu
*/

#include <iostream>
#include <cmath>
#include <cassert>
#include <cstdlib>

#include "SparseMatrix.h"

SparseMatrix::SparseMatrix(int row, int col, int nz, double *val, int *rowind, int *colptr)
  : Matrix(row, col)
{
  numRow = row;
  numCol = col;
  numVal = nz;
  value  = val;
  colPtr = colptr;
  rowIdx = rowind;
}

SparseMatrix::~SparseMatrix()
{
  if (norm != NULL)
    delete [] norm;
  if (L1_norm != NULL)
    delete [] L1_norm;
}

double SparseMatrix::operator()(int i, int j) const
{
  assert(i >= 0 && i < numRow && j >= 0 && j < numCol);
  for (int t = colPtr[j]; t < colPtr[j+1]; t++)
    if (rowIdx[t] == i)
      return value[t];
  return 0.0;
}

double SparseMatrix::dot_mult(double *v, int i)
/* Compute the dot product between the ith column vector of the sparse
   matrix and vector v; the result is returned. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  return result;
}

void SparseMatrix::trans_mult(double *x, double *result)
/* Compute the dot product between every column vector of the sparse
   matrix and vector x; the results are stored in array 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = dot_mult(x, i);
}

double SparseMatrix::squared_dot_mult(double *v, int i)
/* Compute the squared dot product between the ith column vector of the
   sparse matrix and vector v; the result is returned. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  return result * result;
}

void SparseMatrix::squared_trans_mult(double *x, double *result)
/* Compute the squared dot product between every column vector of the
   sparse matrix and vector x; the results are stored in array 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = squared_dot_mult(x, i);
}
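/* ---------------------------------------------------------------------
   Usage sketch (illustrative only, not part of the original file): the
   constructor above stores the matrix in compressed sparse column (CSC)
   form, where column j occupies positions colPtr[j]..colPtr[j+1]-1 of
   value[] and rowIdx[]. The helper name demoSparseMatrixCSC is
   hypothetical; it builds the 3x2 matrix
       | 1 0 |
       | 0 2 |
       | 3 0 |
   and exercises operator() and trans_mult(). It relies on the class
   initializing norm/L1_norm to NULL, as the destructor above implies.
--------------------------------------------------------------------- */
static void demoSparseMatrixCSC()
{
  double val[]    = {1.0, 3.0, 2.0}; // nonzeros, column by column
  int    rowind[] = {0, 2, 1};       // row index of each nonzero
  int    colptr[] = {0, 2, 3};       // column j spans colptr[j]..colptr[j+1]-1
  SparseMatrix A(3, 2, 3, val, rowind, colptr);
  assert(A(2, 0) == 3.0 && A(0, 1) == 0.0);
  double x[] = {1.0, 1.0, 1.0}, result[2];
  A.trans_mult(x, result);           // result = {1 + 3, 2} = {4, 2}
  assert(result[0] == 4.0 && result[1] == 2.0);
}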
void SparseMatrix::A_trans_A(int flag, int *index, int *pointers, double **A_t_A, int &nz_counter)
/* Computes A'A for the documents in a single cluster: index[] contains the
   doc IDs of all docs, and pointers[] gives the range of positions in
   index[] belonging to that cluster. The result is stored in dense format
   in A_t_A (a d-by-d matrix). The right singular vectors of A are the
   eigenvectors of A'A.
   Note that a gene-expression matrix is usually n by d, where n is the
   number of genes; this format differs from that of a document matrix.
   In main memory the matrix is stored in the document-matrix format. */
{
  int clustersize = pointers[1] - pointers[0];
  for (int i = 0; i < numRow; i++)
    for (int j = 0; j < numRow; j++)
      A_t_A[i][j] = 0;
  nz_counter = 0;
  if (flag > 0){
    // Documents are addressed indirectly through index[].
    for (int k = 0; k < clustersize; k++)
      for (int l = colPtr[index[k+pointers[0]]]; l < colPtr[index[k+pointers[0]]+1]; l++)
        for (int j = colPtr[index[k+pointers[0]]]; j < colPtr[index[k+pointers[0]]+1]; j++)
          A_t_A[rowIdx[l]][rowIdx[j]] += value[l] * value[j];
  } else {
    // Documents pointers[0]..pointers[1]-1 are used directly.
    for (int k = 0; k < clustersize; k++)
      for (int l = colPtr[k+pointers[0]]; l < colPtr[k+pointers[0]+1]; l++)
        for (int j = colPtr[k+pointers[0]]; j < colPtr[k+pointers[0]+1]; j++)
          A_t_A[rowIdx[l]][rowIdx[j]] += value[l] * value[j];
  }
  for (int i = 0; i < numRow; i++)
    for (int j = 0; j < numRow; j++)
      if (A_t_A[i][j] > 0)
        nz_counter++;
}

void SparseMatrix::dense_2_sparse(int *AtA_colptr, int *AtA_rowind, double *AtA_val, double **A_t_A)
/* Converts the dense matrix A_t_A into CSC format (AtA_colptr, AtA_rowind,
   AtA_val); only strictly positive entries are stored. */
{
  int k = 0;
  AtA_colptr[0] = 0;
  for (int j = 0; j < numRow; j++){
    int l = 0;
    for (int i = 0; i < numRow; i++)
      if (A_t_A[i][j] > 0){
        AtA_val[k] = A_t_A[i][j];
        AtA_rowind[k++] = i;
        l++;
      }
    AtA_colptr[j+1] = AtA_colptr[j] + l;
  }
}

void SparseMatrix::right_dom_SV(int *cluster, int *cluster_size, int n_Clusters, double **CV, double *cluster_quality, int flag)
/* flag == -1: update all the centroids and the cluster qualities;
   0 <= flag < n_Clusters: update the centroid and the cluster quality
     specified by 'flag';
   n_Clusters <= flag < 2*n_Clusters: update only the cluster quality
     specified by 'flag - n_Clusters'. */
{
  int *pointer = new int[n_Clusters+1], *index = new int[numCol], *range = new int[2], nz_counter;
  int *AtA_rowind = NULL, *AtA_colptr = NULL;
  double *AtA_val = NULL;
  double **A_t_A;

  A_t_A = new double *[numRow];
  for (int i = 0; i < numRow; i++)
    A_t_A[i] = new double[numRow];

  // Bucket the doc IDs so that index[pointer[c]..pointer[c+1]-1] holds
  // the documents of cluster c.
  pointer[0] = 0;
  for (int i = 1; i < n_Clusters; i++)
    pointer[i] = pointer[i-1] + cluster_size[i-1];
  for (int i = 0; i < numCol; i++){
    index[pointer[cluster[i]]] = i;
    pointer[cluster[i]]++;
  }
  pointer[0] = 0;
  for (int i = 1; i <= n_Clusters; i++)
    pointer[i] = pointer[i-1] + cluster_size[i-1];

  if (flag < 0){
    // Compute the dominant eigenvalue and eigenvector for every cluster.
    for (int i = 0; i < n_Clusters; i++){
      range[0] = pointer[i];
      range[1] = pointer[i+1];
      A_trans_A(1, index, range, A_t_A, nz_counter);
      AtA_val    = new double[nz_counter];
      AtA_rowind = new int[nz_counter];
      AtA_colptr = new int[numRow+1];
      dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
      /* Debugging dump of A_t_A and its CSC form:
      for (int l = 0; l < numRow; l++){
        for (int j = 0; j < numRow; j++)
          cout << A_t_A[l][j] << " ";
        cout << endl;
      }
      for (int l = 0; l <= numRow; l++)
        cout << AtA_colptr[l] << " ";
      cout << endl;
      for (int l = 0; l < nz_counter; l++)
        cout << AtA_rowind[l] << " ";
      cout << endl;
      for (int l = 0; l < nz_counter; l++)
        cout << AtA_val[l] << " ";
      cout << endl << endl;
      */
      power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, CV[i], CV[i], cluster_quality[i]);
      /* Debugging dump of the result:
      for (int l = 0; l < numRow; l++)
        cout << CV[i][l] << " ";
      cout << cluster_quality[i] << endl;
      */
      delete [] AtA_val;
      delete [] AtA_rowind;
      delete [] AtA_colptr;
    }
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
  } else if ((flag >= 0) && (flag < n_Clusters)){
    // Compute the dominant eigenvalue and eigenvector for cluster 'flag'.
    range[0] = pointer[flag];
    range[1] = pointer[flag+1];
    A_trans_A(1, index, range, A_t_A, nz_counter);
    AtA_val    = new double[nz_counter];
    AtA_rowind = new int[nz_counter];
    AtA_colptr = new int[numRow+1];
    dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
    power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, CV[flag], CV[flag], cluster_quality[flag]);
    delete [] AtA_val;
    delete [] AtA_rowind;
    delete [] AtA_colptr;
  } else if ((flag >= n_Clusters) && (flag < 2 * n_Clusters)){
    // Compute the eigenvalue ONLY for cluster 'flag - n_Clusters'; its
    // eigenvector is left unchanged.
    range[0] = pointer[flag-n_Clusters];
    range[1] = pointer[flag-n_Clusters+1];
    A_trans_A(1, index, range, A_t_A, nz_counter);
    AtA_val    = new double[nz_counter];
    AtA_rowind = new int[nz_counter];
    AtA_colptr = new int[numRow+1];
    dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
    power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, NULL, CV[flag-n_Clusters], cluster_quality[flag-n_Clusters]);
    delete [] AtA_val;
    delete [] AtA_rowind;
    delete [] AtA_colptr;
  }
  delete [] pointer;
  delete [] index;
  delete [] range;
}
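/* ---------------------------------------------------------------------
   Sketch (illustrative only): power_method(), called by right_dom_SV()
   above, is implemented elsewhere. The hypothetical helper
   powerIterationDense below shows the underlying idea on the dense
   d-by-d matrix M = A'A produced by A_trans_A(): for a symmetric
   positive semidefinite M, repeatedly forming v <- Mv / ||Mv||
   converges to the dominant eigenvector, and ||Mv|| converges to the
   dominant eigenvalue, i.e. the squared largest singular value of A.
--------------------------------------------------------------------- */
static void powerIterationDense(double **M, int d, double *v, double &lambda, int iters)
{
  double *w = new double[d];
  for (int it = 0; it < iters; it++){
    for (int i = 0; i < d; i++){   // w = M v
      w[i] = 0.0;
      for (int j = 0; j < d; j++)
        w[i] += M[i][j] * v[j];
    }
    double nrm = 0.0;              // nrm = ||M v||
    for (int i = 0; i < d; i++)
      nrm += w[i] * w[i];
    nrm = sqrt(nrm);
    if (nrm == 0.0)
      break;                       // v lies in the null space of M
    lambda = nrm;                  // eigenvalue estimate (M is PSD)
    for (int i = 0; i < d; i++)    // v = w / ||w||
      v[i] = w[i] / nrm;
  }
  delete [] w;
}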
double SparseMatrix::euc_dis(double *v, int i, double norm_v)
/* Given the squared L2-norms of the column vectors and of v (norm[i] and
   norm_v), compute the squared Euclidean distance between the ith column
   vector of the matrix and v; the result is returned.
   Takes advantage of (x-c)'(x-c) = x'x - 2 x'c + c'c. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  result *= -2.0;
  result += norm[i] + norm_v;
  return result;
}

void SparseMatrix::euc_dis(double *x, double norm_x, double *result)
/* Given the squared L2-norms of the column vectors and of x (norm[i] and
   norm_x), compute the squared Euclidean distance between each column
   vector of the matrix and x; the results are stored in array 'result'.
   Takes advantage of (x-c)'(x-c) = x'x - 2 x'c + c'c. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = euc_dis(x, i, norm_x);
}
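/* ---------------------------------------------------------------------
   Sketch (illustrative only): the identity behind euc_dis() above is
   ||x - c||^2 = x'x - 2 x'c + c'c, so only the sparse dot product x'c
   has to be computed per column; x'x (norm[i]) and c'c (norm_v) are
   precomputed. The hypothetical dense helper below makes the identity
   explicit.
--------------------------------------------------------------------- */
static double squaredEucDisDense(const double *x, const double *c, int d)
{
  double xx = 0.0, cc = 0.0, xc = 0.0;
  for (int i = 0; i < d; i++){
    xx += x[i] * x[i];
    cc += c[i] * c[i];
    xc += x[i] * c[i];
  }
  return xx - 2.0 * xc + cc;  // equals \sum_i (x_i - c_i)^2
}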
double SparseMatrix::Kullback_leibler(double *x, int i, int laplace, double L1norm_x)
/* Given the L1-norms of vec[i] and x (vec[i] must be normalized before the
   call), compute the KL divergence between vec[i] of the matrix and x;
   the result is returned.
   Takes advantage of
     KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i)
              = norm[i] - \sum_i p_i log(q_i).
   The KL value is in nats, NOT bits. */
{
  double result = 0.0,
         row_inv_alpha = smoothingFactor * L1norm_x / numRow,
         m_e = smoothingFactor * L1norm_x;
  if (priors[i] > 0){
    switch (laplace){
      case NO_SMOOTHING:
        for (int j = colPtr[i]; j < colPtr[i+1]; j++){
          double tempValue = x[rowIdx[j]];
          if (tempValue > 0.0)
            result += value[j] * log(tempValue);
          else
            return MY_DBL_MAX; // KL(vec[i], x) is infinite; return a huge number
        }
        result = norm[i] - result + log(L1norm_x);
        break;
      case UNIFORM_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // row_inv_alpha makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + row_inv_alpha);
        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);
        break;
      case MAXIMUM_ENTROPY_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // m_e makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + m_e * p_x[rowIdx[j]]);
        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);
        break;
      case LAPLACE_SMOOTHING:
        // Not used in co-clustering. The smoothing vector is alpha (given by
        // the user) divided by |X|*|Y|; row_inv_alpha is its L1-norm.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += (value[j] + row_inv_alpha) * log(x[rowIdx[j]]);
        result = norm[i] - result / (1+smoothingFactor);
        break;
      default:
        break;
    }
  }
  return result;
}

void SparseMatrix::Kullback_leibler(double *x, double *result, int laplace, double L1norm_x)
// Given the KL-norm of the column vectors, norm[i] (priors already taken
// into account), compute the KL divergence between each column vector of
// the matrix and x; the results are stored in array 'result'.
// Takes advantage of
//   KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
{
  for (int i = 0; i < numCol; i++)
    result[i] = Kullback_leibler(x, i, laplace, L1norm_x);
}

double SparseMatrix::Kullback_leibler(double *x, int i, int laplace)
/* Given the L1-norms of vec[i] and x (vec[i] and x must be normalized
   before the call), compute the KL divergence between vec[i] of the matrix
   and x; the result is returned.
   Takes advantage of
     KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i)
              = norm[i] - \sum_i p_i log(q_i).
   The KL value is in nats, NOT bits. */
{
  double result = 0.0,
         row_inv_alpha = smoothingFactor / numRow,
         m_e = smoothingFactor;
  if (priors[i] > 0){
    switch (laplace){
      case NO_SMOOTHING:
        for (int j = colPtr[i]; j < colPtr[i+1]; j++){
          if (x[rowIdx[j]] > 0.0)
            result += value[j] * log(x[rowIdx[j]]);
          else
            return MY_DBL_MAX; // KL(vec[i], x) is infinite; return a huge number
        }
        result = norm[i] - result;
        break;
      case UNIFORM_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // row_inv_alpha makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + row_inv_alpha);
        result = norm[i] - result + log(1+smoothingFactor);
        break;
      case MAXIMUM_ENTROPY_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // m_e makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + m_e * p_x[rowIdx[j]]);
        result = norm[i] - result + log(1+smoothingFactor);
        break;
      case LAPLACE_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by
        // |X|*|Y|; row_inv_alpha is its L1-norm.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += (value[j] + row_inv_alpha) * log(x[rowIdx[j]]);
        result = norm[i] - result / (1+smoothingFactor);
        break;
      default:
        break;
    }
  }
  return result;
}

void SparseMatrix::Kullback_leibler(double *x, double *result, int laplace)
// Given the KL-norm of the column vectors, norm[i] (priors already taken
// into account), compute the KL divergence between each column vector of
// the matrix and x; the results are stored in array 'result'.
// Takes advantage of
//   KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
{
  for (int i = 0; i < numCol; i++)
    result[i] = Kullback_leibler(x, i, laplace);
}
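/* ---------------------------------------------------------------------
   Sketch (illustrative only): a dense reference for the KL computation
   above. KL(p, q) = \sum_i p_i log(p_i / q_i); the class caches
   \sum_i p_i log(p_i) per column in norm[i], so each call only has to
   accumulate \sum_i p_i log(q_i) over that column's nonzeros. The
   hypothetical helper below computes the unsmoothed divergence, in
   nats, for normalized dense p and q, using MY_DBL_MAX as above for an
   infinite divergence.
--------------------------------------------------------------------- */
static double klDivergenceDense(const double *p, const double *q, int d)
{
  double result = 0.0;
  for (int i = 0; i < d; i++){
    if (p[i] > 0.0){
      if (q[i] <= 0.0)
        return MY_DBL_MAX;  // the divergence is infinite
      result += p[i] * (log(p[i]) - log(q[i]));
    }
  }
  return result;
}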
double SparseMatrix::Jenson_Shannon(double *x, int i, double prior_x)
/* Given the prior of vec[i], compute the Jensen-Shannon divergence between
   vec[i] of the data matrix and x; the result, in nats, is returned. */
{
  double result = 0.0, *p_bar, p1, p2;
  if ((priors[i] > 0) && (prior_x > 0)){
    p1 = priors[i] / (priors[i] + prior_x);
    p2 = prior_x / (priors[i] + prior_x);
    p_bar = new double[numRow];
    for (int j = 0; j < numRow; j++)
      p_bar[j] = p2 * x[j];
    for (int j = colPtr[i]; j < colPtr[i+1]; j++)
      p_bar[rowIdx[j]] += p1 * value[j];
    result = p1 * Kullback_leibler(p_bar, i, NO_SMOOTHING)
           + ::Kullback_leibler(x, p_bar, numRow) * p2;
    delete [] p_bar;
  }
  return result; // the real JS value is this result divided by L1_norm[i]+l1n_x
}

void SparseMatrix::Jenson_Shannon(double *x, double *result, double prior_x)
/* Given the priors of vec[i] and x (vec[i] and x are all normalized),
   compute the JS divergence between every vec[i] of the data matrix and x;
   the results, in nats, are stored in 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = Jenson_Shannon(x, i, prior_x);
}

void SparseMatrix::computeNorm_2()
/* Compute the squared L2-norm of each column vector of the matrix;
   first check whether the array 'norm' has been allocated. */
{
  if (norm == NULL){
    norm = new double[numCol];
    memoryUsed += numCol * sizeof(double);
  }
  for (int i = 0; i < numCol; i++){
    norm[i] = 0.0;
    for (int j = colPtr[i]; j < colPtr[i+1]; j++){
      double tempValue = value[j];
      norm[i] += tempValue * tempValue;
    }
  }
}

void SparseMatrix::computeNorm_1()
/* Compute the L1-norm of each column vector of the matrix; first check
   whether the array 'L1_norm' has been allocated. (The listing breaks off
   after this signature; the body below mirrors computeNorm_2() and should
   be checked against the original source.) */
{
  if (L1_norm == NULL){
    L1_norm = new double[numCol];
    memoryUsed += numCol * sizeof(double);
  }
  for (int i = 0; i < numCol; i++){
    L1_norm[i] = 0.0;
    for (int j = colPtr[i]; j < colPtr[i+1]; j++)
      L1_norm[i] += fabs(value[j]);
  }
}
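/* ---------------------------------------------------------------------
   Sketch (illustrative only): the decomposition used by Jenson_Shannon()
   above is JS(p, q) = w1 KL(p, m) + w2 KL(q, m), where m = w1 p + w2 q
   and w1, w2 are the normalized priors of p and q. The hypothetical
   helper below spells this out for dense, normalized p and q, reusing
   klDivergenceDense() from the earlier sketch; the result is in nats.
--------------------------------------------------------------------- */
static double jsDivergenceDense(const double *p, const double *q, int d, double w1, double w2)
{
  double *m = new double[d];
  for (int i = 0; i < d; i++)
    m[i] = w1 * p[i] + w2 * q[i];  // the mean distribution
  double result = w1 * klDivergenceDense(p, m, d)
                + w2 * klDivergenceDense(q, m, d);
  delete [] m;
  return result;
}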