📄 sparsematrix.cc
字号:
/* Implementation of SparseMatrix class * Copyright (c) 2003, Yuqiang Guan */#include <assert.h>#include <stdlib.h>#include <iostream.h>#include <math.h>#include "SparseMatrix.h"SparseMatrix::SparseMatrix(int row, int col, int nz, float *val, int *rowind, int *colptr) : Matrix (row, col){ n_row = row; n_col = col; n_nz = nz; vals = val; rowinds = rowind; colptrs = colptr; memory_used += (n_row+n_col)*sizeof(int)+n_nz*sizeof(float);}SparseMatrix::~SparseMatrix(){ if (norm != NULL) delete[] norm; if (L1_norm != NULL) delete[] L1_norm;}float SparseMatrix::operator()(int i, int j) const{ assert(i >= 0 && i < n_row && j >= 0 && j < n_col); for (int t = colptrs[j]; t < colptrs[j+1]; t++) if (rowinds[t] == i) return vals[t]; return 0.0;}float SparseMatrix::dot_mult(float *v, int i) /*compute the dot-product between the ith vector (in the sparse matrix) with vector v result is returned. */{ float result=0.0; for (int j = colptrs[i]; j < colptrs[i+1]; j++) result += vals[j] * v[rowinds[j]]; return result;}void SparseMatrix::trans_mult(float *x, float *result) /*compute the dot-product between every vector (in the sparse matrix) with vector v results are stored in array 'result'. */{ for (int i = 0; i < n_col; i++) result[i] = dot_mult(x, i);}float SparseMatrix::squared_dot_mult(float *v, int i) /*compute the dot-product between the ith vector (in the sparse matrix) with vector v result is returned. */{ float result=0.0; for (int j = colptrs[i]; j < colptrs[i+1]; j++) result += vals[j] * v[rowinds[j]]; return result*result;}void SparseMatrix::squared_trans_mult(float *x, float *result) /*compute the dot-product between every vector (in the sparse matrix) with vector v results are stored in array 'result'. */{ for (int i = 0; i < n_col; i++) result[i] = squared_dot_mult(x, i);}void SparseMatrix::A_trans_A(int flag, int * index, int *pointers, float **A_t_A, int & nz_counter)/* computes A'A given doc_IDs in the same cluster; index[] contains doc_IDs for all docs, pointers[] gives the range of doc_ID belonging to the same cluster; the resulting matrix is stored in dense format A_t_A( d by d matrix) The right SVs of A are the corresponding EVs of A'A Notice that Gene expression matrix is usually n by d where n is #genes; this matrix format is different from that of document matrix. In the main memory the matrix is stored in the format of document matrix. */{ int i, j, k, l, clustersize = pointers[1]-pointers[0]; for (i=0; i<n_row; i++) for (j=0; j<n_row; j++) A_t_A[i][j] = 0; nz_counter=0; if (flag >0) { for (k=0; k<clustersize; k++) for (l = colptrs[index[k+pointers[0]]]; l < colptrs[index[k+pointers[0]]+1]; l++) for (j = colptrs[index[k+pointers[0]]]; j < colptrs[index[k+pointers[0]]+1]; j++) { A_t_A[rowinds[l]][rowinds[j]] += vals[l]*vals[j]; } } else { for (k=0; k<clustersize; k++) for (l = colptrs[k+pointers[0]]; l < colptrs[k+pointers[0]+1]; l++) for (j = colptrs[k+pointers[0]]; j < colptrs[k+pointers[0]+1]; j++) { A_t_A[rowinds[l]][rowinds[j]] += vals[l]*vals[j]; } } for (i=0; i<n_row; i++) for (j=0; j<n_row; j++) if (A_t_A[i][j] >0) nz_counter++;}void SparseMatrix::dense_2_sparse(int* AtA_colptr, int *AtA_rowind, float *AtA_val, float **A_t_A){ int i, l, j, k; k=0; AtA_colptr[0] = 0; for (j=0; j<n_row; j++) { l=0; for (i=0; i<n_row; i++) if (A_t_A[i][j] > 0) { AtA_val[k] = A_t_A[i][j]; AtA_rowind[k++] = i; l++; } AtA_colptr[j+1] = AtA_colptr[j] + l; }}void SparseMatrix::right_dom_SV(int *cluster, int *cluster_size, int n_Clusters, float ** CV, float *cluster_quality, int flag) /* flag == -1, then update all the centroids and the cluster qualities; 0 <= flag < n_Clusters , then update the centroid and the cluster quality specified by 'flag'; n_Clusters <= flag <2*n_Clusters, then update the cluster quality specified by 'flag-n_Clusters'; */{ int i, *pointer = new int[n_Clusters+1], *index = new int[n_col], *range =new int[2], nz_counter; int *AtA_rowind =NULL, *AtA_colptr=NULL; float *AtA_val =NULL; float **A_t_A; A_t_A =new (float*) [n_row]; for (i=0; i<n_row; i++) A_t_A [i] = new float [n_row]; pointer[0] =0; for (i=1; i<n_Clusters; i++) pointer[i] = pointer[i-1] + cluster_size[i-1]; for (i=0; i<n_col; i++) { index[pointer[cluster[i]]] = i; pointer[cluster[i]] ++; } pointer[0] =0; for (i=1; i<=n_Clusters; i++) pointer[i] = pointer[i-1] + cluster_size[i-1]; if (flag <0) // compute eigval and eigvec for all clusters { for (i=0; i<n_Clusters; i++) { range[0] = pointer[i]; range[1] = pointer[i+1]; A_trans_A(1, index, range, A_t_A, nz_counter); AtA_val = new float [nz_counter]; AtA_rowind = new int [nz_counter]; AtA_colptr = new int [n_row+1]; dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A); /* for (int l=0; l< n_row; l++) { for (int j=0; j< n_row; j++) cout<<A_t_A[l][j]<<" "; cout<<endl; } for (int l=0; l<= n_row; l++) cout<<AtA_colptr[l]<<" "; cout<<endl; for (int l=0; l<nz_counter; l++) cout<<AtA_rowind[l]<<" "; cout<<endl; for (int l=0; l<nz_counter; l++) cout<<AtA_val[l]<<" "; cout<<endl<<endl; */ power_method(AtA_val, AtA_rowind, AtA_colptr, n_row, CV[i], CV[i], cluster_quality[i]); //for (int l=0; l<n_row; l++) // cout<<CV[i][l]<<" "; //cout<<cluster_quality[i]<<endl; delete [] AtA_val; delete [] AtA_rowind; delete [] AtA_colptr; } for (i=0; i< n_row; i++) delete [] A_t_A[i]; delete [] A_t_A; } else if ((flag >=0) && (flag<n_Clusters)) //compute eigval and eigvec for cluster 'flag' { range[0] = pointer[flag]; range[1] = pointer[flag+1]; A_trans_A(1, index, range, A_t_A, nz_counter); AtA_val = new float [nz_counter]; AtA_rowind = new int [nz_counter]; AtA_colptr = new int [n_row+1]; dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A); for (i=0; i< n_row; i++) delete [] A_t_A[i]; delete [] A_t_A; power_method(AtA_val, AtA_rowind, AtA_colptr, n_row, CV[flag], CV[flag], cluster_quality[flag]); delete [] AtA_val; delete [] AtA_rowind; delete [] AtA_colptr; } else if ((flag >= n_Clusters) && (flag <= 2*n_Clusters)) // compute eigval ONLY for cluster 'flag-n_Clusters', eigvec for cluster 'flag-n_Clusters' no change { range[0] = pointer[flag-n_Clusters]; range[1] = pointer[flag-n_Clusters+1]; A_trans_A(1, index, range, A_t_A, nz_counter); AtA_val = new float [nz_counter]; AtA_rowind = new int [nz_counter]; AtA_colptr = new int [n_row+1]; dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A); for (i=0; i< n_row; i++) delete [] A_t_A[i]; delete [] A_t_A; power_method(AtA_val, AtA_rowind, AtA_colptr, n_row, NULL, CV[flag-n_Clusters], cluster_quality[flag-n_Clusters]); delete [] AtA_val; delete [] AtA_rowind; delete [] AtA_colptr; } delete [] pointer; delete [] index; delete [] range;}float SparseMatrix::euc_dis(float *v, int i, float norm_v) /* Given L2-norms of the vecs and v, norm[i] and norm_v, compute the squared Euc-dis between ith vec in the matrix and v, result is returned. Take advantage of (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c */{ float result=0.0; for (int j = colptrs[i]; j < colptrs[i+1]; j++) result += vals[j] * v[rowinds[j]]; result *= -2.0; result += norm[i]+norm_v; return result;}void SparseMatrix::euc_dis(float *x, float norm_x, float *result) /* Given L2-norms of the vecs and x, norm[i] and norm_x, compute the squared Euc-dis between each vec in the matrix with x, results are stored in array 'result'. Take advantage of (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c */{ for (int i = 0; i < n_col; i++) result[i] = euc_dis(x, i, norm_x);}float SparseMatrix::Kullback_leibler(float *x, int i, int laplace, float L1norm_x) /* Given the L1_norms of vec[i] and x, (vec[i] need be normalized before function-call compute KL divergence between vec[i] in the matrix with x, result is returned. Take advantage of KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i) the KL is in unit of nats NOT bits. */{ float result=0.0, row_inv_alpha=alpha/n_row; if (priors[i] >0) { switch(laplace) { case NOLAPLACE: for (int j = colptrs[i]; j < colptrs[i+1]; j++) { if(x[rowinds[j]] >0.0) result += vals[j] * log(x[rowinds[j]]); else return HUGE_NUMBER; // if KL(vec[i], x) is inf. give it a huge number 1.0e20 } result = norm[i]-result+log(L1norm_x); break; case CENTER_LAPLACE: // this vector alpha is alpha (given by user) divided by |Y|, //row_inv_alpha is to make computation faster for (int j = colptrs[i]; j < colptrs[i+1]; j++) result += vals[j] * log(x[rowinds[j]]+row_inv_alpha*L1norm_x) ; result = norm[i]-result+log((1+alpha)*L1norm_x); break; case PRIOR_LAPLACE: // this vector alpha is alpha (given by user) divided by |X|*|Y|, //row_alpha is its L1-norm for (int j = colptrs[i]; j < colptrs[i+1]; j++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -