📄 sparsematrix.cc
/* SparseMatrix.cc
     Implementation of the SparseMatrix class

   Copyright (c) 2005, 2006 by Hyuk Cho
   Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra
     {hyukcho, yguan, suvrit}@cs.utexas.edu
*/

#include <iostream>
#include <cmath>
#include <cassert>
#include <cstdlib>

#include "SparseMatrix.h"

SparseMatrix::SparseMatrix(int row, int col, int nz, double *val, int *rowind, int *colptr)
  : Matrix(row, col)
{
  numRow = row;
  numCol = col;
  numVal = nz;
  value  = val;
  colPtr = colptr;
  rowIdx = rowind;
}

SparseMatrix::~SparseMatrix()
{
  if (norm != NULL)
    delete [] norm;
  if (L1_norm != NULL)
    delete [] L1_norm;
}

double SparseMatrix::operator()(int i, int j) const
{
  assert(i >= 0 && i < numRow && j >= 0 && j < numCol);
  for (int t = colPtr[j]; t < colPtr[j+1]; t++)
    if (rowIdx[t] == i)
      return value[t];
  return 0.0;
}

double SparseMatrix::dot_mult(double *v, int i)
/* Compute the dot product between the ith column vector of the sparse
   matrix and vector v; the result is returned. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  return result;
}

void SparseMatrix::trans_mult(double *x, double *result)
/* Compute the dot product between every column vector of the sparse
   matrix and vector x; the results are stored in array 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = dot_mult(x, i);
}

double SparseMatrix::squared_dot_mult(double *v, int i)
/* Compute the squared dot product between the ith column vector of the
   sparse matrix and vector v; the result is returned. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  return result * result;
}

void SparseMatrix::squared_trans_mult(double *x, double *result)
/* Compute the squared dot product between every column vector of the
   sparse matrix and vector x; the results are stored in array 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = squared_dot_mult(x, i);
}
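/* ---------------------------------------------------------------------
   Usage sketch (illustrative only, not part of the original file): the
   constructor above stores the matrix in compressed sparse column (CSC)
   form, where column j occupies positions colPtr[j]..colPtr[j+1]-1 of
   value[] and rowIdx[]. The helper name demoSparseMatrixCSC is
   hypothetical; it builds the 3x2 matrix
       | 1 0 |
       | 0 2 |
       | 3 0 |
   and exercises operator() and trans_mult(). It relies on the class
   initializing norm/L1_norm to NULL, as the destructor above implies.
--------------------------------------------------------------------- */
static void demoSparseMatrixCSC()
{
  double val[]    = {1.0, 3.0, 2.0}; // nonzeros, column by column
  int    rowind[] = {0, 2, 1};       // row index of each nonzero
  int    colptr[] = {0, 2, 3};       // column j spans colptr[j]..colptr[j+1]-1
  SparseMatrix A(3, 2, 3, val, rowind, colptr);
  assert(A(2, 0) == 3.0 && A(0, 1) == 0.0);
  double x[] = {1.0, 1.0, 1.0}, result[2];
  A.trans_mult(x, result);           // result = {1 + 3, 2} = {4, 2}
  assert(result[0] == 4.0 && result[1] == 2.0);
}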
void SparseMatrix::A_trans_A(int flag, int *index, int *pointers, double **A_t_A, int &nz_counter)
/* Computes A'A for the documents in a single cluster: index[] contains the
   doc IDs of all docs, and pointers[] gives the range of positions in
   index[] belonging to that cluster. The result is stored in dense format
   in A_t_A (a d-by-d matrix). The right singular vectors of A are the
   eigenvectors of A'A.
   Note that a gene-expression matrix is usually n by d, where n is the
   number of genes; this format differs from that of a document matrix.
   In main memory the matrix is stored in the document-matrix format. */
{
  int clustersize = pointers[1] - pointers[0];
  for (int i = 0; i < numRow; i++)
    for (int j = 0; j < numRow; j++)
      A_t_A[i][j] = 0;
  nz_counter = 0;
  if (flag > 0){
    // Documents are addressed indirectly through index[].
    for (int k = 0; k < clustersize; k++)
      for (int l = colPtr[index[k+pointers[0]]]; l < colPtr[index[k+pointers[0]]+1]; l++)
        for (int j = colPtr[index[k+pointers[0]]]; j < colPtr[index[k+pointers[0]]+1]; j++)
          A_t_A[rowIdx[l]][rowIdx[j]] += value[l] * value[j];
  } else {
    // Documents pointers[0]..pointers[1]-1 are used directly.
    for (int k = 0; k < clustersize; k++)
      for (int l = colPtr[k+pointers[0]]; l < colPtr[k+pointers[0]+1]; l++)
        for (int j = colPtr[k+pointers[0]]; j < colPtr[k+pointers[0]+1]; j++)
          A_t_A[rowIdx[l]][rowIdx[j]] += value[l] * value[j];
  }
  for (int i = 0; i < numRow; i++)
    for (int j = 0; j < numRow; j++)
      if (A_t_A[i][j] > 0)
        nz_counter++;
}

void SparseMatrix::dense_2_sparse(int *AtA_colptr, int *AtA_rowind, double *AtA_val, double **A_t_A)
/* Converts the dense matrix A_t_A into CSC format (AtA_colptr, AtA_rowind,
   AtA_val); only strictly positive entries are stored. */
{
  int k = 0;
  AtA_colptr[0] = 0;
  for (int j = 0; j < numRow; j++){
    int l = 0;
    for (int i = 0; i < numRow; i++)
      if (A_t_A[i][j] > 0){
        AtA_val[k] = A_t_A[i][j];
        AtA_rowind[k++] = i;
        l++;
      }
    AtA_colptr[j+1] = AtA_colptr[j] + l;
  }
}

void SparseMatrix::right_dom_SV(int *cluster, int *cluster_size, int n_Clusters, double **CV, double *cluster_quality, int flag)
/* flag == -1: update all the centroids and the cluster qualities;
   0 <= flag < n_Clusters: update the centroid and the cluster quality
     specified by 'flag';
   n_Clusters <= flag < 2*n_Clusters: update only the cluster quality
     specified by 'flag - n_Clusters'. */
{
  int *pointer = new int[n_Clusters+1], *index = new int[numCol], *range = new int[2], nz_counter;
  int *AtA_rowind = NULL, *AtA_colptr = NULL;
  double *AtA_val = NULL;
  double **A_t_A;

  A_t_A = new double *[numRow];
  for (int i = 0; i < numRow; i++)
    A_t_A[i] = new double[numRow];

  // Bucket the doc IDs so that index[pointer[c]..pointer[c+1]-1] holds
  // the documents of cluster c.
  pointer[0] = 0;
  for (int i = 1; i < n_Clusters; i++)
    pointer[i] = pointer[i-1] + cluster_size[i-1];
  for (int i = 0; i < numCol; i++){
    index[pointer[cluster[i]]] = i;
    pointer[cluster[i]]++;
  }
  pointer[0] = 0;
  for (int i = 1; i <= n_Clusters; i++)
    pointer[i] = pointer[i-1] + cluster_size[i-1];

  if (flag < 0){
    // Compute the dominant eigenvalue and eigenvector for every cluster.
    for (int i = 0; i < n_Clusters; i++){
      range[0] = pointer[i];
      range[1] = pointer[i+1];
      A_trans_A(1, index, range, A_t_A, nz_counter);
      AtA_val    = new double[nz_counter];
      AtA_rowind = new int[nz_counter];
      AtA_colptr = new int[numRow+1];
      dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
      /* Debugging dump of A_t_A and its CSC form:
      for (int l = 0; l < numRow; l++){
        for (int j = 0; j < numRow; j++)
          cout << A_t_A[l][j] << " ";
        cout << endl;
      }
      for (int l = 0; l <= numRow; l++)
        cout << AtA_colptr[l] << " ";
      cout << endl;
      for (int l = 0; l < nz_counter; l++)
        cout << AtA_rowind[l] << " ";
      cout << endl;
      for (int l = 0; l < nz_counter; l++)
        cout << AtA_val[l] << " ";
      cout << endl << endl;
      */
      power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, CV[i], CV[i], cluster_quality[i]);
      /* Debugging dump of the result:
      for (int l = 0; l < numRow; l++)
        cout << CV[i][l] << " ";
      cout << cluster_quality[i] << endl;
      */
      delete [] AtA_val;
      delete [] AtA_rowind;
      delete [] AtA_colptr;
    }
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
  } else if ((flag >= 0) && (flag < n_Clusters)){
    // Compute the dominant eigenvalue and eigenvector for cluster 'flag'.
    range[0] = pointer[flag];
    range[1] = pointer[flag+1];
    A_trans_A(1, index, range, A_t_A, nz_counter);
    AtA_val    = new double[nz_counter];
    AtA_rowind = new int[nz_counter];
    AtA_colptr = new int[numRow+1];
    dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
    power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, CV[flag], CV[flag], cluster_quality[flag]);
    delete [] AtA_val;
    delete [] AtA_rowind;
    delete [] AtA_colptr;
  } else if ((flag >= n_Clusters) && (flag < 2 * n_Clusters)){
    // Compute the eigenvalue ONLY for cluster 'flag - n_Clusters'; its
    // eigenvector is left unchanged.
    range[0] = pointer[flag-n_Clusters];
    range[1] = pointer[flag-n_Clusters+1];
    A_trans_A(1, index, range, A_t_A, nz_counter);
    AtA_val    = new double[nz_counter];
    AtA_rowind = new int[nz_counter];
    AtA_colptr = new int[numRow+1];
    dense_2_sparse(AtA_colptr, AtA_rowind, AtA_val, A_t_A);
    for (int i = 0; i < numRow; i++)
      delete [] A_t_A[i];
    delete [] A_t_A;
    power_method(AtA_val, AtA_rowind, AtA_colptr, numRow, NULL, CV[flag-n_Clusters], cluster_quality[flag-n_Clusters]);
    delete [] AtA_val;
    delete [] AtA_rowind;
    delete [] AtA_colptr;
  }
  delete [] pointer;
  delete [] index;
  delete [] range;
}
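/* ---------------------------------------------------------------------
   Sketch (illustrative only): power_method(), called by right_dom_SV()
   above, is implemented elsewhere. The hypothetical helper
   powerIterationDense below shows the underlying idea on the dense
   d-by-d matrix M = A'A produced by A_trans_A(): for a symmetric
   positive semidefinite M, repeatedly forming v <- Mv / ||Mv||
   converges to the dominant eigenvector, and ||Mv|| converges to the
   dominant eigenvalue, i.e. the squared largest singular value of A.
--------------------------------------------------------------------- */
static void powerIterationDense(double **M, int d, double *v, double &lambda, int iters)
{
  double *w = new double[d];
  for (int it = 0; it < iters; it++){
    for (int i = 0; i < d; i++){   // w = M v
      w[i] = 0.0;
      for (int j = 0; j < d; j++)
        w[i] += M[i][j] * v[j];
    }
    double nrm = 0.0;              // nrm = ||M v||
    for (int i = 0; i < d; i++)
      nrm += w[i] * w[i];
    nrm = sqrt(nrm);
    if (nrm == 0.0)
      break;                       // v lies in the null space of M
    lambda = nrm;                  // eigenvalue estimate (M is PSD)
    for (int i = 0; i < d; i++)    // v = w / ||w||
      v[i] = w[i] / nrm;
  }
  delete [] w;
}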
double SparseMatrix::euc_dis(double *v, int i, double norm_v)
/* Given the squared L2-norms of the column vectors and of v (norm[i] and
   norm_v), compute the squared Euclidean distance between the ith column
   vector of the matrix and v; the result is returned.
   Takes advantage of (x-c)'(x-c) = x'x - 2 x'c + c'c. */
{
  double result = 0.0;
  for (int j = colPtr[i]; j < colPtr[i+1]; j++)
    result += value[j] * v[rowIdx[j]];
  result *= -2.0;
  result += norm[i] + norm_v;
  return result;
}

void SparseMatrix::euc_dis(double *x, double norm_x, double *result)
/* Given the squared L2-norms of the column vectors and of x (norm[i] and
   norm_x), compute the squared Euclidean distance between each column
   vector of the matrix and x; the results are stored in array 'result'.
   Takes advantage of (x-c)'(x-c) = x'x - 2 x'c + c'c. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = euc_dis(x, i, norm_x);
}
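/* ---------------------------------------------------------------------
   Sketch (illustrative only): the identity behind euc_dis() above is
   ||x - c||^2 = x'x - 2 x'c + c'c, so only the sparse dot product x'c
   has to be computed per column; x'x (norm[i]) and c'c (norm_v) are
   precomputed. The hypothetical dense helper below makes the identity
   explicit.
--------------------------------------------------------------------- */
static double squaredEucDisDense(const double *x, const double *c, int d)
{
  double xx = 0.0, cc = 0.0, xc = 0.0;
  for (int i = 0; i < d; i++){
    xx += x[i] * x[i];
    cc += c[i] * c[i];
    xc += x[i] * c[i];
  }
  return xx - 2.0 * xc + cc;  // equals \sum_i (x_i - c_i)^2
}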
double SparseMatrix::Kullback_leibler(double *x, int i, int laplace, double L1norm_x)
/* Given the L1-norms of vec[i] and x (vec[i] must be normalized before the
   call), compute the KL divergence between vec[i] of the matrix and x;
   the result is returned.
   Takes advantage of
     KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i)
              = norm[i] - \sum_i p_i log(q_i).
   The KL value is in nats, NOT bits. */
{
  double result = 0.0,
         row_inv_alpha = smoothingFactor * L1norm_x / numRow,
         m_e = smoothingFactor * L1norm_x;
  if (priors[i] > 0){
    switch (laplace){
      case NO_SMOOTHING:
        for (int j = colPtr[i]; j < colPtr[i+1]; j++){
          double tempValue = x[rowIdx[j]];
          if (tempValue > 0.0)
            result += value[j] * log(tempValue);
          else
            return MY_DBL_MAX; // KL(vec[i], x) is infinite; return a huge number
        }
        result = norm[i] - result + log(L1norm_x);
        break;
      case UNIFORM_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // row_inv_alpha makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + row_inv_alpha);
        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);
        break;
      case MAXIMUM_ENTROPY_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // m_e makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + m_e * p_x[rowIdx[j]]);
        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);
        break;
      case LAPLACE_SMOOTHING:
        // Not used in co-clustering. The smoothing vector is alpha (given by
        // the user) divided by |X|*|Y|; row_inv_alpha is its L1-norm.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += (value[j] + row_inv_alpha) * log(x[rowIdx[j]]);
        result = norm[i] - result / (1+smoothingFactor);
        break;
      default:
        break;
    }
  }
  return result;
}

void SparseMatrix::Kullback_leibler(double *x, double *result, int laplace, double L1norm_x)
// Given the KL-norm of the column vectors, norm[i] (priors already taken
// into account), compute the KL divergence between each column vector of
// the matrix and x; the results are stored in array 'result'.
// Takes advantage of
//   KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
{
  for (int i = 0; i < numCol; i++)
    result[i] = Kullback_leibler(x, i, laplace, L1norm_x);
}

double SparseMatrix::Kullback_leibler(double *x, int i, int laplace)
/* Given the L1-norms of vec[i] and x (vec[i] and x must be normalized
   before the call), compute the KL divergence between vec[i] of the matrix
   and x; the result is returned.
   Takes advantage of
     KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i)
              = norm[i] - \sum_i p_i log(q_i).
   The KL value is in nats, NOT bits. */
{
  double result = 0.0,
         row_inv_alpha = smoothingFactor / numRow,
         m_e = smoothingFactor;
  if (priors[i] > 0){
    switch (laplace){
      case NO_SMOOTHING:
        for (int j = colPtr[i]; j < colPtr[i+1]; j++){
          if (x[rowIdx[j]] > 0.0)
            result += value[j] * log(x[rowIdx[j]]);
          else
            return MY_DBL_MAX; // KL(vec[i], x) is infinite; return a huge number
        }
        result = norm[i] - result;
        break;
      case UNIFORM_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // row_inv_alpha makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + row_inv_alpha);
        result = norm[i] - result + log(1+smoothingFactor);
        break;
      case MAXIMUM_ENTROPY_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by |Y|;
        // m_e makes the computation faster.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += value[j] * log(x[rowIdx[j]] + m_e * p_x[rowIdx[j]]);
        result = norm[i] - result + log(1+smoothingFactor);
        break;
      case LAPLACE_SMOOTHING:
        // The smoothing vector is alpha (given by the user) divided by
        // |X|*|Y|; row_inv_alpha is its L1-norm.
        for (int j = colPtr[i]; j < colPtr[i+1]; j++)
          result += (value[j] + row_inv_alpha) * log(x[rowIdx[j]]);
        result = norm[i] - result / (1+smoothingFactor);
        break;
      default:
        break;
    }
  }
  return result;
}

void SparseMatrix::Kullback_leibler(double *x, double *result, int laplace)
// Given the KL-norm of the column vectors, norm[i] (priors already taken
// into account), compute the KL divergence between each column vector of
// the matrix and x; the results are stored in array 'result'.
// Takes advantage of
//   KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
{
  for (int i = 0; i < numCol; i++)
    result[i] = Kullback_leibler(x, i, laplace);
}
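/* ---------------------------------------------------------------------
   Sketch (illustrative only): a dense reference for the KL computation
   above. KL(p, q) = \sum_i p_i log(p_i / q_i); the class caches
   \sum_i p_i log(p_i) per column in norm[i], so each call only has to
   accumulate \sum_i p_i log(q_i) over that column's nonzeros. The
   hypothetical helper below computes the unsmoothed divergence, in
   nats, for normalized dense p and q, using MY_DBL_MAX as above for an
   infinite divergence.
--------------------------------------------------------------------- */
static double klDivergenceDense(const double *p, const double *q, int d)
{
  double result = 0.0;
  for (int i = 0; i < d; i++){
    if (p[i] > 0.0){
      if (q[i] <= 0.0)
        return MY_DBL_MAX;  // the divergence is infinite
      result += p[i] * (log(p[i]) - log(q[i]));
    }
  }
  return result;
}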
double SparseMatrix::Jenson_Shannon(double *x, int i, double prior_x)
/* Given the prior of vec[i], compute the Jensen-Shannon divergence between
   vec[i] of the data matrix and x; the result, in nats, is returned. */
{
  double result = 0.0, *p_bar, p1, p2;
  if ((priors[i] > 0) && (prior_x > 0)){
    p1 = priors[i] / (priors[i] + prior_x);
    p2 = prior_x / (priors[i] + prior_x);
    p_bar = new double[numRow];
    for (int j = 0; j < numRow; j++)
      p_bar[j] = p2 * x[j];
    for (int j = colPtr[i]; j < colPtr[i+1]; j++)
      p_bar[rowIdx[j]] += p1 * value[j];
    result = p1 * Kullback_leibler(p_bar, i, NO_SMOOTHING)
           + ::Kullback_leibler(x, p_bar, numRow) * p2;
    delete [] p_bar;
  }
  return result; // the real JS value is this result divided by L1_norm[i]+l1n_x
}

void SparseMatrix::Jenson_Shannon(double *x, double *result, double prior_x)
/* Given the priors of vec[i] and x (vec[i] and x are all normalized),
   compute the JS divergence between every vec[i] of the data matrix and x;
   the results, in nats, are stored in 'result'. */
{
  for (int i = 0; i < numCol; i++)
    result[i] = Jenson_Shannon(x, i, prior_x);
}

void SparseMatrix::computeNorm_2()
/* Compute the squared L2-norm of each column vector of the matrix;
   first check whether the array 'norm' has been allocated. */
{
  if (norm == NULL){
    norm = new double[numCol];
    memoryUsed += numCol * sizeof(double);
  }
  for (int i = 0; i < numCol; i++){
    norm[i] = 0.0;
    for (int j = colPtr[i]; j < colPtr[i+1]; j++){
      double tempValue = value[j];
      norm[i] += tempValue * tempValue;
    }
  }
}

void SparseMatrix::computeNorm_1()
/* Compute the L1-norm of each column vector of the matrix; first check
   whether the array 'L1_norm' has been allocated. (The listing breaks off
   after this signature; the body below mirrors computeNorm_2() and should
   be checked against the original source.) */
{
  if (L1_norm == NULL){
    L1_norm = new double[numCol];
    memoryUsed += numCol * sizeof(double);
  }
  for (int i = 0; i < numCol; i++){
    L1_norm[i] = 0.0;
    for (int j = colPtr[i]; j < colPtr[i+1]; j++)
      L1_norm[i] += fabs(value[j]);
  }
}
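/* ---------------------------------------------------------------------
   Sketch (illustrative only): the decomposition used by Jenson_Shannon()
   above is JS(p, q) = w1 KL(p, m) + w2 KL(q, m), where m = w1 p + w2 q
   and w1, w2 are the normalized priors of p and q. The hypothetical
   helper below spells this out for dense, normalized p and q, reusing
   klDivergenceDense() from the earlier sketch; the result is in nats.
--------------------------------------------------------------------- */
static double jsDivergenceDense(const double *p, const double *q, int d, double w1, double w2)
{
  double *m = new double[d];
  for (int i = 0; i < d; i++)
    m[i] = w1 * p[i] + w2 * q[i];  // the mean distribution
  double result = w1 * klDivergenceDense(p, m, d)
                + w2 * klDivergenceDense(q, m, d);
  delete [] m;
  return result;
}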