📄 densematrix.cc

📁 一种聚类算法,名字是cocluster
💻 CC
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*   DenseMatrix.cc    Implementation of the DenseMatrix class    Copyright (c) 2005, 2006              by Hyuk Cho    Copyright (c) 2003, 2004    	      by Hyuk Cho, Yuqiang Guan, and Suvrit Sra                {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <cmath>#include <assert.h>#include <stdlib.h>#include "DenseMatrix.h"DenseMatrix::DenseMatrix(int row, int col, double **val) : Matrix (row, col)  /* numRow, numCol, and val give the dimensionality and entries of matrix.     norm[] may be used to store the L2 norm of each vector and     L1_norm[] may be used to store the L1 norm of each vector  */{  numRow = row;  numCol = col;  value = val;}DenseMatrix::~DenseMatrix (){  if (norm != NULL)    delete[] norm;  if (L1_norm != NULL)    delete[] L1_norm;}double DenseMatrix::dot_mult(double *x, int i)   /* this function computes the dot-product between the ith vector in the dense matrix     and vector x; result is returned.  */{  double result = 0.0;  for (int j = 0; j < numRow; j++)    result += value[j][i] * x[j];  return result;}void DenseMatrix::trans_mult(double *x, double *result)   /* Suppose A is the dense matrix, this function computes A^T x;     the results is stored in array result[]  */{  for (int i = 0; i < numCol; i++)    result[i] = dot_mult(x, i);}void DenseMatrix::squared_trans_mult(double *x, double *result)   /* Suppose A is the dense matrix, this function computes A^T x;     the results is stored in array result[]  */{  for (int i = 0; i < numCol; i++)    result[i] = squared_dot_mult(x, i) ;}double DenseMatrix::squared_dot_mult(double *x, int i)   /* this function computes the dot-product between the ith vector in the dense matrix     and vector x; result is returned.  */{  double result=0.0;  for (int j = 0; j< numRow; j++)    result += value[j][i] * x[j];  return result*result;}void DenseMatrix::A_trans_A(int flag, int * index, int *pointers, double ** A_t_A)  /* computes A'A given doc_IDs in the same cluster; index[] contains doc_IDs for all docs,      pointers[] gives the range of doc_ID belonging to the same cluster;     the resulting matrix is stored in A_t_A[][] ( d by d matrix)     Notice that Gene expression matrix is usually n by d where n is #genes; this matrix     format is different from that of document matrix.      In the main memory the matrix is stored in the format of document matrix.  */{  int clustersize = pointers[1]-pointers[0];  if (flag > 0){    for (int i = 0; i < numRow; i++)      for (int j = 0; j < numRow; j++){        A_t_A[i][j] = 0;        for (int k = 0; k < clustersize; k++){	  int tempCol = index[k+pointers[0]];          A_t_A[i][j] += value[i][tempCol] * value[j][tempCol];	}      }  } else {    for (int i = 0; i < numRow; i++)      for (int j = 0; j < numRow; j++){        A_t_A[i][j] = 0;        for (int k = 0; k < clustersize; k++){	  int tempCol = k + pointers[0];          A_t_A[i][j] += value[i][tempCol] * value[j][tempCol];	}      }  }}void DenseMatrix::right_dom_SV(int *cluster, int *cluster_size, int n_Clusters, double ** CV, double *cluster_quality, int flag){  int *pointer = new int[n_Clusters+1], *index = new int[numCol], *range = new int[2];  double **A_t_A;  pointer[0] = 0;  for (int i = 1; i < n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  for (int i = 0; i < numCol; i++){    int tempCluster = cluster[i];    index[pointer[tempCluster]] = i;    pointer[tempCluster]++;  }  pointer[0] = 0;  for (int i = 1; i <= n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  A_t_A =new double *[numRow];  for (int i = 0; i < numRow; i++)    A_t_A [i] = new double[numRow];  if (flag < 0){    for (int i = 0; i < n_Clusters; i++){      range[0] = pointer[i];      range[1] = pointer[i+1];      A_trans_A(1, index, range, A_t_A); /*            for (int k=0; k< numRow; k++){        for (int j=0; j< numRow; j++)	  cout<<A_t_A[k][j]<<" ";	cout<<endl;	}*/      power_method(A_t_A, numRow, CV[i], CV[i], cluster_quality[i]);   }  } else if ((flag >= 0) && (flag < n_Clusters)){    range[0] = pointer[flag];    range[1] = pointer[flag+1];    A_trans_A(1, index, range, A_t_A);    power_method(A_t_A, numRow, CV[flag], CV[flag], cluster_quality[flag]);  } else if ((flag >= n_Clusters) && (flag <2*n_Clusters)){    range[0] = pointer[flag-n_Clusters];    range[1] = pointer[flag-n_Clusters+1];    A_trans_A(1, index, range, A_t_A);    power_method(A_t_A, numRow, NULL, CV[flag-n_Clusters], cluster_quality[flag-n_Clusters]);  }  delete [] pointer;  delete [] index;  delete [] range;  for (int i = 0; i < numRow; i++)    delete [] A_t_A[i];  delete [] A_t_A;}double DenseMatrix::euc_dis(double *x, int i, double norm_x)  /* Given squared L2-norms of the vecs and v, norm[i] and norm_v,     compute the Euc-dis between ith vec in the matrix and v,     result is returned.     Used (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c  */{  double result=0.0;  for (int j = 0; j < numRow; j++)    result += x[j] * value[j][i];  result *= -2.0;  result += norm[i] + norm_x;  return result;}void DenseMatrix::euc_dis(double *x, double norm_x, double *result)  /* Given squared L2-norms of the vecs and x, norm[i] and norm_x,     compute the Euc-dis between each vec in the matrix with x,       results are stored in array 'result'.      Since the matrix is dense, not taking advantage of      (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c     but the abstract class definition needs the parameter of 'norm_x'  */{  for (int i = 0; i < numCol; i++)    result[i] = euc_dis(x, i, norm_x);}void DenseMatrix::Kullback_leibler(double *x, double *result, int laplace){  for (int i=0; i<numCol; i++)    result [i] = Kullback_leibler(x, i, laplace);}double DenseMatrix::Kullback_leibler(double *x, int i, int laplace)  // Given the KL-norm of vec[i], norm[i], (already considered prior)  //   compute KL divergence between vec[i] in the matrix with x,  //   result is returned. 'sum_log' is sum of logs of x[j]  //   Take advantage of KL(p, q) =   //  \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)  // KL norm is in unit of nats NOT bits  {  double result = 0.0, row_inv_alpha = smoothingFactor / numRow, m_e = smoothingFactor;  if (priors[i] > 0){    switch (laplace){      case NO_SMOOTHING:        for (int j = 0; j < numRow; j++){          if(x[j] > 0.0)            result += value[j][i] * log(x[j]);          else if (value[j][i] > 0.0)	    return MY_DBL_MAX; // if KL(vec[i], x) is inf. give it a huge number 1.0e20        }        result = norm[i]-result;	break;      case UNIFORM_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += value[j][i] * log(x[j] + row_inv_alpha);        result = norm[i] - result + log(1 + smoothingFactor);	break;      case MAXIMUM_ENTROPY_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += value[j][i] * log(x[j] + m_e * p_x[j]);	result = norm[i] - result + log(1 + smoothingFactor);	break;      case LAPLACE_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += (value[j][i] + row_inv_alpha) * log(x[j]);	result = norm[i] - result / (1 + smoothingFactor);	break;      default:        break;    }  }  return result;}void DenseMatrix::Kullback_leibler(double *x, double *result, int laplace, double L1norm_x){  for (int i = 0; i < numCol; i++)    result[i] = Kullback_leibler(x, i, laplace, L1norm_x);}double DenseMatrix::Kullback_leibler(double *x, int i, int laplace, double L1norm_x)  // Given the KL-norm of vec[i], norm[i], (already considered prior)  //   compute KL divergence between vec[i] in the matrix with x,  //   result is returned. 'sum_log' is sum of logs of x[j]  //   Take advantage of KL(p, q) =   //  \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)  // KL norm is in unit of nats NOT bits{  double result = 0.0, row_inv_alpha = smoothingFactor * L1norm_x / numRow , m_e = smoothingFactor * L1norm_x;  if (priors[i] > 0){    switch (laplace){      case NO_SMOOTHING:        for (int j = 0; j < numRow; j++){          if (x[j] > 0.0)            result += value[j][i] * log(x[j]);          else if (value[j][i] > 0.0)            return MY_DBL_MAX; // if KL(vec[i], x) is inf. give it a huge number 1.0e20	}        result = norm[i] - result + log(L1norm_x);        break;      case UNIFORM_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += value[j][i] * log(x[j] + row_inv_alpha);        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);        break;      case MAXIMUM_ENTROPY_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += value[j][i] * log(x[j] + m_e * p_x[j]);        result = norm[i] - result + log((1+smoothingFactor) * L1norm_x);        break;       case LAPLACE_SMOOTHING:        for (int j = 0; j < numRow; j++)          result += (value[j][i] + row_inv_alpha) * log(x[j]);        result = norm[i] - result / (1+smoothingFactor);        break;      default:        break;    }  }  return result;}double DenseMatrix::Jenson_Shannon(double *x, int i, double prior_x)  // Given the prior of vec[i]  //   compute KL divergence between vec[i] in the matrix with x,  //   result in nats is returned.   {  double result = 0.0, *p_bar, p1, p2, tempPriorsI = priors[i];   if ((tempPriorsI > 0) && (prior_x > 0)){    p1 = tempPriorsI / (tempPriorsI + prior_x);    p2 = prior_x / (tempPriorsI + prior_x);    p_bar = new double[numRow];    for (int j = 0; j < numRow; j++)      p_bar[j] = p2 * x[j] + p1 * value[j][i];    result = p1 * Kullback_leibler(p_bar, i, NO_SMOOTHING) + ::Kullback_leibler(x, p_bar, numRow) * p2;     //result /= L1_norm[i] + l1n_x;    delete [] p_bar;  }  return result; // the real JS value should be this result devided by L1_norm[i]+l1n_x}void DenseMatrix::Jenson_Shannon(double *x, double *result, double prior_x){  for (int i = 0; i < numCol; i++)    result[i] = Jenson_Shannon(x, i, prior_x);}void DenseMatrix::computeNorm_2()  /* compute the squared L2 norms of each vector in the dense matrix   */{  if (norm == NULL){    norm = new double[numCol];    memoryUsed += numCol * sizeof(double);  }  for (int i = 0; i < numCol; i++){    norm[i] = 0.0;    for (int j = 0; j < numRow; j++){      double tempValue = value[j][i];      norm[i] += tempValue * tempValue;    }  }}void DenseMatrix::computeNorm_1() /* compute the L1 norms of each vector in the dense matrix   */{  if (L1_norm == NULL){    L1_norm = new double[numCol];    memoryUsed += numCol * sizeof(double);  }  for (int i = 0; i < numCol; i++){    L1_norm[i] = 0.0;    for (int j = 0; j < numRow; j++)      L1_norm[i] += fabs(value[j][i]);    L1_sum += L1_norm [i];  }}void DenseMatrix::computeNorm_KL(int laplace)  /* compute the KL norms of each vector p_i in the dense matrix     i.e. \sum_x p_i(x) \log p_i(x)     the norm[i] is in unit of nats NOT bits   */{  double row_inv_alpha = smoothingFactor / numRow;  Norm_sum = 0;  if (norm == NULL){    norm = new double[numCol];    memoryUsed += numCol * sizeof(double);  }  switch (laplace){    case NO_SMOOTHING:    case UNIFORM_SMOOTHING:      for (int i = 0; i < numCol; i++){        norm[i] = 0.0;        for (int j = 0; j < numRow; j++){	  double tempValue = value[j][i];          if (tempValue > 0.0)	    norm[i] += tempValue * log(tempValue);	}        Norm_sum += norm[i] * priors[i];      }      break;    case MAXIMUM_ENTROPY_SMOOTHING:      for (int i = 0; i < numCol; i++){        norm[i] = 0.0;        for (int j = 0; j < numRow; j++){	  double tempValue = value[j][i];          if (tempValue > 0.0)            norm[i] += tempValue * log(tempValue);	}	Norm_sum += norm[i] * priors[i];      }      if (p_x == NULL){        p_x = new double [numRow];        memoryUsed += numRow * sizeof(double);      }      for (int j = 0; j < numRow; j++){         p_x[j] =0;         for (int i = 0; i < numCol; i++)           p_x[j] += priors[i] * value[j][i];      }      break;    case LAPLACE_SMOOTHING:      for (int i = 0; i < numCol; i++){        norm[i] = 0.0;        for (int j = 0; j < numRow; j++){	  double tempValue = value[j][i];          norm[i] += (tempValue + row_inv_alpha) * log(tempValue + row_inv_alpha);          norm[i] = norm[i] / (1+smoothingFactor) + log(1+smoothingFactor);          Norm_sum += norm[i] * priors[i];        }      }      break;    default:      break;  }  Norm_sum /= log(2.0);}void DenseMatrix::normalize_mat_L2()  /* L2 normalize each vector in the dense matrix to have L2 norm 1   */{  for (int i = 0; i < numCol; i++){
12 3 下一页
💿 文件大小 75 K
👤 上传用户 lwp
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#cocluster #聚类算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -