📄 densematrix.cc
字号:
/* DenseMatrix.cc Implementation of the DenseMatrix class Copyright (c) 2005, 2006 by Hyuk Cho Copyright (c) 2003, 2004 by Hyuk Cho, Yuqiang Guan, and Suvrit Sra {hyukcho, yguan, suvrit}@cs.utexas.edu*/#include <iostream>#include <fstream>#include <cmath>#include <assert.h>#include <stdlib.h>#include "DenseMatrix.h"DenseMatrix::DenseMatrix(int row, int col, double **val) : Matrix (row, col) /* numRow, numCol, and val give the dimensionality and entries of matrix. norm[] may be used to store the L2 norm of each vector and L1_norm[] may be used to store the L1 norm of each vector */{ numRow = row; numCol = col; value = val;}DenseMatrix::~DenseMatrix (){ if (norm != NULL) delete[] norm; if (L1_norm != NULL) delete[] L1_norm;}double DenseMatrix::dot_mult(double *x, int i) /* this function computes the dot-product between the ith vector in the dense matrix and vector x; result is returned. */{ double result = 0.0; for (int j = 0; j < numRow; j++) result += value[j][i] * x[j]; return result;}void DenseMatrix::trans_mult(double *x, double *result) /* Suppose A is the dense matrix, this function computes A^T x; the results is stored in array result[] */{ for (int i = 0; i < numCol; i++) result[i] = dot_mult(x, i);}void DenseMatrix::squared_trans_mult(double *x, double *result) /* Suppose A is the dense matrix, this function computes A^T x; the results is stored in array result[] */{ for (int i = 0; i < numCol; i++) result[i] = squared_dot_mult(x, i) ;}double DenseMatrix::squared_dot_mult(double *x, int i) /* this function computes the dot-product between the ith vector in the dense matrix and vector x; result is returned. */{ double result=0.0; for (int j = 0; j< numRow; j++) result += value[j][i] * x[j]; return result*result;}void DenseMatrix::A_trans_A(int flag, int * index, int *pointers, double ** A_t_A) /* computes A'A given doc_IDs in the same cluster; index[] contains doc_IDs for all docs, pointers[] gives the range of doc_ID belonging to the same cluster; the resulting matrix is stored in A_t_A[][] ( d by d matrix) Notice that Gene expression matrix is usually n by d where n is #genes; this matrix format is different from that of document matrix. In the main memory the matrix is stored in the format of document matrix. */{ int clustersize = pointers[1]-pointers[0]; if (flag > 0){ for (int i = 0; i < numRow; i++) for (int j = 0; j < numRow; j++){ A_t_A[i][j] = 0; for (int k = 0; k < clustersize; k++){ int tempCol = index[k+pointers[0]]; A_t_A[i][j] += value[i][tempCol] * value[j][tempCol]; } } } else { for (int i = 0; i < numRow; i++) for (int j = 0; j < numRow; j++){ A_t_A[i][j] = 0; for (int k = 0; k < clustersize; k++){ int tempCol = k + pointers[0]; A_t_A[i][j] += value[i][tempCol] * value[j][tempCol]; } } }}void DenseMatrix::right_dom_SV(int *cluster, int *cluster_size, int n_Clusters, double ** CV, double *cluster_quality, int flag){ int *pointer = new int[n_Clusters+1], *index = new int[numCol], *range = new int[2]; double **A_t_A; pointer[0] = 0; for (int i = 1; i < n_Clusters; i++) pointer[i] = pointer[i-1] + cluster_size[i-1]; for (int i = 0; i < numCol; i++){ int tempCluster = cluster[i]; index[pointer[tempCluster]] = i; pointer[tempCluster]++; } pointer[0] = 0; for (int i = 1; i <= n_Clusters; i++) pointer[i] = pointer[i-1] + cluster_size[i-1]; A_t_A =new double *[numRow]; for (int i = 0; i < numRow; i++) A_t_A [i] = new double[numRow]; if (flag < 0){ for (int i = 0; i < n_Clusters; i++){ range[0] = pointer[i]; range[1] = pointer[i+1]; A_trans_A(1, index, range, A_t_A); /* for (int k=0; k< numRow; k++){ for (int j=0; j< numRow; j++) cout<<A_t_A[k][j]<<" "; cout<<endl; }*/ power_method(A_t_A, numRow, CV[i], CV[i], cluster_quality[i]); } } else if ((flag >= 0) && (flag < n_Clusters)){ range[0] = pointer[flag]; range[1] = pointer[flag+1]; A_trans_A(1, index, range, A_t_A); power_method(A_t_A, numRow, CV[flag], CV[flag], cluster_quality[flag]); } else if ((flag >= n_Clusters) && (flag <2*n_Clusters)){ range[0] = pointer[flag-n_Clusters]; range[1] = pointer[flag-n_Clusters+1]; A_trans_A(1, index, range, A_t_A); power_method(A_t_A, numRow, NULL, CV[flag-n_Clusters], cluster_quality[flag-n_Clusters]); } delete [] pointer; delete [] index; delete [] range; for (int i = 0; i < numRow; i++) delete [] A_t_A[i]; delete [] A_t_A;}double DenseMatrix::euc_dis(double *x, int i, double norm_x) /* Given squared L2-norms of the vecs and v, norm[i] and norm_v, compute the Euc-dis between ith vec in the matrix and v, result is returned. Used (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c */{ double result=0.0; for (int j = 0; j < numRow; j++) result += x[j] * value[j][i]; result *= -2.0; result += norm[i] + norm_x; return result;}void DenseMatrix::euc_dis(double *x, double norm_x, double *result) /* Given squared L2-norms of the vecs and x, norm[i] and norm_x, compute the Euc-dis between each vec in the matrix with x, results are stored in array 'result'. Since the matrix is dense, not taking advantage of (x-c)^T (x-c) = x^T x - 2 x^T c + c^T c but the abstract class definition needs the parameter of 'norm_x' */{ for (int i = 0; i < numCol; i++) result[i] = euc_dis(x, i, norm_x);}void DenseMatrix::Kullback_leibler(double *x, double *result, int laplace){ for (int i=0; i<numCol; i++) result [i] = Kullback_leibler(x, i, laplace);}double DenseMatrix::Kullback_leibler(double *x, int i, int laplace) // Given the KL-norm of vec[i], norm[i], (already considered prior) // compute KL divergence between vec[i] in the matrix with x, // result is returned. 'sum_log' is sum of logs of x[j] // Take advantage of KL(p, q) = // \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i) // KL norm is in unit of nats NOT bits { double result = 0.0, row_inv_alpha = smoothingFactor / numRow, m_e = smoothingFactor; if (priors[i] > 0){ switch (laplace){ case NO_SMOOTHING: for (int j = 0; j < numRow; j++){ if(x[j] > 0.0) result += value[j][i] * log(x[j]); else if (value[j][i] > 0.0) return MY_DBL_MAX; // if KL(vec[i], x) is inf. give it a huge number 1.0e20 } result = norm[i]-result; break; case UNIFORM_SMOOTHING: for (int j = 0; j < numRow; j++) result += value[j][i] * log(x[j] + row_inv_alpha); result = norm[i] - result + log(1 + smoothingFactor); break; case MAXIMUM_ENTROPY_SMOOTHING: for (int j = 0; j < numRow; j++) result += value[j][i] * log(x[j] + m_e * p_x[j]); result = norm[i] - result + log(1 + smoothingFactor); break; case LAPLACE_SMOOTHING: for (int j = 0; j < numRow; j++) result += (value[j][i] + row_inv_alpha) * log(x[j]); result = norm[i] - result / (1 + smoothingFactor); break; default: break; } } return result;}void DenseMatrix::Kullback_leibler(double *x, double *result, int laplace, double L1norm_x){ for (int i = 0; i < numCol; i++) result[i] = Kullback_leibler(x, i, laplace, L1norm_x);}double DenseMatrix::Kullback_leibler(double *x, int i, int laplace, double L1norm_x) // Given the KL-norm of vec[i], norm[i], (already considered prior) // compute KL divergence between vec[i] in the matrix with x, // result is returned. 'sum_log' is sum of logs of x[j] // Take advantage of KL(p, q) = // \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i) // KL norm is in unit of nats NOT bits{ double result = 0.0, row_inv_alpha = smoothingFactor * L1norm_x / numRow , m_e = smoothingFactor * L1norm_x; if (priors[i] > 0){ switch (laplace){ case NO_SMOOTHING: for (int j = 0; j < numRow; j++){ if (x[j] > 0.0) result += value[j][i] * log(x[j]); else if (value[j][i] > 0.0) return MY_DBL_MAX; // if KL(vec[i], x) is inf. give it a huge number 1.0e20 } result = norm[i] - result + log(L1norm_x); break; case UNIFORM_SMOOTHING: for (int j = 0; j < numRow; j++) result += value[j][i] * log(x[j] + row_inv_alpha); result = norm[i] - result + log((1+smoothingFactor) * L1norm_x); break; case MAXIMUM_ENTROPY_SMOOTHING: for (int j = 0; j < numRow; j++) result += value[j][i] * log(x[j] + m_e * p_x[j]); result = norm[i] - result + log((1+smoothingFactor) * L1norm_x); break; case LAPLACE_SMOOTHING: for (int j = 0; j < numRow; j++) result += (value[j][i] + row_inv_alpha) * log(x[j]); result = norm[i] - result / (1+smoothingFactor); break; default: break; } } return result;}double DenseMatrix::Jenson_Shannon(double *x, int i, double prior_x) // Given the prior of vec[i] // compute KL divergence between vec[i] in the matrix with x, // result in nats is returned. { double result = 0.0, *p_bar, p1, p2, tempPriorsI = priors[i]; if ((tempPriorsI > 0) && (prior_x > 0)){ p1 = tempPriorsI / (tempPriorsI + prior_x); p2 = prior_x / (tempPriorsI + prior_x); p_bar = new double[numRow]; for (int j = 0; j < numRow; j++) p_bar[j] = p2 * x[j] + p1 * value[j][i]; result = p1 * Kullback_leibler(p_bar, i, NO_SMOOTHING) + ::Kullback_leibler(x, p_bar, numRow) * p2; //result /= L1_norm[i] + l1n_x; delete [] p_bar; } return result; // the real JS value should be this result devided by L1_norm[i]+l1n_x}void DenseMatrix::Jenson_Shannon(double *x, double *result, double prior_x){ for (int i = 0; i < numCol; i++) result[i] = Jenson_Shannon(x, i, prior_x);}void DenseMatrix::computeNorm_2() /* compute the squared L2 norms of each vector in the dense matrix */{ if (norm == NULL){ norm = new double[numCol]; memoryUsed += numCol * sizeof(double); } for (int i = 0; i < numCol; i++){ norm[i] = 0.0; for (int j = 0; j < numRow; j++){ double tempValue = value[j][i]; norm[i] += tempValue * tempValue; } }}void DenseMatrix::computeNorm_1() /* compute the L1 norms of each vector in the dense matrix */{ if (L1_norm == NULL){ L1_norm = new double[numCol]; memoryUsed += numCol * sizeof(double); } for (int i = 0; i < numCol; i++){ L1_norm[i] = 0.0; for (int j = 0; j < numRow; j++) L1_norm[i] += fabs(value[j][i]); L1_sum += L1_norm [i]; }}void DenseMatrix::computeNorm_KL(int laplace) /* compute the KL norms of each vector p_i in the dense matrix i.e. \sum_x p_i(x) \log p_i(x) the norm[i] is in unit of nats NOT bits */{ double row_inv_alpha = smoothingFactor / numRow; Norm_sum = 0; if (norm == NULL){ norm = new double[numCol]; memoryUsed += numCol * sizeof(double); } switch (laplace){ case NO_SMOOTHING: case UNIFORM_SMOOTHING: for (int i = 0; i < numCol; i++){ norm[i] = 0.0; for (int j = 0; j < numRow; j++){ double tempValue = value[j][i]; if (tempValue > 0.0) norm[i] += tempValue * log(tempValue); } Norm_sum += norm[i] * priors[i]; } break; case MAXIMUM_ENTROPY_SMOOTHING: for (int i = 0; i < numCol; i++){ norm[i] = 0.0; for (int j = 0; j < numRow; j++){ double tempValue = value[j][i]; if (tempValue > 0.0) norm[i] += tempValue * log(tempValue); } Norm_sum += norm[i] * priors[i]; } if (p_x == NULL){ p_x = new double [numRow]; memoryUsed += numRow * sizeof(double); } for (int j = 0; j < numRow; j++){ p_x[j] =0; for (int i = 0; i < numCol; i++) p_x[j] += priors[i] * value[j][i]; } break; case LAPLACE_SMOOTHING: for (int i = 0; i < numCol; i++){ norm[i] = 0.0; for (int j = 0; j < numRow; j++){ double tempValue = value[j][i]; norm[i] += (tempValue + row_inv_alpha) * log(tempValue + row_inv_alpha); norm[i] = norm[i] / (1+smoothingFactor) + log(1+smoothingFactor); Norm_sum += norm[i] * priors[i]; } } break; default: break; } Norm_sum /= log(2.0);}void DenseMatrix::normalize_mat_L2() /* L2 normalize each vector in the dense matrix to have L2 norm 1 */{ for (int i = 0; i < numCol; i++){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -