⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sparsematrix.cc

📁 gmeans-- Clustering with first variation and splitting 文本聚类算法Gmeans ,使用了3种相似度函数,cosine,euclidean ,K
💻 CC
📖 第 1 页 / 共 2 页
字号:
// NOTE(review): the following lines are the tail of a Kullback_leibler overload
// whose beginning lies before this excerpt (PRIOR_LAPLACE case plus epilogue);
// they are reproduced verbatim and documented where the intent is visible.
	    result += (vals[j]+row_inv_alpha) * log(x[rowinds[j]]) ;
	  result = norm[i]-result/(1+alpha);
	  break;
	}
    }
  return result;
}

void SparseMatrix::Kullback_leibler(float *x, float *result, int laplace, float L1norm_x)
  // Given the KL-norm of the vecs, norm[i] (already considered prior),
  //   compute KL divergence between each vec in the matrix with x,
  //   results are stored in array 'result'.
  //   Take advantage of KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
  // x        : dense vector of length n_row (caller-normalized).
  // result   : output array, one KL value per column vector; must hold n_col floats.
  // laplace  : smoothing mode (NOLAPLACE / CENTER_LAPLACE / PRIOR_LAPLACE).
  // L1norm_x : L1 norm of x, forwarded to the per-vector overload.
{
  int i;
  for ( i = 0; i < n_col; i++)
    result[i] = Kullback_leibler(x, i, laplace,L1norm_x);
}

float SparseMatrix::Kullback_leibler(float *x, int i, int laplace)
  /* Given the L1_norms of vec[i] and x, (vec[i] and x need be normalized before function-call
     compute KL divergence between vec[i] in the matrix with x,
     result is returned.
     Take advantage of KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
     the KL is in unit of nats NOT bits.
  */
{
  float result=0.0, row_inv_alpha=alpha/n_row;
  // Vectors with zero prior contribute nothing: result stays 0.
  if (priors[i] >0)
    {
      switch(laplace)
	{
	case NOLAPLACE:
	  // Unsmoothed KL: any zero in x where vec[i] is nonzero makes KL infinite.
	  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	    {
	      if(x[rowinds[j]] >0.0)
		result += vals[j] * log(x[rowinds[j]]);
	      else
		return HUGE_NUMBER; // if KL(vec[i], x) is inf. give it a huge number 1.0e20
	    }
	  result = norm[i]-result;
	  break;
	case CENTER_LAPLACE:
	  // this vector alpha is alpha (given by user) divided by |Y|,
	  // row_inv_alpha is to make computation faster.
	  // Smoothing is applied to the centroid x only; log(1+alpha) renormalizes.
	  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	    result += vals[j] * log(x[rowinds[j]]+row_inv_alpha) ;
	  result = norm[i]-result+log(1+alpha);
	  break;
	case PRIOR_LAPLACE:
	  // this vector alpha is alpha (given by user) divided by |X|*|Y|,
	  // row_alpha is its L1-norm.
	  // Smoothing is applied to vec[i]; division by (1+alpha) renormalizes.
	  // NOTE(review): assumes x[rowinds[j]] > 0 here (no zero guard as in
	  // NOLAPLACE) — presumably x is already smoothed by the caller; confirm.
	  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	    result += (vals[j]+row_inv_alpha) * log(x[rowinds[j]]) ;
	  result = norm[i]-result/(1+alpha);
	  break;
	}
    }
  return result;
}

void SparseMatrix::Kullback_leibler(float *x, float *result, int laplace)
  // Given the KL-norm of the vecs, norm[i] (already considered prior),
  //   compute KL divergence between each vec in the matrix with x,
  //   results are stored in array 'result'.
  //   Take advantage of KL(p, q) = \sum_i p_i log(p_i) - \sum_i p_i log(q_i) = norm[i] - \sum_i p_i log(q_i)
{
  int i;
  for ( i = 0; i < n_col; i++)
    result[i] = Kullback_leibler(x, i, laplace);
}

float SparseMatrix::Jenson_Shannon(float *x, int i, float prior_x)
  /* Given the prior of vec[i],
     compute JS divergence between vec[i] in the data matrix with x,
     result in nats is returned.
  */
  // JS(p,q) = p1*KL(p, p_bar) + p2*KL(q, p_bar) where p_bar = p1*p + p2*q and
  // p1, p2 are the normalized priors of vec[i] and x respectively.
{
  float result=0.0, * p_bar, p1, p2;
  // Both priors must be positive, otherwise JS is defined as 0 here.
  if ((priors[i] >0) && (prior_x >0))
    {
      p1=priors[i]/(priors[i]+prior_x);
      p2=prior_x/(priors[i]+prior_x);
      // Build the dense weighted average p_bar = p1*vec[i] + p2*x.
      p_bar = new float [n_row];
      for (int j=0; j< n_row; j++)
	p_bar[j] = p2*x[j];
      for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	p_bar[rowinds[j]] += p1*vals[j];
      // First term uses the member overload (sparse vec[i] vs p_bar);
      // second term uses the free-function ::Kullback_leibler (dense x vs p_bar).
      result = p1* Kullback_leibler(p_bar, i, NOLAPLACE)
	+ ::Kullback_leibler(x, p_bar, n_row)*p2 ;
      delete [] p_bar;
    }
  return result; // the real JS value should be this result devided by L1_norm[i]+l1n_x
}

void SparseMatrix::Jenson_Shannon(float *x, float *result, float prior_x)
  /* Given the prior of vec[i] and x; vec[i] and x are all normalized
     compute JS divergence between all vec[i] in the data matrix with x,
     result in nats.
  */
{
  int i;
  for ( i = 0; i < n_col; i++)
    result[i] = Jenson_Shannon(x, i, prior_x);
}

void SparseMatrix::ComputeNorm_2()
  /* compute the squared L-2 norms for each vec in the matrix
     first check if array 'norm' has been given memory space
  */
{
  if (norm == NULL)
    {
      norm = new float [n_col];
      memory_used += n_col*sizeof(float);
    }
  for (int i = 0; i < n_col; i++)
    {
      norm[i] =0.0;
      for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	norm[i] += vals[j] * vals[j];
    }
}

void SparseMatrix::ComputeNorm_1()
  /* compute the L-1 norms for each vec in the matrix
     first check if array 'L1_norm' has been given memory space
     (note: sums raw values, not absolute values — assumes non-negative entries)
  */
{
  if (L1_norm == NULL)
    {
      L1_norm = new float [n_col];
      memory_used += n_col*sizeof(float);
    }
  for (int i = 0; i < n_col; i++)
    {
      L1_norm[i] =0.0;
      for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	L1_norm[i] += vals[j] ;
    }
}

void SparseMatrix::ComputeNorm_KL(int laplace)
  // the norm[i] is in unit of nats NOT bits
  // Precompute norm[i] = \sum_j p_j log(p_j) (the entropy term of KL) for each
  // column, under the requested Laplace smoothing; also accumulate the
  // prior-weighted total in Norm_sum, converted to bits at the end.
{
  float row_inv_alpha=alpha/n_row;
  if (norm == NULL)
    {
      norm = new float [n_col];
      memory_used += n_col*sizeof(float);
    }
  Norm_sum=0;
  switch (laplace)
    {
    case NOLAPLACE:
    case CENTER_LAPLACE:
      // No smoothing on the data side: zero entries contribute 0 (0*log 0 = 0),
      // so only stored nonzeros are summed.
      for (int i = 0; i < n_col; i++)
	{
	  norm[i] =0.0;
	  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	    norm[i] += vals[j] * log(vals[j]);
	  Norm_sum +=norm[i]*priors[i];
	}
      break;
    case PRIOR_LAPLACE:
      // Every entry (including the implicit zeros) is smoothed by row_inv_alpha;
      // the (n_row - nnz_i) term accounts for all zero entries at once.
      for (int i = 0; i < n_col; i++)
	{
	  norm[i] =0.0;
	  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	    norm[i] += (vals[j]+row_inv_alpha) * log(vals[j]+row_inv_alpha) ;
	  norm[i] += (n_row-(colptrs[i+1]-colptrs[i]))*row_inv_alpha*log(row_inv_alpha) ;
	  norm[i] = norm[i]/(1+alpha) +log(1+alpha);
	  Norm_sum +=norm[i]*priors[i];
	}
    }
  Norm_sum /= log(2.0); // convert nats to bits
}

void SparseMatrix::normalize_mat_L2()
  /* compute the L_2 norm for each vec in the matrix and L_2-normalize it
     in place; vectors with zero norm are left untouched.
  */
{
  int i, j;
  float norm; // local accumulator; shadows the member array of the same name
  for (i = 0; i < n_col; i++)
    {
      norm =0.0;
      for (j = colptrs[i]; j < colptrs[i+1]; j++)
	norm += vals[j] * vals[j];
      if( norm >0.0 )
	{
	  norm = sqrt(norm);
	  for (j = colptrs[i]; j < colptrs[i+1]; j++)
	    vals[j] /= norm;
	}
    }
}

void SparseMatrix::normalize_mat_L1()
  /* compute the L_1 norm for each vec in the matrix and L_1-normalize it
     in place; vectors with zero norm are left untouched.
  */
{
  int i, j;
  float norm; // local accumulator; shadows the member array of the same name
  for (i = 0; i < n_col; i++)
    {
      norm =0.0;
      for (j = colptrs[i]; j < colptrs[i+1]; j++)
	norm += fabs(vals[j]);
      if(norm >0)
	{
	  for (j = colptrs[i]; j < colptrs[i+1]; j++)
	    vals[j] /= norm;
	}
    }
}

// Add sparse column i into the dense concept/centroid vector CV.
void SparseMatrix::ith_add_CV(int i, float *CV)
{
  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
    CV[rowinds[j]] += vals[j];
}

// Add sparse column i, weighted by its prior, into the dense vector CV.
void SparseMatrix::ith_add_CV_prior(int i, float *CV)
{
  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
    CV[rowinds[j]] += priors[i]*vals[j];
}

// Subtract sparse column i from the dense vector CV.
void SparseMatrix::CV_sub_ith(int i, float *CV)
{
  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
    CV[rowinds[j]] -= vals[j];
}

// Subtract sparse column i, weighted by its prior, from the dense vector CV.
void SparseMatrix::CV_sub_ith_prior(int i, float *CV)
{
  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
    CV[rowinds[j]] -= priors[i]*vals[j];
}

float SparseMatrix::MutualInfo()
  // Mutual information of the joint distribution implied by the columns and
  // their priors: I = \sum_i priors[i] \sum_j vals[j] log(vals[j]/rowSum[row]),
  // where rowSum is the prior-weighted row marginal. Returned in bits.
{
  float *rowSum= new float [n_row], MI=0.0;
  int i;
  for (i=0; i<n_row; i++)
    rowSum[i] =0.0;
  // First pass: accumulate the row marginals p(row) = sum_i priors[i]*vals[j].
  for (i=0; i<n_col; i++)
    for (int j = colptrs[i]; j < colptrs[i+1]; j++)
      rowSum[rowinds[j]] +=vals[j]*priors[i];
  // Second pass: accumulate the prior-weighted log-ratio terms.
  for (i=0; i<n_col; i++)
    {
      float temp=0;
      for (int j = colptrs[i]; j < colptrs[i+1]; j++)
	temp += vals[j]*log(vals[j]/rowSum[rowinds[j]]);
      MI += temp *priors[i];
    }
  delete [] rowSum;
  return(MI/log(2.0)); // nats -> bits
}

float SparseMatrix::exponential_kernel(float *v, int i, float norm_v, float sigma_squared)
  // this function computes the exponential kernel distance between i_th data with the centroid v
  // norm[i] and norm_v are the squared L2 norms, so after the two updates
  // result holds ||vec[i] - v||^2.
  // NOTE(review): the exponent is exp(+d^2/(2*sigma^2)); a Gaussian kernel is
  // usually exp(-d^2/(2*sigma^2)) — verify against the callers whether the
  // missing minus sign is intentional.
{
  float result=0.0;
  for (int j = colptrs[i]; j < colptrs[i+1]; j++)
    result += vals[j] * v[rowinds[j]];
  result *= -2.0;
  result += norm[i]+norm_v;
  result = exp(result*0.5/sigma_squared);
  return result;
}

void SparseMatrix::exponential_kernel(float *x, float norm_x, float *result, float sigma_squared)
  //this function computes the exponential kernel distance between all data with the centroid x
  // result must hold n_col floats.
{
  for (int i = 0; i < n_col; i++)
    result[i] = exponential_kernel(x, i, norm_x, sigma_squared);
}

float SparseMatrix::i_j_dot_product(int i, int j)
//this function computes  dot product between vectors i and j
// NOTE(review): the i != j case is O(nnz_i * nnz_j); if rowinds are sorted
// within each column, a linear merge would be O(nnz_i + nnz_j) — confirm the
// storage invariant before changing.
{
  float result =0;
  if (i==j)
    // Same column: just sum the squares of its stored values.
    for ( int l= colptrs[i]; l < colptrs[i+1]; l++)
      result += vals[l]*vals[l];
  else
    // Different columns: pairwise row-index match over both nonzero lists.
    for ( int l= colptrs[i]; l < colptrs[i+1]; l++)
      for ( int k= colptrs[j]; k < colptrs[j+1]; k++)
	if(rowinds[l] == rowinds[k])
	  result += vals[l]*vals[k];
  return result;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -