⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 matrix.cc

📁 gmeans-- Clustering with first variation and splitting 文本聚类算法Gmeans ,使用了3种相似度函数,cosine,euclidean ,K
💻 CC
字号:
/* Implementation of Matrix class * Copyright (c) 2003, Yuqiang Guan */#include <assert.h>#include <stdlib.h>#include <math.h>#include <iostream.h>#include <fstream.h>#include "Matrix.h"Matrix::Matrix(int r, int c){  n_row =r;   n_col =c;  memory_used = 0;  norm  = NULL;  L1_sum = Norm_sum =0;  priors= L1_norm = NULL;}int Matrix::GetNumRow() { return n_row; }int Matrix::GetNumCol() { return n_col; }float Matrix::GetL1Sum() { return L1_sum;}long Matrix::GetMemoryUsed() { return memory_used;}float Matrix::GetNorm(int i) { return norm[i]; }float Matrix::GetL1Norm(int i) { return L1_norm[i]; }float Matrix::GetNormSum() { return Norm_sum; }void Matrix::SetAlpha(float a, int laplace){   switch(laplace)    {    case NOLAPLACE:      alpha =0;      break;    case CENTER_LAPLACE:      alpha = a;      break;    case PRIOR_LAPLACE:      alpha /= n_col;      break;     }  }float Matrix::GetAlpha(){ return alpha;}  void Matrix::SetPrior(int method, char * prior_file, int n_E_Docs, int *empty_Docs_ID){  float inv = 1.0/(n_col-n_E_Docs);  int i;  char whole_line[256];  std::ifstream gpfile(prior_file);  if (priors ==NULL)    {      priors = new float [n_col];      memory_used += n_col *sizeof(float);    }  switch(method)    {    case FILE_PRIOR:            if(gpfile.is_open())	{	  cout<<"Prior file: "<<prior_file<<" is used."<<endl;	  gpfile>>i;	  gpfile.getline(whole_line, 256, '\n');	  if(i!=n_col)	    {	      cout<<"Wrong number of vectors!!! \nSo using uniform priors ..."<<endl;	      for (int i=0; i<n_col; i++)		priors[i] = inv;	    }	  else	    {	      float sum=0, temp;	      int k=0;	      for (i = 0; i < n_col; i++)		{		  while (i<empty_Docs_ID[k])		    {		      gpfile>>priors[i];		      gpfile.getline(whole_line, 256, '\n');		      sum +=priors[i];		      i++;		    }		  gpfile>>temp;		  k++;		}	      inv = 1.0/sum;	      for (i=0; i<n_col; i++)		priors[i] *= inv;	       for (i = 0; i<n_E_Docs; i++)		 priors[empty_Docs_ID[i]]=0;	    }	  gpfile.close();	}      else	{	  cout<<"Can't open file "<<prior_file<<" !!! \nSo using uniform priors ..."<<endl;	  for (int i=0; i<n_col; i++)	    priors[i] = inv;	}      break;    case UNIFORM_PRIOR:    default:      int k=0;      for (i = 0; i < n_col; i++)	{	  while (i<empty_Docs_ID[k])	    {	      priors[i] = inv;	      i++;	    }	  k++;	}      for (i = 0; i<n_E_Docs; i++)	priors[empty_Docs_ID[i]]=0;    } }float Matrix::getPrior(int i) {  if ( (i < n_col) && (i>=0))    return priors[i];  else    return 0;}float Matrix::kernel_sum(int *pointer, int *index, int i)  //compute L2 norm of ith kernel centroid{  int start, end;  float kernel_norm_L2=0;  start = pointer[i];  end = pointer[i+1];  for (int i=start; i<end; i++)    for (int j = start; j < end; j++)      kernel_norm_L2 += Sim_Mat(index[i],index[j]);        return kernel_norm_L2;}void Matrix::kernel_norm_L2(int *cluster, int *cluster_size, int n_Clusters, float *result, int flag)//this function computes the L2 norm of kernel centroids {  int i, *pointer = new int[n_Clusters+1], *index = new int[n_col];  pointer[0] =0;  for (i=1; i<n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  for (i=0; i<n_col; i++)    {      index[pointer[cluster[i]]] = i;      pointer[cluster[i]] ++;    }  pointer[0] =0;  for (i=1; i<=n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  if (flag <0)    for (i = 0; i < n_Clusters; i++)      result[i] = sqrt(kernel_sum(pointer, index, i));  else    result[flag]= sqrt(kernel_sum(pointer, index, flag));  delete [] pointer;  delete [] index;}int Matrix::kernel_dot_product(int *pointer, int *index,int j, int n_Clusters, float *kernel_center_norm)//this function computes the polynomial kernel distance between jth data with all kernel centroids  //return the closest center index{  int start, end, temp_id=0;  float result, temp =0;    for (int i=0; i<n_Clusters; i++)    {      start = pointer[i];      end = pointer[i+1];      result =0;      for (int l = start; l < end; l++)	result +=Sim_Mat(j,index[l]);      result /= kernel_center_norm[i];      //cout<<"("<<j<<","<<i<<")="<<result<<endl;      if (result >temp)	{	  temp_id = i;	  temp =result;	}    }  return temp_id;}/*void Matrix::polynomial_kernel(int *cluster, int *cluster_size, int n_Clusters, float **result, float c, int d, float *kernel_center_norm, int flag)//this function computes the polynomial kernel distance between all data with kernel centroids{  int i, *pointer = new int[n_Clusters+1], *index = new int[n_col];  pointer[0] =0;  for (i=1; i<n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  for (i=0; i<n_col; i++)    {      index[pointer[cluster[i]]] = i;      pointer[cluster[i]] ++;    }  pointer[0] =0;  for (i=1; i<=n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];    if (flag <0)    for (i = 0; i < n_Clusters; i++)      polynomial_kernel(pointer, index, i, c, d, kernel_center_norm, result[i]);  else    polynomial_kernel(pointer, index, flag, c, d,kernel_center_norm, result[i]);  delete [] pointer;  delete [] index;}*/float Matrix::phi_x_dot_phi_y(int i, int j, float c, int d){  float result;  result = i_j_dot_product(i,j)+c;  for(int k=1; k<d; k++)    result *= result;  return result;}void Matrix::pair_wise_dot_product(float constant, int degree, int alg)//this function computes pair-wise dot product between vectors in the kernel space  //essentially it is A^T * A{  int i, j;  float temp;  cout<<"Computing pair-wise kernels..."<<endl;   Sim_Mat = new VECTOR_double[n_col];  for (j = 0; j < n_col; j++)    Sim_Mat[j] = new float[j+1];   memory_used+=(n_col+1)*n_col/2*sizeof(float);    switch (alg)    {    case KERNEL_SPHERICAL_K_MEANS:      for (i = 0; i < n_col; i++)	if((temp = phi_x_dot_phi_y(i, i, constant,degree))>0)	  Sim_Mat[i][i] = 1.0/sqrt(temp);	else	  Sim_Mat[i][i] = 0;      for(i = 1; i < n_col; i++)	for(j = 0; j <i; j++)	  Sim_Mat[i][j] = phi_x_dot_phi_y(i, j, constant,degree)*Sim_Mat[i][i]*Sim_Mat[j][j];            for (i = 0; i < n_col; i++)	if (Sim_Mat[i][i]>0)	  Sim_Mat[i][i] = 1;	else	  Sim_Mat[i][i] = 0;      break;    case KERNEL_EUCLIDEAN_K_MEANS:      for (i = 0; i < n_col; i++)	for(j = 0; j <=i; j++)	  Sim_Mat[i][j] = phi_x_dot_phi_y(i, j, constant,degree);      break;    }      /*	for (i = 0; i < n_col; i++)	{	for(j = 0; j <=i; j++)	cout<<Sim_Mat[i][j]<<"\t";	cout<<endl;	}*/}float Matrix::kernel_euc_dis(int *pointer, int *index, int j, float *kernel_center_sum, int cluster_id)  //this function computes the polynomial kernel Euclidean distance between jth data with the 'cluster_id'-th kernel centroid  //return Euclidean distance between jth data the 'cluster_id'-th center{  int start, end;  float temp, cluster_size_inv;    start = pointer[cluster_id];  end = pointer[cluster_id+1];  cluster_size_inv = 1.0/(end-start);  temp = 0;  for (int l = start; l < end; l++)    temp -=Sim_Mat(j,index[l]);  temp *= 2*cluster_size_inv;  temp += kernel_center_sum[cluster_id]*cluster_size_inv*cluster_size_inv + Sim_Mat[j][j];  return temp;}int Matrix::kernel_euc_dis(int *pointer, int *index,int j, int n_Clusters, float *kernel_center_sum)  //this function computes the polynomial kernel Euclidean distance between jth data with all kernel centroids  //return the closest center index{  int temp_id=0;  float result, temp;    temp = kernel_euc_dis(pointer, index, j, kernel_center_sum, 0);  for (int i=1; i<n_Clusters; i++)    {      result = kernel_euc_dis(pointer, index, j, kernel_center_sum, i);      //cout<<"("<<j<<","<<i<<")="<<result<<endl;      if (result < temp)	{	  temp_id = i;	  temp =result;	}    }  return temp_id;}float Matrix::pair_wise_kernel_euc_dis(int i, int j)  // compute Euclidean distance between vector i and j in kernel space{  return Sim_Mat[i][i]-2*Sim_Mat[i][j]+Sim_Mat[j][j];}  void Matrix::cluster_kernel_sum(int *cluster, int *cluster_size, int n_Clusters, float *result, int flag)//this function computes the sum of kernels of clusters{  int i, *pointer = new int[n_Clusters+1], *index = new int[n_col];  pointer[0] =0;  for (i=1; i<n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  for (i=0; i<n_col; i++)    {      index[pointer[cluster[i]]] = i;      pointer[cluster[i]] ++;    }  pointer[0] =0;  for (i=1; i<=n_Clusters; i++)    pointer[i] = pointer[i-1] + cluster_size[i-1];  if (flag <0)    for (i = 0; i < n_Clusters; i++)      result[i] = kernel_sum(pointer, index, i);  else    result[flag]= kernel_sum(pointer, index, flag);  delete [] pointer;  delete [] index;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -