⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.c

📁 聚类分析的源码集
💻 C
📖 第 1 页 / 共 5 页
字号:
  }  if (!flag) return 0.;  denomx = con + dis + exx;  denomy = con + dis + exy;  if (denomx==0) return 1;  if (denomy==0) return 1;  tau = (con-dis)/sqrt(denomx*denomy);  return 1.-tau;}/* *********************************************************************  */staticvoid setmetric (char dist,  double (**metric)    (int,double**,double**,int**,int**, const double[],int,int,int) ){ switch(dist)  { case ('e'): *metric = &euclid; break;    case ('h'): *metric = &harmonic; break;    case ('b'): *metric = &cityblock; break;    case ('c'): *metric = &correlation; break;    case ('a'): *metric = &acorrelation; break;    case ('u'): *metric = &ucorrelation; break;    case ('x'): *metric = &uacorrelation; break;    case ('s'): *metric = &spearman; break;    case ('k'): *metric = &kendall; break;    default: *metric = &euclid; break;  }  return;}/* *********************************************************************  */void CALL initran(void)/*Purpose=======The routine initran initializes the random number generator using the currenttime. The current epoch time in seconds is used as a seed for the standard Crandom number generator. The first two random number generated by the standardC random number generator are then used to initialize the ranlib random numbergenerator.External Subroutines:time.h:     timeranlib.h:   setall============================================================================*/{ int initseed = time(0);  int iseed1, iseed2;  srand(initseed);  iseed1 = rand();  iseed2 = rand();  setall (iseed1, iseed2);  return;}/* ************************************************************************ */void CALL randomassign (int nclusters, int nelements, int clusterid[])/*Purpose=======The randomassign routine performs an initial random clustering, needed fork-means or k-median clustering. Elements (genes or microarrays) are randomlyassigned to clusters. First, nclust elements are randomly chosen to be assignedto the clusters 0..nclust-1 in order to guarantee that none of the clustersare empty. The remaining elements are then randomly assigned to a cluster.Arguments=========nclust  (input) intThe number of clusters.nelements  (input) intThe number of elements to be clustered (i.e., the number of genes or microarraysto be clustered).clusterid  (output) int array, dimension( nelements )The cluster number to which an element was assigned.External Functions:ranlib: int genprm============================================================================*/{ int i;  long* map = malloc(nelements*sizeof(long));  /* Initialize mapping */  for (i = 0; i < nelements; i++) map[i] = i;  /* Create a random permutation of this mapping */  genprm (map, nelements);  /* Assign each of the first nclusters elements to a different cluster   * to avoid empty clusters */  for (i = 0; i < nclusters; i++) clusterid[map[i]] = i;  /* Assign other elements randomly to a cluster */  for (i = nclusters; i < nelements; i++)    clusterid[map[i]] = ignuin (0,nclusters-1);  free(map);  return;}/* ********************************************************************* */void getclustermean(int nclusters, int nrows, int ncolumns,  double** data, int** mask, int clusterid[], double** cdata, int** cmask,  int transpose)/*Purpose=======The getclustermean routine calculates the cluster centroids, given to whichcluster each element belongs. The centroid is defined as the mean over allelements for each dimension.Arguments=========nclusters  (input) intThe number of clusters.nrows     (input) intThe number of rows in the gene expression data matrix, equal to the number ofgenes.ncolumns  (input) intThe number of columns in the gene expression data matrix, equal to the number ofmicroarrays.data       (input) double array, dimension( nrows,ncolumns )The array containing the gene expression data.mask       (input) int array, dimension( nrows,ncolumns )This array shows which data values are missing. Ifmask[i][j] == 0, then data[i][j] is missing.clusterid  (output) int array, dimension( nrows or ncolumns )The cluster number to which each element belongs. If transpose==0, then thedimension of clusterid is equal to nrows (the number of genes). Otherwise, itis equal to ncolumns (the number of microarrays).cdata      (output) double array, dimension( nclusters,ncolumns ) (transpose==0)                               or dimension( nrows, nclusters) (transpose==1)On exit of getclustermean, this array contains the cluster centroids.cmask      (output) int array, dimension( nclusters,ncolumns ) (transpose==0)                            or dimension( nrows, nclusters) (transpose==1)This array shows which data values of are missing for each centroid. Ifcmask[i][j] == 0, then cdata[i][j] is missing. A data value is missing for acentroid if the corresponding data values of the cluster members are allmissing.transpose  (input) intIf transpose==0, clusters of rows (genes) are specified. Otherwise, clusters ofcolumns (microarrays) are specified.========================================================================*/{ int i, j, k;  if (transpose==0)  { int** count = malloc(nclusters*sizeof(int*));    for (i = 0; i < nclusters; i++)    { count[i] = calloc(ncolumns,sizeof(int));      for (j = 0; j < ncolumns; j++) cdata[i][j] = 0.;    }    for (k = 0; k < nrows; k++)    { i = clusterid[k];      for (j = 0; j < ncolumns; j++)        if (mask[k][j] != 0)        { cdata[i][j] = cdata[i][j] + data[k][j];          count[i][j] = count[i][j] + 1;        }    }    for (i = 0; i < nclusters; i++)    { for (j = 0; j < ncolumns; j++)      { if (count[i][j]>0)        { cdata[i][j] = cdata[i][j] / count[i][j];          cmask[i][j] = 1;        }        else          cmask[i][j] = 0;      }      free (count[i]);    }    free (count);  }  else  { int** count = malloc(nrows*sizeof(int*));    for (i = 0; i < nrows; i++)    { count[i] = calloc(nclusters,sizeof(int));      for (j = 0; j < nclusters; j++) cdata[i][j] = 0.;    }    for (k = 0; k < ncolumns; k++)    { i = clusterid[k];      for (j = 0; j < nrows; j++)      { if (mask[j][k] != 0)        { cdata[j][i] = cdata[j][i] + data[j][k];          count[j][i] = count[j][i] + 1;        }      }    }    for (i = 0; i < nrows; i++)    { for (j = 0; j < nclusters; j++)      { if (count[i][j]>0)        { cdata[i][j] = cdata[i][j] / count[i][j];          cmask[i][j] = 1;        }        else          cmask[i][j] = 0;      }      free (count[i]);    }    free (count);  }  return;}/* ********************************************************************* */void getclustermedian(int nclusters, int nrows, int ncolumns,  double** data, int** mask, int clusterid[], double** cdata, int** cmask,  int transpose)/*Purpose=======The getclustermedian routine calculates the cluster centroids, given to whichcluster each element belongs. The centroid is defined as the median over allelements for each dimension.Arguments=========nclusters  (input) intThe number of clusters.nrows     (input) intThe number of rows in the gene expression data matrix, equal to the number ofgenes.ncolumns  (input) intThe number of columns in the gene expression data matrix, equal to the number ofmicroarrays.data       (input) double array, dimension( nrows,ncolumns )The array containing the gene expression data.mask       (input) int array, dimension( nrows,ncolumns )This array shows which data values are missing. Ifmask[i][j] == 0, then data[i][j] is missing.clusterid  (output) int array, dimension( nrows or ncolumns )The cluster number to which each element belongs. If transpose==0, then thedimension of clusterid is equal to nrows (the number of genes). Otherwise, itis equal to ncolumns (the number of microarrays).cdata      (output) double array, dimension( nclusters,ncolumns ) (transpose==0)                               or dimension( nrows, nclusters) (transpose==1)On exit of getclustermedian, this array contains the cluster centroids.cmask      (output) int array, dimension( nclusters,ncolumns ) (transpose==0)                            or dimension( nrows, nclusters) (transpose==1)This array shows which data values of are missing for each centroid. Ifcmask[i][j] == 0, then cdata[i][j] is missing. A data value is missing for acentroid if the corresponding data values of the cluster members are allmissing.transpose  (input) intIf transpose==0, clusters of rows (genes) are specified. Otherwise, clusters ofcolumns (microarrays) are specified.========================================================================*/{ int i, j, k;  if (transpose==0)  { double* temp = malloc(nrows*sizeof(double));    for (i = 0; i < nclusters; i++)    { for (j = 0; j < ncolumns; j++)      { int count = 0;        for (k = 0; k < nrows; k++)          if (i==clusterid[k] && mask[k][j])          { temp[count] = data[k][j];            count++;          }        if (count>0)        { cdata[i][j] = median (count,temp);          cmask[i][j] = 1;        }        else        { cdata[i][j] = 0.;          cmask[i][j] = 0;        }      }    }    free (temp);  }  else  { double* temp = malloc(ncolumns*sizeof(double));    for (i = 0; i < nclusters; i++)    { for (j = 0; j < nrows; j++)      { int count = 0;        for (k = 0; k < ncolumns; k++)          if (i==clusterid[k] && mask[j][k])          { temp[count] = data[j][k];            count++;          }        if (count>0)        { cdata[j][i] = median (count,temp);          cmask[j][i] = 1;        }        else        { cdata[j][i] = 0.;          cmask[j][i] = 0;        }      }    }    free (temp);  }  return;}/* ********************************************************************* */void getclustermedoid(int nclusters, int nelements, double** distance,  int clusterid[], int centroids[], double errors[])/*Purpose=======The getclustermedoid routine calculates the cluster centroids, given to whichcluster each element belongs. The centroid is defined as the element with thesmallest sum of distances to the other elements.Arguments=========nclusters  (input) intThe number of clusters.nelements  (input) intThe total number of elements.distmatrix (input) double array, ragged  (number of rows is nelements, number of columns is equal to the row number)The distance matrix. To save space, the distance matrix is given in theform of a ragged array. The distance matrix is symmetric and has zeroson the diagonal. See distancematrix for a description of the content.clusterid  (output) int array, dimension( nelements )The cluster number to which each element belongs.centroid   (output) int array, dimension( nclusters )The index of the element that functions as the centroid for each cluster.errors     (output) double array, dimension( nclusters )The within-cluster sum of distances between the items and the clustercentroid.========================================================================*/{ int i, j, k;  for (j = 0; j < nclusters; j++) errors[j] = DBL_MAX;  for (i = 0; i < nelements; i++)  { double d = 0.0;    j = clusterid[i];    for (k = 0; k < nelements; k++)    { if (i==k || clusterid[k]!=j) continue;      d += (i < k ? distance[k][i] : distance[i][k]);      if (d > errors[j]) break;    }    if (d < errors[j])    { errors[j] = d;      centroids[j] = i;    }  }}/* ********************************************************************* */staticvoid emalg (int nclusters, int nrows, int ncolumns,  double** data, int** mask, double weight[], int transpose, int init_given,  void getclustercenter    (int,int,int,double**,int**,int[],double**,int**,int),  double metric (int,double**,double**,int**,int**,const double[],int,int,int),  int clusterid[], double** cdata, int** cmask){ const int nobjects = (transpose==0) ? nrows : ncolumns;  const int ndata = (transpose==0) ? ncolumns : nrows;  int* cn = calloc(nclusters,sizeof(int));  /* This will contain the number of elements in each cluster. This is needed   * to check for empty clusters.   */  int* savedids = malloc(nobjects*sizeof(int));  /* needed to check for periodic behavior */  int same;  int changed;  int iteration = 0;  int period = 10;  long* order = malloc(nobjects*sizeof(long));  int jj;  for (jj = 0; jj < nobjects; jj++) order[jj] = jj;  if(!init_given) randomassign (nclusters, nobjects, clusterid);  for (jj = 0; jj < nobjects; jj++)  { int ii = clusterid[jj];    cn[ii]++;  }  /* Start the loop */  do  { int ii;    if (iteration % period == 0)    { /* save the current clustering solution */      for (ii = 0; ii < nobjects; ii++) savedids[ii] = clusterid[ii];      period = period * 2;    }    iteration += 1;    /* Find the center */    getclustercenter (nclusters, nrows, ncolumns, data, mask,                      clusterid, cdata, cmask, transpose);    /* Create a random order (except if the user specified an initial     * clustering, in which case we run the algorithm fully     * deterministically.  */    if (!init_given) genprm (order, nobjects);    changed = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -