📄 cluster.c

📁 聚类分析的源码集
💻 C
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/* The C clustering library for cDNA microarray data. * Copyright (C) 2002 Michiel Jan Laurens de Hoon. * * This library was written at the Laboratory of DNA Information Analysis, * Human Genome Center, Institute of Medical Science, University of Tokyo, * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. * Contact: mdehoon@ims.u-tokyo.ac.jp *  * Permission to use, copy, modify, and distribute this software and its * documentation with or without modifications and for any purpose and * without fee is hereby granted, provided that any copyright notices * appear in all copies and that both those copyright notices and this * permission notice appear in supporting documentation, and that the * names of the contributors or copyright holders not be used in * advertising or publicity pertaining to distribution of the software * without specific prior permission. *  * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOFTWARE. *  */#include <time.h>#include <stdlib.h>#include <math.h>#include <float.h>#include "ranlib.h"#include "cluster.h"#ifdef WINDOWS#  include <windows.h>#endif/* ************************************************************************ */#ifdef WINDOWS/* Then we make a Windows DLL */int WINAPIclusterdll_init (HANDLE h, DWORD reason, void* foo){  return 1;}#endif/* ************************************************************************ */double CALL mean(int n, double x[]){ double result = 0.;  int i;  for (i = 0; i < n; i++) result += x[i];  result /= n;  return result;}/* ************************************************************************ */double CALL median (int n, double x[])/*Find the median of X(1), ... , X(N), using as much of the quicksortalgorithm as is needed to isolate it.N.B. On exit, the array X is partially ordered.Based on Alan J. Miller's median.f90 routine.*/{ int i, j;  int nr = n / 2;  int nl = nr - 1;  int even = 0;  /* hi & lo are position limits encompassing the median. */  int lo = 0;  int hi = n-1;  if (n==2*nr) even = 1;  if (n<3)  { if (n<1) return 0.;    if (n == 1) return x[0];    return 0.5*(x[0]+x[1]);  }  /* Find median of 1st, middle & last values. */  do  { int loop;    int mid = (lo + hi)/2;    double result = x[mid];    double xlo = x[lo];    double xhi = x[hi];    if (xhi<xlo)    { double temp = xlo;      xlo = xhi;      xhi = temp;    }    if (result>xhi) result = xhi;    else if (result<xlo) result = xlo;    /* The basic quicksort algorithm to move all values <= the sort key (XMED)     * to the left-hand end, and all higher values to the other end.     */    i = lo;    j = hi;    do    { while (x[i]<result) i++;      while (x[j]>result) j--;      loop = 0;      if (i<j)      { double temp = x[i];        x[i] = x[j];        x[j] = temp;        i++;        j--;        if (i<=j) loop = 1;      }    } while (loop); /* Decide which half the median is in. */    if (even)    { if (j==nl && i==nr)        /* Special case, n even, j = n/2 & i = j + 1, so the median is         * between the two halves of the series.   Find max. of the first         * half & min. of the second half, then average.         */        { int k;          double xmax = x[0];          double xmin = x[n-1];          for (k = lo; k <= j; k++) xmax = max(xmax,x[k]);          for (k = i; k <= hi; k++) xmin = min(xmin,x[k]);          return 0.5*(xmin + xmax);        }      if (j<nl) lo = i;      if (i>nr) hi = j;      if (i==j)      { if (i==nl) lo = nl;        if (j==nr) hi = nr;      }    }    else    { if (j<nr) lo = i;      if (i>nr) hi = j;      /* Test whether median has been isolated. */      if (i==j && i==nr) return result;    }  }  while (lo<hi-1);  if (even) return (0.5*(x[nl]+x[nr]));  if (x[lo]>x[hi])  { double temp = x[lo];    x[lo] = x[hi];    x[hi] = temp;  }  return x[nr];}/* *********************************************************************  */staticint compare(const void* a, const void* b)/* Helper function for sort. Previously, this was a nested function under sort, * which is not allowed under ANSI C. */{ const double term1 = *(*(double**)a);  const double term2 = *(*(double**)b);  if (term1 < term2) return -1;  if (term1 > term2) return +1;  return 0;}void CALL sort(int n, const double data[], int index[])/* Sets up an index table given the data, such that data[index[]] is in * increasing order. Sorting is done on the pointers, from which the indeces * are recalculated. The array data is unchanged. */{ int i;  const double** p = malloc(n*sizeof(double*));  const double* start = data;  for (i = 0; i < n; i++) p[i] = &(data[i]);  qsort(p, n, sizeof(double*), compare);  for (i = 0; i < n; i++) index[i] = (int)(p[i]-start);  free(p);}staticvoid getrank (int n, double data[], double rank[])/* Calculates the ranks of the elements in the array data. Two elements with the * same value get the same rank, equal to the average of the ranks had the * elements different values. */{ int i;  int* index = malloc(n*sizeof(int));  /* Call sort to get an index table */  sort (n, data, index);  /* Build a rank table */  for (i = 0; i < n; i++) rank[index[i]] = i;  /* Fix for equal ranks */  i = 0;  while (i < n)  { int m;    double value = data[index[i]];    int j = i + 1;    while (j < n && data[index[j]] == value) j++;    m = j - i; /* number of equal ranks found */    value = rank[index[i]] + (m-1)/2.;    for (j = i; j < i + m; j++) rank[index[j]] = value;    i += m;  }  free (index);  return;}/* ********************************************************************* */void CALL svd(int m, int n, double** u, double w[], double** v, int* ierr)/* *   This subroutine is a translation of the Algol procedure svd, *   Num. Math. 14, 403-420(1970) by Golub and Reinsch. *   Handbook for Auto. Comp., Vol II-Linear Algebra, 134-151(1971). * *   This subroutine determines the singular value decomposition *        t *   A=usv  of a real m by n rectangular matrix, where m is greater *   then or equal to n.  Householder bidiagonalization and a variant *   of the QR algorithm are used. *   * *   On input. * *      m is the number of rows of A (and u). * *      n is the number of columns of A (and u) and the order of v. * *      u contains the rectangular input matrix A to be decomposed. * *   On output. * *      w contains the n (non-negative) singular values of a (the *        diagonal elements of s).  they are unordered.  if an *        error exit is made, the singular values should be correct *        for indices ierr+1,ierr+2,...,n. * *      u contains the matrix u (orthogonal column vectors) of the *        decomposition. *        if an error exit is made, the columns of u corresponding *        to indices of correct singular values should be correct. * *      v contains the matrix v (orthogonal) of the decomposition. *        if an error exit is made, the columns of v corresponding *        to indices of correct singular values should be correct. * *      ierr is set to *        zero       for normal return, *        k          if the k-th singular value has not been *                   determined after 30 iterations. * *   Questions and comments should be directed to B. S. Garbow, *   Applied Mathematics division, Argonne National Laboratory * *   Modified to eliminate machep * *   Translated to C by Michiel de Hoon, Human Genome Center, *   University of Tokyo, for inclusion in the C Clustering Library. *   This routine is less general than the original svd routine, as *   it focuses on the singular value decomposition as needed for *   clustering. In particular, *     - We require m >= n *     - We calculate both u and v in all cases *     - We pass the input array A via u; this array is subsequently *       overwritten. *     - We allocate for the array rv1, used as a working space, *       internally in this routine, instead of passing it as an *       argument. *   2003.06.05 */{ int i, j, k, i1, k1, l1, its;  double* rv1 = malloc(n*sizeof(double));  double c,f,h,s,x,y,z;  int l = 0;  double g = 0.0;  double scale = 0.0;  double anorm = 0.0;  *ierr = 0;  /* Householder reduction to bidiagonal form */  for (i = 0; i < n; i++)  { l = i + 1;    rv1[i] = scale * g;    g = 0.0;    s = 0.0;    scale = 0.0;    for (k = i; k < m; k++) scale += fabs(u[k][i]);    if (scale != 0.0)    { for (k = i; k < m; k++)      { u[k][i] /= scale;        s += u[k][i]*u[k][i];      }      f = u[i][i];      g = (f >= 0) ? -sqrt(s) : sqrt(s);      h = f * g - s;      u[i][i] = f - g;      if (i < n-1)      { for (j = l; j < n; j++)        { s = 0.0;          for (k = i; k < m; k++) s += u[k][i] * u[k][j];          f = s / h;          for (k = i; k < m; k++) u[k][j] += f * u[k][i];        }      }      for (k = i; k < m; k++) u[k][i] *= scale;    }    w[i] = scale * g;    g = 0.0;    s = 0.0;    scale = 0.0;    if (i<n-1)    { for (k = l; k < n; k++) scale += fabs(u[i][k]);      if (scale != 0.0)      { for (k = l; k < n; k++)        { u[i][k] /= scale;          s += u[i][k] * u[i][k];        }        f = u[i][l];        g = (f >= 0) ? -sqrt(s) : sqrt(s);        h = f * g - s;        u[i][l] = f - g;        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;        for (j = l; j < m; j++)        { s = 0.0;          for (k = l; k < n; k++) s += u[j][k] * u[i][k];          for (k = l; k < n; k++) u[j][k] += s * rv1[k];        }        for (k = l; k < n; k++)  u[i][k] *= scale;      }    }    anorm = max(anorm,fabs(w[i])+fabs(rv1[i]));  }  /* accumulation of right-hand transformations */  for (i = n-1; i>=0; i--)  { if (i < n-1)    { if (g != 0.0)      { for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;        /* double division avoids possible underflow */        for (j = l; j < n; j++)        { s = 0.0;          for (k = l; k < n; k++) s += u[i][k] * v[k][j];          for (k = l; k < n; k++) v[k][j] += s * v[k][i];        }      }    }    for (j = l; j < n; j++)    { v[i][j] = 0.0;      v[j][i] = 0.0;    }    v[i][i] = 1.0;    g = rv1[i];    l = i;  }  /* accumulation of left-hand transformations */  for (i = n-1; i >= 0; i--)  { l = i + 1;    g = w[i];    if (i!=n-1)      for (j = l; j < n; j++) u[i][j] = 0.0;    if (g!=0.0)    { if (i!=n-1)      { for (j = l; j < n; j++)        { s = 0.0;          for (k = l; k < m; k++) s += u[k][i] * u[k][j];          /* double division avoids possible underflow */          f = (s / u[i][i]) / g;          for (k = i; k < m; k++) u[k][j] += f * u[k][i];        }      }      for (j = i; j < m; j++) u[j][i] /= g;    }    else      for (j = i; j < m; j++) u[j][i] = 0.0;    u[i][i] += 1.0;  }  /* diagonalization of the bidiagonal form */  for (k = n-1; k >= 0; k--)  { k1 = k-1;    its = 0;    while(1)    /* test for splitting */    { for (l = k; l >= 0; l--)      { l1 = l-1;        if (fabs(rv1[l]) + anorm == anorm) break;        /* rv1[0] is always zero, so there is no exit         * through the bottom of the loop */        if (fabs(w[l1]) + anorm == anorm)        /* cancellation of rv1[l] if l greater than 0 */        { c = 0.0;          s = 1.0;          for (i = l; i <= k; i++)          { f = s * rv1[i];            rv1[i] *= c;            if (fabs(f) + anorm == anorm) break;            g = w[i];            h = sqrt(f*f+g*g);            w[i] = h;            c = g / h;            s = -f / h;            for (j = 0; j < m; j++)            { y = u[j][l1];              z = u[j][i];              u[j][l1] = y * c + z * s;              u[j][i] = -y * s + z * c;            }          }          break;        }      }      /* test for convergence */      z = w[k];      if (l==k) /* convergence */      { if (z < 0.0)        /*  w[k] is made non-negative */        { w[k] = -z;          for (j = 0; j < n; j++) v[j][k] = -v[j][k];        }        break;      }      else if (its==30)      { *ierr = k;        break;      }      else      /* shift from bottom 2 by 2 minor */      { its++;        x = w[l];        y = w[k1];        g = rv1[k1];        h = rv1[k];        f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);        g = sqrt(f*f+1.0);        f = ((x - z) * (x + z) + h * (y / (f + (f >= 0 ? g : -g)) - h)) / x;        /* next qr transformation */        c = 1.0;        s = 1.0;
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -