📄 cluster.c
字号:
/* The C clustering library for cDNA microarray data. * Copyright (C) 2002 Michiel Jan Laurens de Hoon. * * This library was written at the Laboratory of DNA Information Analysis, * Human Genome Center, Institute of Medical Science, University of Tokyo, * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. * Contact: mdehoon@ims.u-tokyo.ac.jp * * Permission to use, copy, modify, and distribute this software and its * documentation with or without modifications and for any purpose and * without fee is hereby granted, provided that any copyright notices * appear in all copies and that both those copyright notices and this * permission notice appear in supporting documentation, and that the * names of the contributors or copyright holders not be used in * advertising or publicity pertaining to distribution of the software * without specific prior permission. * * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE * OR PERFORMANCE OF THIS SOFTWARE. * */#include <time.h>#include <stdlib.h>#include <math.h>#include <float.h>#include "ranlib.h"#include "cluster.h"#ifdef WINDOWS# include <windows.h>#endif/* ************************************************************************ */#ifdef WINDOWS/* Then we make a Windows DLL */int WINAPIclusterdll_init (HANDLE h, DWORD reason, void* foo){ return 1;}#endif/* ************************************************************************ */double CALL mean(int n, double x[]){ double result = 0.; int i; for (i = 0; i < n; i++) result += x[i]; result /= n; return result;}/* ************************************************************************ */double CALL median (int n, double x[])/*Find the median of X(1), ... , X(N), using as much of the quicksortalgorithm as is needed to isolate it.N.B. On exit, the array X is partially ordered.Based on Alan J. Miller's median.f90 routine.*/{ int i, j; int nr = n / 2; int nl = nr - 1; int even = 0; /* hi & lo are position limits encompassing the median. */ int lo = 0; int hi = n-1; if (n==2*nr) even = 1; if (n<3) { if (n<1) return 0.; if (n == 1) return x[0]; return 0.5*(x[0]+x[1]); } /* Find median of 1st, middle & last values. */ do { int loop; int mid = (lo + hi)/2; double result = x[mid]; double xlo = x[lo]; double xhi = x[hi]; if (xhi<xlo) { double temp = xlo; xlo = xhi; xhi = temp; } if (result>xhi) result = xhi; else if (result<xlo) result = xlo; /* The basic quicksort algorithm to move all values <= the sort key (XMED) * to the left-hand end, and all higher values to the other end. */ i = lo; j = hi; do { while (x[i]<result) i++; while (x[j]>result) j--; loop = 0; if (i<j) { double temp = x[i]; x[i] = x[j]; x[j] = temp; i++; j--; if (i<=j) loop = 1; } } while (loop); /* Decide which half the median is in. */ if (even) { if (j==nl && i==nr) /* Special case, n even, j = n/2 & i = j + 1, so the median is * between the two halves of the series. Find max. of the first * half & min. of the second half, then average. */ { int k; double xmax = x[0]; double xmin = x[n-1]; for (k = lo; k <= j; k++) xmax = max(xmax,x[k]); for (k = i; k <= hi; k++) xmin = min(xmin,x[k]); return 0.5*(xmin + xmax); } if (j<nl) lo = i; if (i>nr) hi = j; if (i==j) { if (i==nl) lo = nl; if (j==nr) hi = nr; } } else { if (j<nr) lo = i; if (i>nr) hi = j; /* Test whether median has been isolated. */ if (i==j && i==nr) return result; } } while (lo<hi-1); if (even) return (0.5*(x[nl]+x[nr])); if (x[lo]>x[hi]) { double temp = x[lo]; x[lo] = x[hi]; x[hi] = temp; } return x[nr];}/* ********************************************************************* */staticint compare(const void* a, const void* b)/* Helper function for sort. Previously, this was a nested function under sort, * which is not allowed under ANSI C. */{ const double term1 = *(*(double**)a); const double term2 = *(*(double**)b); if (term1 < term2) return -1; if (term1 > term2) return +1; return 0;}void CALL sort(int n, const double data[], int index[])/* Sets up an index table given the data, such that data[index[]] is in * increasing order. Sorting is done on the pointers, from which the indeces * are recalculated. The array data is unchanged. */{ int i; const double** p = malloc(n*sizeof(double*)); const double* start = data; for (i = 0; i < n; i++) p[i] = &(data[i]); qsort(p, n, sizeof(double*), compare); for (i = 0; i < n; i++) index[i] = (int)(p[i]-start); free(p);}staticvoid getrank (int n, double data[], double rank[])/* Calculates the ranks of the elements in the array data. Two elements with the * same value get the same rank, equal to the average of the ranks had the * elements different values. */{ int i; int* index = malloc(n*sizeof(int)); /* Call sort to get an index table */ sort (n, data, index); /* Build a rank table */ for (i = 0; i < n; i++) rank[index[i]] = i; /* Fix for equal ranks */ i = 0; while (i < n) { int m; double value = data[index[i]]; int j = i + 1; while (j < n && data[index[j]] == value) j++; m = j - i; /* number of equal ranks found */ value = rank[index[i]] + (m-1)/2.; for (j = i; j < i + m; j++) rank[index[j]] = value; i += m; } free (index); return;}/* ********************************************************************* */void CALL svd(int m, int n, double** u, double w[], double** v, int* ierr)/* * This subroutine is a translation of the Algol procedure svd, * Num. Math. 14, 403-420(1970) by Golub and Reinsch. * Handbook for Auto. Comp., Vol II-Linear Algebra, 134-151(1971). * * This subroutine determines the singular value decomposition * t * A=usv of a real m by n rectangular matrix, where m is greater * then or equal to n. Householder bidiagonalization and a variant * of the QR algorithm are used. * * * On input. * * m is the number of rows of A (and u). * * n is the number of columns of A (and u) and the order of v. * * u contains the rectangular input matrix A to be decomposed. * * On output. * * w contains the n (non-negative) singular values of a (the * diagonal elements of s). they are unordered. if an * error exit is made, the singular values should be correct * for indices ierr+1,ierr+2,...,n. * * u contains the matrix u (orthogonal column vectors) of the * decomposition. * if an error exit is made, the columns of u corresponding * to indices of correct singular values should be correct. * * v contains the matrix v (orthogonal) of the decomposition. * if an error exit is made, the columns of v corresponding * to indices of correct singular values should be correct. * * ierr is set to * zero for normal return, * k if the k-th singular value has not been * determined after 30 iterations. * * Questions and comments should be directed to B. S. Garbow, * Applied Mathematics division, Argonne National Laboratory * * Modified to eliminate machep * * Translated to C by Michiel de Hoon, Human Genome Center, * University of Tokyo, for inclusion in the C Clustering Library. * This routine is less general than the original svd routine, as * it focuses on the singular value decomposition as needed for * clustering. In particular, * - We require m >= n * - We calculate both u and v in all cases * - We pass the input array A via u; this array is subsequently * overwritten. * - We allocate for the array rv1, used as a working space, * internally in this routine, instead of passing it as an * argument. * 2003.06.05 */{ int i, j, k, i1, k1, l1, its; double* rv1 = malloc(n*sizeof(double)); double c,f,h,s,x,y,z; int l = 0; double g = 0.0; double scale = 0.0; double anorm = 0.0; *ierr = 0; /* Householder reduction to bidiagonal form */ for (i = 0; i < n; i++) { l = i + 1; rv1[i] = scale * g; g = 0.0; s = 0.0; scale = 0.0; for (k = i; k < m; k++) scale += fabs(u[k][i]); if (scale != 0.0) { for (k = i; k < m; k++) { u[k][i] /= scale; s += u[k][i]*u[k][i]; } f = u[i][i]; g = (f >= 0) ? -sqrt(s) : sqrt(s); h = f * g - s; u[i][i] = f - g; if (i < n-1) { for (j = l; j < n; j++) { s = 0.0; for (k = i; k < m; k++) s += u[k][i] * u[k][j]; f = s / h; for (k = i; k < m; k++) u[k][j] += f * u[k][i]; } } for (k = i; k < m; k++) u[k][i] *= scale; } w[i] = scale * g; g = 0.0; s = 0.0; scale = 0.0; if (i<n-1) { for (k = l; k < n; k++) scale += fabs(u[i][k]); if (scale != 0.0) { for (k = l; k < n; k++) { u[i][k] /= scale; s += u[i][k] * u[i][k]; } f = u[i][l]; g = (f >= 0) ? -sqrt(s) : sqrt(s); h = f * g - s; u[i][l] = f - g; for (k = l; k < n; k++) rv1[k] = u[i][k] / h; for (j = l; j < m; j++) { s = 0.0; for (k = l; k < n; k++) s += u[j][k] * u[i][k]; for (k = l; k < n; k++) u[j][k] += s * rv1[k]; } for (k = l; k < n; k++) u[i][k] *= scale; } } anorm = max(anorm,fabs(w[i])+fabs(rv1[i])); } /* accumulation of right-hand transformations */ for (i = n-1; i>=0; i--) { if (i < n-1) { if (g != 0.0) { for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g; /* double division avoids possible underflow */ for (j = l; j < n; j++) { s = 0.0; for (k = l; k < n; k++) s += u[i][k] * v[k][j]; for (k = l; k < n; k++) v[k][j] += s * v[k][i]; } } } for (j = l; j < n; j++) { v[i][j] = 0.0; v[j][i] = 0.0; } v[i][i] = 1.0; g = rv1[i]; l = i; } /* accumulation of left-hand transformations */ for (i = n-1; i >= 0; i--) { l = i + 1; g = w[i]; if (i!=n-1) for (j = l; j < n; j++) u[i][j] = 0.0; if (g!=0.0) { if (i!=n-1) { for (j = l; j < n; j++) { s = 0.0; for (k = l; k < m; k++) s += u[k][i] * u[k][j]; /* double division avoids possible underflow */ f = (s / u[i][i]) / g; for (k = i; k < m; k++) u[k][j] += f * u[k][i]; } } for (j = i; j < m; j++) u[j][i] /= g; } else for (j = i; j < m; j++) u[j][i] = 0.0; u[i][i] += 1.0; } /* diagonalization of the bidiagonal form */ for (k = n-1; k >= 0; k--) { k1 = k-1; its = 0; while(1) /* test for splitting */ { for (l = k; l >= 0; l--) { l1 = l-1; if (fabs(rv1[l]) + anorm == anorm) break; /* rv1[0] is always zero, so there is no exit * through the bottom of the loop */ if (fabs(w[l1]) + anorm == anorm) /* cancellation of rv1[l] if l greater than 0 */ { c = 0.0; s = 1.0; for (i = l; i <= k; i++) { f = s * rv1[i]; rv1[i] *= c; if (fabs(f) + anorm == anorm) break; g = w[i]; h = sqrt(f*f+g*g); w[i] = h; c = g / h; s = -f / h; for (j = 0; j < m; j++) { y = u[j][l1]; z = u[j][i]; u[j][l1] = y * c + z * s; u[j][i] = -y * s + z * c; } } break; } } /* test for convergence */ z = w[k]; if (l==k) /* convergence */ { if (z < 0.0) /* w[k] is made non-negative */ { w[k] = -z; for (j = 0; j < n; j++) v[j][k] = -v[j][k]; } break; } else if (its==30) { *ierr = k; break; } else /* shift from bottom 2 by 2 minor */ { its++; x = w[l]; y = w[k1]; g = rv1[k1]; h = rv1[k]; f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); g = sqrt(f*f+1.0); f = ((x - z) * (x + z) + h * (y / (f + (f >= 0 ? g : -g)) - h)) / x; /* next qr transformation */ c = 1.0; s = 1.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -