logodds.c

来自「EM算法的改进」· C语言 代码 · 共 510 行 · 第 1/2 页

C
510
字号
      means[i][j] /= alen;    }  }  /* compute the maximum sum of Pearson's correlation coefficient for      motif pairs  */  for (i=0; i<nmotifs; i++) {			/* "from" motif */    int alen = los[i]->alen;		 	/* alphabet length */    los[i]->corr = NULL;     Resize(los[i]->corr, nmotifs, double);	/* create correlation array */    for (j=0; j<i; j++) {			/* "to" motif */      double rsum_max = LITTLE;			/* max. sum of r */      LO *lo1, *lo2;      int w1, w2;      double *mu1, *mu2;      for (o=0; o<2; o++) {		/* align i to j, then j to i */        int m1, m2;        if (o==0) {			/* align motif i to motif j */          m1 = i;          m2 = j;        } else {			/* align motif j to motif i */          m1 = j;          m2 = i;        }	lo1 = los[m1];	lo2 = los[m2];	w1 = lo1->w;	w2 = lo2->w;	mu1 = means[m1];	mu2 = means[m2];	for (k=0; k<w2; k++) {			/* alignment */	  double rsum = 0;			/* sum of corr. coeffs */	  for (l=0, m=k; l<w1 && m<w2; l++, m++) { /* column pair */	    /* correlation of column l in motif 1 and column m in motif 2 */	    double sum1=0, sum2=0, sum3=0;	    double r, denom;	    for (n=0; n<alen; n++) { 		/* letter */	      double a = lo1->logodds(l,n) - mu1[l];	      double b = lo2->logodds(m,n) - mu2[m];	      sum1 += a * b;	      sum2 += a * a;	      sum3 += b * b;	    } /* letter */            denom = sqrt(sum2*sum3);	    r = denom ? sum1/denom : 1;	    rsum += r;	  } /* column pair */	  rsum_max = MAX(rsum_max, rsum);	} /* alignment */      } /* i to j, j to i */      los[i]->corr[j] = rsum_max/MIN(los[i]->w, los[j]->w);	/* save */    } /* to motif */  } /* from motif */} /* motif_corr *//**********************************************************************//*	make_double_lo	Create a double-letter logodds matrix for efficiency.*//**********************************************************************/extern void make_double_lo(  LO *los[],		/* array of pointers to log-odds matrices */  int nmotifs 		/* number of log-odds matrices in los */) {  int i, j, k, imotif;   for (imotif=0; imotif<nmotifs; imotif++) {	/* each motif */    LO *lo = los[imotif];		/* motif */    int w = lo->w;			/* width of motif */    BOOLEAN pair = lo->pair;            /* double motif if true */    int alen = lo->alen;		/* length of motif alphabet */    int ncols = (alen+1)*(alen+1)+1;	/* columns (letters) in double matrix */    int nrows = (w+1)/2;		/* rows in hashed matrix */    LOGODDS logodds = lo->logodds;	/* single-letter matrix */    LOGODDS2 logodds2;			/* double-letter matrix */    /*       Create the double-letter logodds matrix that gives scores for      each possible pair of letters.  Alphabet length is extended      by one for the "blank" character necessary when the sequence length      or motif length is odd.    */    if (pair) nrows *= 2;		/* double number of rows if 2 motifs */    create_2array(logodds2, LOGODDS2B, nrows, ncols);    for (i=0; i<w; i+=2) {                      /* motif position */      for (j=0; j<alen; j++) {			/* letter in position+0 */        for (k=0; k<=alen; k++) {               /* letter in position+1 */          logodds2(i/2, dhash(j, k, alen)) =            (LOGODDS2B) ((i==(w-1) || k==alen) ?              logodds(i,j) : logodds(i,j)+logodds(i+1,k));          if (pair) {	    logodds2((nrows/2)+(i/2), dhash(j, k, alen)) =              (LOGODDS2B) ((i==(w-1) || k==alen) ?		logodds(w+i,j) : logodds(w+i,j)+logodds(w+i+1,k));          }        } /* letter 1 */      } /* letter 0 */    } /* motif position */    lo->logodds2 = logodds2;  } /* motif */} /* make_double_lo *//**********************************************************************//*	scale_lo	Scale and round the log-odds matrices so that all entries are	in range [0...range].	Matrix entries will be rounded to log_10(range) significant digits.	Bit score values  can be restored (with some loss of significance) by:		score_bit = (score/scale) + (w*offset)*//**********************************************************************/extern void scale_lo(  LO *los[],		/* array of pointers to log-odds matrices */  int nmotifs,		/* number of log-odds matrices in los */  int range  		/* set entries in matrices to [0..range] */){  int i, j, imotif;  /*    Compute the scale and offset factors for each motif    and apply them to the logodds matrices to scale them    to [0..range]  */  for (imotif=0; imotif<nmotifs; imotif++) {    LO *lo = los[imotif];               /* logodds structure */    int a = lo->alen;                   /* length of alphabet */    int w = lo->w;                      /* width of motif */    int size = w*range+1;		/* largest possible score */    double small = BIG;                 /* smallest entry pos logodds matrix */    double large = -BIG;                /* largest entry pos logodds matrix */    double smalln = BIG;                /* smallest entry neg logodds matrix */    double largen = -BIG;               /* largest entry neg logodds matrix */    /* find the smallest/largest entry in the logodds matrix */    for (i=0; i<w; i++) {      for (j=0; j<a; j++) {        small = MIN(small, lo->logodds(i,j));        large = MAX(large, lo->logodds(i,j));        if (lo->pair) {                 /* negative motif */          smalln = MIN(smalln, lo->logodds(w+i,j));          largen = MAX(largen, lo->logodds(w+i,j));        }      }    }	    /* skip this motif if it has no information */    if (large==small || (lo->pair && largen==smalln)) {      lo->scale = 0;      continue;    }    /* compute scale and offset so that matrix entries in [0..range] */    lo->scale = range/(large-small);	/* positive motif */    RND(lo->scale, RNDDIG, lo->scale);	/* round to RNDDIG places */    lo->offset = small;    if (lo->pair) {                     /* negative motif */      /* set scale factor and offset for 3-class scores */      lo->scalen = range/(largen-smalln);      lo->offsetn = smalln;      lo->ln_lambda1 = log(250000.0);	/* needed by score3class */      lo->ln_lambda2 = 0;		/* needed by score3class */      small = score3class(0, 0, lo);      large = score3class(size-1, size-1, lo);      lo->scale3 =  (size-1)/(large-small);      RND(lo->scale3, RNDDIG, lo->scale3);	/* round to RNDDIG places */      lo->offset3 = small;    }    /* scale, offset and round logodds matrix entries to range [0..range] *//*fprintf(stderr, "PSSM:\n"); *//*setup_hash_alph(DNAB); */    for (i=0; i<w; i++) {      for (j=0; j<a; j++) {        lo->logodds(i,j) =          bit_to_scaled(lo->logodds(i,j), 1, lo->scale, lo->offset);/*if (strchr(DNA0, unhash(j))) fprintf(stderr, "%d %c %5.2f ", j, unhash(j), lo->logodds(i,j));*/        if (lo->pair) {                 /* negative motif */          lo->logodds(w+i,j) =            bit_to_scaled(lo->logodds(w+i,j), 1, lo->scalen, lo->offsetn);        }      }/*fprintf(stderr, "\n");*/    }  } /* imotif */} /* scale_lo */  /**********************************************************************//*	shuffle_cols	Shuffle the columns of each motif.*//**********************************************************************/extern void shuffle_cols(  LO *los[],		/* array of pointers to log-odds matrices */  int nmotifs 		/* number of log-odds matrices in los */){  int i, j, imotif;  srand48(0);  for (imotif=0; imotif<nmotifs; imotif++) {    double tmp[MAXSITE+1][MAXALPH+1];	/* temporary logodds matrix */    int permute[MAXSITE];		/* list to permute */    LO *lo = los[imotif];    int w = lo->w; 			/* width of motif */    int a = lo->alen;			/* length of alphabet */    /* set up permute list */    for (i=0; i < w; i++) {		      permute[i] = i;    }    /* do 50 random swaps to permute list */    for (i=0; i < 50; i++) {		      int c1 = (int) (w * drand48());      int c2 = (int) (w * drand48());      swap(permute[c1], permute[c2], int);    }    /* announce */    printf("Permuting columns of motif %d: ", imotif+1);    for (i=0; i < w; i++) printf("%d ", permute[i]);    printf("\n");    /* move logodds to tmp in permuted column order */    for (i=0; i < w; i++) {      for (j=0; j < a; j++) {	tmp[i][j] = lo->logodds(permute[i], j);      }    }    /* copy tmp to logodds */    for (i=0; i < w; i++) {      for (j=0; j < a; j++) {	lo->logodds(i,j) = tmp[i][j];      }    }  }} /* shuffle_cols */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?