dpalign.c

来自「EM算法的改进」· C语言代码 · 共 599 行 · 第 1/2 页
599 行
/* * $Id: dpalign.c 1339 2006-09-21 19:46:28Z tbailey $ *  * $Log$ * Revision 1.1  2005/07/29 00:18:53  nadya * Initial revision * *//*#define DEBUG*//* dpalign.c *//*************************************************************************	Copyright							**	(1999) The Regents of the University of California.		**	All Rights Reserved.						**	Author: Timothy L. Bailey*									**	Permission to use, copy, modify, and distribute any part of 	**	this software for educational, research and non-profit purposes,**	without fee, and without a written agreement is hereby granted, **	provided that the above copyright notice, this paragraph and 	**	the following three paragraphs appear in all copies.		**									**	Those desiring to incorporate this software into commercial 	**	products or use for commercial purposes should contact the 	**	Technology Transfer Office, University of California, San Diego,**	9500 Gilman Drive, La Jolla, California, 92093-09m, 		**	Ph: (619) 534 5815.						**									**	IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO 	**	ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR 	**	CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF 	**	THE USE OF THIS SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA 	**	HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 		**									**	THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE **	UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE 		**	MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.  **	THE UNIVERSITY OF CALIFORNIA MAKES NO REPRESENTATIONS AND 	**	EXTENDS NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, **	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 	**	MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT 	**	THE USE OF THE MATERIAL WILL NOT INFRINGE ANY PATENT, 		**	TRADEMARK OR OTHER RIGHTS.  					*************************************************************************//* 10-3-99 tlb; replace nrdb with back */#ifdef DPALIGN_SO#define DEFINE_GLOBALS#endif#include "meme.h"#include "general.h"/*  data structures*/typedef struct dp_table_entry {		/* entry in dp_table */  double v;				/* score: max(e, f, g) */  double e;				/* score when gap in first seq here */  double f;				/* score when gap in second seq here */  double g;				/* score when no gap at this position */} DP_TABLE_ENTRY;typedef DP_TABLE_ENTRY **DP_TABLE;	/* rows: seq1 colums: seq2 */#define MATCH(i, j) logodds((i)-1, (int)eseq[(j)-1])/******************************************************************************//*	dp_align	Align a sequence to a logodds matrix using dynamic programming.	Returns a string containing the two aligned sequences each	terminated by a null.*//******************************************************************************/extern char *dp_align(  int alength,					/* length of alphabet */  int n,					/* length of logodds matrix */  LOGODDS logodds,				/* logodds matrix */  char *seq1,					/* consensus of logodds */  int m,					/* length of sequence */  char *eseq,					/* integer encoded sequence */  double wg,					/* gap cost (initialization) */   double ws,					/* space cost (extension) */   BOOLEAN endgaps 				/* penalize end gaps if TRUE */){  int i, j, k, ii;  int gaptype;					/* used in traceback */  DP_TABLE dpt;					/* dyn. prog. table */  int best_i, best_j;				/* best endpoint */  char *align1=NULL, *align2=NULL;  /* create the dynamic programming table */  create_2array(dpt, DP_TABLE_ENTRY, n+1, m+1);  /* initialize the table */  dpt[0][0].v = dpt[0][0].f = 0;  for (i=0; i<=n; i++)dpt[i][0].v = dpt[i][0].f = dpt[i][0].g = dpt[i][0].e = 0;  for (j=0; j<=m; j++)dpt[0][j].v = dpt[0][j].f = dpt[0][j].g = dpt[0][j].e = 0;  if (endgaps) {				/* penalize end gaps */    for (i=1; i<=n; i++) dpt[i][0].v = dpt[i][0].e = -wg - i*ws;    for (j=1; j<=m; j++) dpt[0][j].v = dpt[0][j].f = -wg - j*ws;  } else {					/* don't penalize end gaps */    for (i=1; i<=n; i++) dpt[i][0].v = dpt[i][0].e = 0;    for (j=1; j<=m; j++) dpt[0][j].v = dpt[0][j].f = 0;  }  /* fill up the table row by row; note: row in logodds matrix corresponds     to a position in seq1   */  best_i = 1; best_j = m;  for (i=1; i<=n; i++) {			/* row: logodds matrix col */    for (j=1; j<=m; j++) {			/* column: seq2 */      dpt[i][j].e = MAX(dpt[i][j-1].e, dpt[i][j-1].v-wg)-ws;      dpt[i][j].f = MAX(dpt[i-1][j].f, dpt[i-1][j].v-wg)-ws;      dpt[i][j].g = dpt[i-1][j-1].v + MATCH(i,j);      dpt[i][j].v = MAX(dpt[i][j].e, MAX(dpt[i][j].f, dpt[i][j].g));      if (!endgaps && (j==m || i==n) && dpt[i][j].v>dpt[best_i][best_j].v) {        best_i = i; best_j = j;       }    } /* column */  } /* row */#ifdef DEBUG  /* print the dp table */  printf("dp table\n");  for(i=-1; i<=m; i++) printf("%-3c %3.3s  ", i==0 ? '-' : i>0 ? unhash(eseq[i-1]) : ' ', " ");  printf("\n");  for (i=0; i<=n; i++) {    printf("%3c ", i>0 ? seq1[i-1] : '-');    for (j=0; j<=m; j++) {      printf("%3.0f %3.0f  ", dpt[i][j].g, dpt[i][j].f);    }    printf("\n");    printf("%3.3s ", " ");    for (j=0; j<=m; j++) {      printf("%3.0f %3.0f  ", dpt[i][j].e, dpt[i][j].v);    }    printf("\n\n");  }  printf("best_i = %d best_j = %d\n", best_i, best_j);#endif  /*     trace the optimal alignment and put into arrays align1 and align2   */  Resize(align1, m+n+1, char);  Resize(align2, m+n+1, char);  k = 0;					/* pointer into alignment */  gaptype = 0;  if (!endgaps) {				/* print end gap */    for (i=n; i>best_i;) {      align1[k] = seq1[--i];      align2[k++] = ' ';      gaptype = (dpt[i][m].v-wg-ws == dpt[i+1][m].f) ? 0 : 2;    }    for (j=m; j>best_j;) {      align1[k] = ' ';      align2[k++] = unhash(eseq[--j]);      gaptype = (dpt[n][j].v-wg-ws == dpt[n][j+1].e) ? 0 : 1;    }  } /* endgap */  for (i=endgaps?n:best_i, j=endgaps?m:best_j; i>0 && j>0 && k<=m+n; ) {#ifdef DEBUG    printf("i %d j %d k %d e %f f %f g %f v %f gt %d %c %c\n", i,j,k,      dpt[i][j].e, dpt[i][j].f, dpt[i][j].g, dpt[i][j].v, gaptype, seq1[i-1],       unhash(eseq[j-1]));#endif    switch (gaptype) {      case 0:					/* not in gap */	if (dpt[i][j].g > dpt[i][j].e) {	  if (dpt[i][j].g > dpt[i][j].f) {	/* match */	    align1[k] = seq1[--i];	    align2[k++] = unhash(eseq[--j]);	  } else {				/* gap in seq2 */	    align1[k] = seq1[--i];	    align2[k++] = ' ';            gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2;	  }	} else {	  if (dpt[i][j].e > dpt[i][j].f) {	/* gap in seq1 */	    align1[k] = ' ';	    align2[k++] = unhash(eseq[--j]);            gaptype = (dpt[i][j].v-wg-ws == dpt[i][j+1].e) ? 0 : 1;	  } else {				/* gap in seq2 */	    align1[k] = seq1[--i];	    align2[k++] = ' ';            gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2;	  }	}        break;      case 1:					/* in seq1 gap */	align1[k] = ' ';	align2[k++] = unhash(eseq[--j]);	gaptype = (dpt[i][j].v-wg-ws == dpt[i][j+1].e) ? 0 : 1;        break;      case 2:					/* in seq2 gap */	align1[k] = seq1[--i];	align2[k++] = ' ';	gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2;        break;    } /* gaptype */  } /* i, j, k */  /* fill in remainder of seq1 */  for (ii=i; ii>0; ii--) {    align1[k] = seq1[ii-1];    align2[k++] = ' ';  }  for (ii=j; ii>0; ii--) {    align2[k] = unhash(eseq[ii-1]);    align1[k++] = ' ';  }  /* reverse the alignment */  for (i=0; i<k/2; i++) {    SWAP(char, align1[i], align1[k-1-i]);    SWAP(char, align2[i], align2[k-1-i]);  }  align1[k] = align2[k] = '\0';  /* combine the alignments into one string */  Resize(align1, 2*k+3, char);  for (i=0, j=k+1; i<=k; i++, j++) align1[j] = align2[i];  myfree(align2);  /*printf("%s\n%s\n", align1, align1+k+1);*/  /* free the dynamic programming table */  free_2array(dpt, n+1);  return align1;} /* dp_align *//******************************************************************************//*	dp_ma_motif_seqs	Perform a multiple alignment of a motif versus a set of sequences.	Returns and an array of sequences.    *//******************************************************************************/extern char **dp_ma_motif_seqs(  int alength,				/* length of sequence alphabet */  int w,				/* length of motif */  LOGODDS logodds,			/* motif log-odds matrix */  char *cons,				/* consensus sequence of motif */  int n,				/* number of sequences */  char **eseqs,				/* (encoded) sequences to align */  int *lengths,				/* sequence lengths */  char **names,				/* sequence names */  double wg,				/* gap cost (initialization) */   double ws,				/* space cost (extension) */   BOOLEAN endgaps			/* penalize end gaps if TRUE */){  int i, j, k, l, nspaces;  char c;  char **motif=NULL, **seq2=NULL;	/* aligned motif/sequence pairs */  int *spaces=NULL;			/* max. number of spaces in alignmement						   preceeding position i of motif */  int *pos1=NULL;			/* position of char i of motif in 					   alignment */  char **alignment;			/* the alignment */  int maxl;				/* maximum sequence length */  /* get maximum sequence length */  for (i=0, maxl=w; i<n; i++) maxl = MAX(maxl, lengths[i]);  /* create arrays of aligned pairs */  Resize(motif, n, char *);  Resize(seq2, n, char *);  /*     align motif to each sequence   */  for (i=0; i<n; i++) {    char *pa;				/* pairwise alignment */    pa = dp_align(alength, w, logodds, cons, lengths[i], eseqs[i], wg, ws,       endgaps);    motif[i] = pa; 			/* start of motif */    while (*pa) pa++;			/* find end of motif */    seq2[i] = ++pa;			/* start of seq2 */  }  /*     combine alignments into one multiple alignment   */  /* compute maximum number of spaces preceeding each position in motif */  Resize(spaces, maxl+1, int);  for (i=0; i<=w; i++) spaces[i] = 0;  for (i=0; i<n; i++) {				/* alignment */    for (j=k=nspaces=0; ; j++) {		/* position in aligned motif */      c = motif[i][j];				/* character in aligned motif */
dpalign.c - 源码说明

本页面展示了「EM算法的改进」中的 dpalign.c 源码文件，采用 C语言编程语言编写，共 599 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与法的改进相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?