📄 dpalign.c
字号:
/* * $Id: dpalign.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.1 2005/07/29 00:18:53 nadya * Initial revision * *//*#define DEBUG*//* dpalign.c *//************************************************************************* Copyright ** (1999) The Regents of the University of California. ** All Rights Reserved. ** Author: Timothy L. Bailey* ** Permission to use, copy, modify, and distribute any part of ** this software for educational, research and non-profit purposes,** without fee, and without a written agreement is hereby granted, ** provided that the above copyright notice, this paragraph and ** the following three paragraphs appear in all copies. ** ** Those desiring to incorporate this software into commercial ** products or use for commercial purposes should contact the ** Technology Transfer Office, University of California, San Diego,** 9500 Gilman Drive, La Jolla, California, 92093-09m, ** Ph: (619) 534 5815. ** ** IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ** ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR ** CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF ** THE USE OF THIS SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA ** HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ** ** THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE ** UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE ** MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. ** THE UNIVERSITY OF CALIFORNIA MAKES NO REPRESENTATIONS AND ** EXTENDS NO WARRANTIES OF ANY KIND, EITHER EXPRESSED OR IMPLIED, ** INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ** MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT ** THE USE OF THE MATERIAL WILL NOT INFRINGE ANY PATENT, ** TRADEMARK OR OTHER RIGHTS. *************************************************************************//* 10-3-99 tlb; replace nrdb with back */#ifdef DPALIGN_SO#define DEFINE_GLOBALS#endif#include "meme.h"#include "general.h"/* data structures*/typedef struct dp_table_entry { /* entry in dp_table */ double v; /* score: max(e, f, g) */ double e; /* score when gap in first seq here */ double f; /* score when gap in second seq here */ double g; /* score when no gap at this position */} DP_TABLE_ENTRY;typedef DP_TABLE_ENTRY **DP_TABLE; /* rows: seq1 colums: seq2 */#define MATCH(i, j) logodds((i)-1, (int)eseq[(j)-1])/******************************************************************************//* dp_align Align a sequence to a logodds matrix using dynamic programming. Returns a string containing the two aligned sequences each terminated by a null.*//******************************************************************************/extern char *dp_align( int alength, /* length of alphabet */ int n, /* length of logodds matrix */ LOGODDS logodds, /* logodds matrix */ char *seq1, /* consensus of logodds */ int m, /* length of sequence */ char *eseq, /* integer encoded sequence */ double wg, /* gap cost (initialization) */ double ws, /* space cost (extension) */ BOOLEAN endgaps /* penalize end gaps if TRUE */){ int i, j, k, ii; int gaptype; /* used in traceback */ DP_TABLE dpt; /* dyn. prog. table */ int best_i, best_j; /* best endpoint */ char *align1=NULL, *align2=NULL; /* create the dynamic programming table */ create_2array(dpt, DP_TABLE_ENTRY, n+1, m+1); /* initialize the table */ dpt[0][0].v = dpt[0][0].f = 0; for (i=0; i<=n; i++)dpt[i][0].v = dpt[i][0].f = dpt[i][0].g = dpt[i][0].e = 0; for (j=0; j<=m; j++)dpt[0][j].v = dpt[0][j].f = dpt[0][j].g = dpt[0][j].e = 0; if (endgaps) { /* penalize end gaps */ for (i=1; i<=n; i++) dpt[i][0].v = dpt[i][0].e = -wg - i*ws; for (j=1; j<=m; j++) dpt[0][j].v = dpt[0][j].f = -wg - j*ws; } else { /* don't penalize end gaps */ for (i=1; i<=n; i++) dpt[i][0].v = dpt[i][0].e = 0; for (j=1; j<=m; j++) dpt[0][j].v = dpt[0][j].f = 0; } /* fill up the table row by row; note: row in logodds matrix corresponds to a position in seq1 */ best_i = 1; best_j = m; for (i=1; i<=n; i++) { /* row: logodds matrix col */ for (j=1; j<=m; j++) { /* column: seq2 */ dpt[i][j].e = MAX(dpt[i][j-1].e, dpt[i][j-1].v-wg)-ws; dpt[i][j].f = MAX(dpt[i-1][j].f, dpt[i-1][j].v-wg)-ws; dpt[i][j].g = dpt[i-1][j-1].v + MATCH(i,j); dpt[i][j].v = MAX(dpt[i][j].e, MAX(dpt[i][j].f, dpt[i][j].g)); if (!endgaps && (j==m || i==n) && dpt[i][j].v>dpt[best_i][best_j].v) { best_i = i; best_j = j; } } /* column */ } /* row */#ifdef DEBUG /* print the dp table */ printf("dp table\n"); for(i=-1; i<=m; i++) printf("%-3c %3.3s ", i==0 ? '-' : i>0 ? unhash(eseq[i-1]) : ' ', " "); printf("\n"); for (i=0; i<=n; i++) { printf("%3c ", i>0 ? seq1[i-1] : '-'); for (j=0; j<=m; j++) { printf("%3.0f %3.0f ", dpt[i][j].g, dpt[i][j].f); } printf("\n"); printf("%3.3s ", " "); for (j=0; j<=m; j++) { printf("%3.0f %3.0f ", dpt[i][j].e, dpt[i][j].v); } printf("\n\n"); } printf("best_i = %d best_j = %d\n", best_i, best_j);#endif /* trace the optimal alignment and put into arrays align1 and align2 */ Resize(align1, m+n+1, char); Resize(align2, m+n+1, char); k = 0; /* pointer into alignment */ gaptype = 0; if (!endgaps) { /* print end gap */ for (i=n; i>best_i;) { align1[k] = seq1[--i]; align2[k++] = ' '; gaptype = (dpt[i][m].v-wg-ws == dpt[i+1][m].f) ? 0 : 2; } for (j=m; j>best_j;) { align1[k] = ' '; align2[k++] = unhash(eseq[--j]); gaptype = (dpt[n][j].v-wg-ws == dpt[n][j+1].e) ? 0 : 1; } } /* endgap */ for (i=endgaps?n:best_i, j=endgaps?m:best_j; i>0 && j>0 && k<=m+n; ) {#ifdef DEBUG printf("i %d j %d k %d e %f f %f g %f v %f gt %d %c %c\n", i,j,k, dpt[i][j].e, dpt[i][j].f, dpt[i][j].g, dpt[i][j].v, gaptype, seq1[i-1], unhash(eseq[j-1]));#endif switch (gaptype) { case 0: /* not in gap */ if (dpt[i][j].g > dpt[i][j].e) { if (dpt[i][j].g > dpt[i][j].f) { /* match */ align1[k] = seq1[--i]; align2[k++] = unhash(eseq[--j]); } else { /* gap in seq2 */ align1[k] = seq1[--i]; align2[k++] = ' '; gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2; } } else { if (dpt[i][j].e > dpt[i][j].f) { /* gap in seq1 */ align1[k] = ' '; align2[k++] = unhash(eseq[--j]); gaptype = (dpt[i][j].v-wg-ws == dpt[i][j+1].e) ? 0 : 1; } else { /* gap in seq2 */ align1[k] = seq1[--i]; align2[k++] = ' '; gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2; } } break; case 1: /* in seq1 gap */ align1[k] = ' '; align2[k++] = unhash(eseq[--j]); gaptype = (dpt[i][j].v-wg-ws == dpt[i][j+1].e) ? 0 : 1; break; case 2: /* in seq2 gap */ align1[k] = seq1[--i]; align2[k++] = ' '; gaptype = (dpt[i][j].v-wg-ws == dpt[i+1][j].f) ? 0 : 2; break; } /* gaptype */ } /* i, j, k */ /* fill in remainder of seq1 */ for (ii=i; ii>0; ii--) { align1[k] = seq1[ii-1]; align2[k++] = ' '; } for (ii=j; ii>0; ii--) { align2[k] = unhash(eseq[ii-1]); align1[k++] = ' '; } /* reverse the alignment */ for (i=0; i<k/2; i++) { SWAP(char, align1[i], align1[k-1-i]); SWAP(char, align2[i], align2[k-1-i]); } align1[k] = align2[k] = '\0'; /* combine the alignments into one string */ Resize(align1, 2*k+3, char); for (i=0, j=k+1; i<=k; i++, j++) align1[j] = align2[i]; myfree(align2); /*printf("%s\n%s\n", align1, align1+k+1);*/ /* free the dynamic programming table */ free_2array(dpt, n+1); return align1;} /* dp_align *//******************************************************************************//* dp_ma_motif_seqs Perform a multiple alignment of a motif versus a set of sequences. Returns and an array of sequences. *//******************************************************************************/extern char **dp_ma_motif_seqs( int alength, /* length of sequence alphabet */ int w, /* length of motif */ LOGODDS logodds, /* motif log-odds matrix */ char *cons, /* consensus sequence of motif */ int n, /* number of sequences */ char **eseqs, /* (encoded) sequences to align */ int *lengths, /* sequence lengths */ char **names, /* sequence names */ double wg, /* gap cost (initialization) */ double ws, /* space cost (extension) */ BOOLEAN endgaps /* penalize end gaps if TRUE */){ int i, j, k, l, nspaces; char c; char **motif=NULL, **seq2=NULL; /* aligned motif/sequence pairs */ int *spaces=NULL; /* max. number of spaces in alignmement preceeding position i of motif */ int *pos1=NULL; /* position of char i of motif in alignment */ char **alignment; /* the alignment */ int maxl; /* maximum sequence length */ /* get maximum sequence length */ for (i=0, maxl=w; i<n; i++) maxl = MAX(maxl, lengths[i]); /* create arrays of aligned pairs */ Resize(motif, n, char *); Resize(seq2, n, char *); /* align motif to each sequence */ for (i=0; i<n; i++) { char *pa; /* pairwise alignment */ pa = dp_align(alength, w, logodds, cons, lengths[i], eseqs[i], wg, ws, endgaps); motif[i] = pa; /* start of motif */ while (*pa) pa++; /* find end of motif */ seq2[i] = ++pa; /* start of seq2 */ } /* combine alignments into one multiple alignment */ /* compute maximum number of spaces preceeding each position in motif */ Resize(spaces, maxl+1, int); for (i=0; i<=w; i++) spaces[i] = 0; for (i=0; i<n; i++) { /* alignment */ for (j=k=nspaces=0; ; j++) { /* position in aligned motif */ c = motif[i][j]; /* character in aligned motif */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -