📄 prfalign.c
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "clustalw.h"
#define ENDALN 127
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))
/*
* Prototypes
*/
static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2);
static lint prfscore(sint n, sint m);
static sint gap_penalty1(sint i, sint j,sint k);
static sint open_penalty1(sint i, sint j);
static sint ext_penalty1(sint i, sint j);
static sint gap_penalty2(sint i, sint j,sint k);
static sint open_penalty2(sint i, sint j);
static sint ext_penalty2(sint i, sint j);
static void padd(sint k);
static void pdel(sint k);
static void palign(void);
static void ptracepath(sint *alen);
static void add_ggaps(void);
static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2);
/*
* Global variables
*/
extern double **tmat;
extern float gap_open, gap_extend;
extern float transition_weight;
extern sint gap_pos1, gap_pos2;
extern sint max_aa;
extern sint nseqs;
extern sint *seqlen_array;
extern sint *seq_weight;
extern sint debug;
extern Boolean neg_matrix;
extern sint mat_avscore;
extern short blosum30mt[], blosum40mt[], blosum45mt[];
extern short blosum62mt2[], blosum80mt[];
extern short pam20mt[], pam60mt[];
extern short pam120mt[], pam160mt[], pam350mt[];
extern short gon40mt[], gon80mt[];
extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[];
extern short clustalvdnamt[],swgapdnamt[];
extern short idmat[];
extern short usermat[];
extern short userdnamat[];
extern Boolean user_series;
extern UserMatSeries matseries;
extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[];
extern sint max_aln_length;
extern Boolean distance_tree;
extern Boolean dnaflag;
extern char mtrxname[];
extern char dnamtrxname[];
extern char **seq_array;
extern char *amino_acid_codes;
extern char *gap_penalty_mask1,*gap_penalty_mask2;
extern char *sec_struct_mask1,*sec_struct_mask2;
extern sint struct_penalties1, struct_penalties2;
extern Boolean use_ss1, use_ss2;
extern Boolean endgappenalties;
static sint print_ptr,last_print;
static sint *displ;
static char **alignment;
static sint *aln_len;
static sint *aln_weight;
static char *aln_path1, *aln_path2;
static sint alignment_len;
static sint **profile1, **profile2;
static lint *HH, *DD, *RR, *SS;
static lint *gS;
static sint matrix[NUMRES][NUMRES];
static sint nseqs1, nseqs2;
static sint prf_length1, prf_length2;
static sint *gaps;
static sint gapcoef1,gapcoef2;
static sint lencoef1,lencoef2;
static Boolean switch_profiles;
lint prfalign(sint *group, sint *aligned)
{
static Boolean found;
static Boolean negative;
static Boolean error_given=FALSE;
static sint i, j, count = 0;
static sint NumSeq;
static sint len, len1, len2, is, minlen;
static sint se1, se2, sb1, sb2;
static sint maxres;
static sint int_scale;
static short *matptr;
static short *mat_xref;
static char c;
static lint score;
static float scale;
static double logmin,logdiff;
static double pcid;
alignment = (char **) ckalloc( nseqs * sizeof (char *) );
aln_len = (sint *) ckalloc( nseqs * sizeof (sint) );
aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) );
for (i=0;i<nseqs;i++)
if (aligned[i+1] == 0) group[i+1] = 0;
nseqs1 = nseqs2 = 0;
for (i=0;i<nseqs;i++)
{
if (group[i+1] == 1) nseqs1++;
else if (group[i+1] == 2) nseqs2++;
}
if ((nseqs1 == 0) || (nseqs2 == 0)) return(0.0);
if (nseqs2 > nseqs1)
{
switch_profiles = TRUE;
for (i=0;i<nseqs;i++)
{
if (group[i+1] == 1) group[i+1] = 2;
else if (group[i+1] == 2) group[i+1] = 1;
}
}
else
switch_profiles = FALSE;
int_scale = 100;
/*
calculate the mean of the sequence pc identities between the two groups
*/
count = 0;
pcid = 0.0;
negative=neg_matrix;
for (i=0;i<nseqs;i++)
{
if (group[i+1] == 1)
for (j=0;j<nseqs;j++)
if (group[j+1] == 2)
{
count++;
pcid += tmat[i+1][j+1];
}
}
pcid = pcid/(float)count;
if (debug > 0) fprintf(stdout,"mean tmat %3.1f\n", pcid);
/*
Make the first profile.
*/
prf_length1 = 0;
for (i=0;i<nseqs;i++)
if (group[i+1] == 1)
if(seqlen_array[i+1]>prf_length1) prf_length1=seqlen_array[i+1];
nseqs1 = 0;
if (debug>0) fprintf(stdout,"sequences profile 1:\n");
for (i=0;i<nseqs;i++)
{
if (group[i+1] == 1)
{
if (debug>0) {
extern char **names;
fprintf(stdout,"%s\n",names[i+1]);
}
len = seqlen_array[i+1];
alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) );
for (j=0;j<len;j++)
alignment[nseqs1][j] = seq_array[i+1][j+1];
for(j=len;j<prf_length1;j++)
alignment[nseqs1][j+1]=gap_pos1;
alignment[nseqs1][prf_length1+1] = ENDALN;
aln_len[nseqs1] = prf_length1;
aln_weight[nseqs1] = seq_weight[i];
nseqs1++;
}
}
/*
Make the second profile.
*/
prf_length2 = 0;
for (i=0;i<nseqs;i++)
if (group[i+1] == 2)
if(seqlen_array[i+1]>prf_length2) prf_length2=seqlen_array[i+1];
nseqs2 = 0;
if (debug>0) fprintf(stdout,"sequences profile 2:\n");
for (i=0;i<nseqs;i++)
{
if (group[i+1] == 2)
{
if (debug>0) {
extern char **names;
fprintf(stdout,"%s\n",names[i+1]);
}
len = seqlen_array[i+1];
alignment[nseqs1+nseqs2] =
(char *) ckalloc( (prf_length2+2) * sizeof (char) );
for (j=0;j<len;j++)
alignment[nseqs1+nseqs2][j] = seq_array[i+1][j+1];
for(j=len;j<prf_length2;j++)
alignment[nseqs1+nseqs2][j+1]=gap_pos1;
alignment[nseqs1+nseqs2][j] = ENDALN;
aln_len[nseqs1+nseqs2] = prf_length2;
aln_weight[nseqs1+nseqs2] = seq_weight[i];
nseqs2++;
}
}
max_aln_length = prf_length1 + prf_length2+2;
/*
calculate real length of profiles - removing gaps!
*/
len1=0;
for (i=0;i<nseqs1;i++)
{
is=0;
for (j=0; j<MIN(aln_len[i],prf_length1); j++)
{
c = alignment[i][j];
if ((c !=gap_pos1) && (c != gap_pos2)) is++;
}
len1+=is;
}
len1/=(float)nseqs1;
len2=0;
for (i=nseqs1;i<nseqs2+nseqs1;i++)
{
is=0;
for (j=0; j<MIN(aln_len[i],prf_length2); j++)
{
c = alignment[i][j];
if ((c !=gap_pos1) && (c != gap_pos2)) is++;
}
len2+=is;
}
len2/=(float)nseqs2;
if (dnaflag)
{
scale=1.0;
if (strcmp(dnamtrxname, "iub") == 0)
{
matptr = swgapdnamt;
mat_xref = def_dna_xref;
}
else if (strcmp(dnamtrxname, "clustalw") == 0)
{
matptr = clustalvdnamt;
mat_xref = def_dna_xref;
scale=0.66;
}
else
{
matptr = userdnamat;
mat_xref = dna_xref;
}
maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale);
if (maxres == 0) return((sint)-1);
/*
matrix[0][4]=transition_weight*matrix[0][0];
matrix[4][0]=transition_weight*matrix[0][0];
matrix[2][11]=transition_weight*matrix[0][0];
matrix[11][2]=transition_weight*matrix[0][0];
matrix[2][12]=transition_weight*matrix[0][0];
matrix[12][2]=transition_weight*matrix[0][0];
*/
/* fix suggested by Chanan Rubin at Compugen */
matrix[mat_xref[0]][mat_xref[4]]=transition_weight*matrix[0][0];
matrix[mat_xref[4]][mat_xref[0]]=transition_weight*matrix[0][0];
matrix[mat_xref[2]][mat_xref[11]]=transition_weight*matrix[0][0];
matrix[mat_xref[11]][mat_xref[2]]=transition_weight*matrix[0][0];
matrix[mat_xref[2]][mat_xref[12]]=transition_weight*matrix[0][0];
matrix[mat_xref[12]][mat_xref[2]]=transition_weight*matrix[0][0];
gapcoef1 = gapcoef2 = 100.0 * gap_open *scale;
lencoef1 = lencoef2 = 100.0 * gap_extend *scale;
}
else
{
if(len1==0 || len2==0) {
logmin=1.0;
logdiff=1.0;
}
else {
minlen = MIN(len1,len2);
logmin = 1.0/log10((double)minlen);
if (len2<len1)
logdiff = 1.0+0.5*log10((double)((float)len2/(float)len1));
else if (len1<len2)
logdiff = 1.0+0.5*log10((double)((float)len1/(float)len2));
else logdiff=1.0;
if(logdiff<0.9) logdiff=0.9;
}
if(debug>0) fprintf(stdout,"%d %d logmin %f logdiff %f\n",
(pint)len1,(pint)len2, logmin,logdiff);
scale=0.75;
if (strcmp(mtrxname, "blosum") == 0)
{
scale=0.75;
if (negative || distance_tree == FALSE) matptr = blosum40mt;
else if (pcid > 80.0)
{
matptr = blosum80mt;
}
else if (pcid > 60.0)
{
matptr = blosum62mt2;
}
else if (pcid > 40.0)
{
matptr = blosum45mt;
}
else if (pcid > 30.0)
{
scale=0.5;
matptr = blosum45mt;
}
else if (pcid > 20.0)
{
scale=0.6;
matptr = blosum45mt;
}
else
{
scale=0.6;
matptr = blosum30mt;
}
mat_xref = def_aa_xref;
}
else if (strcmp(mtrxname, "pam") == 0)
{
scale=0.75;
if (negative || distance_tree == FALSE) matptr = pam120mt;
else if (pcid > 80.0) matptr = pam20mt;
else if (pcid > 60.0) matptr = pam60mt;
else if (pcid > 40.0) matptr = pam120mt;
else matptr = pam350mt;
mat_xref = def_aa_xref;
}
else if (strcmp(mtrxname, "gonnet") == 0)
{
scale/=2.0;
if (negative || distance_tree == FALSE) matptr = gon250mt;
else if (pcid > 35.0)
{
matptr = gon80mt;
scale/=2.0;
}
else if (pcid > 25.0)
{
if(minlen<100) matptr = gon250mt;
else matptr = gon120mt;
}
else
{
if(minlen<100) matptr = gon350mt;
else matptr = gon160mt;
}
mat_xref = def_aa_xref;
int_scale /= 10;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -