📄 malign.c
字号:
maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
for (i=1;i<=nseqs;i++)
{
maxid[i] = -1;
for (j=1;j<=nseqs;j++)
if (maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j];
}
/* clear the memory used for the phylogenetic tree */
if (nseqs >= 2)
clear_tree(NULL);
aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
ix = 0;
for (i=1;i<=istart+1;i++)
{
aligned[i] = 1;
++ix;
output_index[i] = i;
}
for (i=istart+2;i<=nseqs;i++) aligned[i] = 0;
/* for each unaligned sequence, find it's closest pair amongst the
aligned sequences. */
group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
while (ix < nseqs)
{
if (ix > 0)
{
for (i=1;i<=nseqs;i++) {
if (aligned[i] == 0)
{
maxid[i] = -1;
for (j=1;j<=nseqs;j++)
if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0))
maxid[i] = tmat[i][j];
}
}
}
/* find the most closely related sequence to those already aligned */
max = -1;
for (i=1;i<=nseqs;i++)
{
if ((aligned[i] == 0) && (maxid[i] > max))
{
max = maxid[i];
iseq = i;
}
}
/* align this sequence to the existing alignment */
entries = 0;
for (j=1;j<=nseqs;j++)
if (aligned[j] != 0)
{
group[j] = 1;
entries++;
}
else if (iseq==j)
{
group[j] = 2;
entries++;
}
aligned[iseq] = 1;
/* EITHER....., set sequence weights equal to percent identity with new sequence */
/*
for (j=0;j<nseqs;j++)
seq_weight[j] = tmat[j+1][iseq];
*/
/* OR...., multiply sequence weights from tree by percent identity with new sequence */
for (j=0;j<nseqs;j++)
seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];
if (debug>1)
for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf (stdout,"sequence %d: %d\n", j+1,tree_weight[j]);
/*
Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
*/
sum = 0;
for (j=0;j<nseqs;j++)
if (group[j+1] == 1) sum += seq_weight[j];
if (sum == 0)
{
for (j=0;j<nseqs;j++)
seq_weight[j] = 1;
sum = j;
}
for (j=0;j<nseqs;j++)
{
seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
if (seq_weight[j] < 1) seq_weight[j] = 1;
}
if (debug > 1) {
fprintf(stdout,"new weights\n");
for (j=0;j<nseqs;j++) if (group[j+1] == 1)fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
}
score = prfalign(group, aligned);
info("Sequence:%d Score:%d",(pint)iseq,(pint)score);
if (output_order == INPUT)
{
++ix;
output_index[iseq] = iseq;
}
else
output_index[++ix] = iseq;
}
group=ckfree((void *)group);
aligned=ckfree((void *)aligned);
maxid=ckfree((void *)maxid);
aln_score();
/* make the rest (output stuff) into routine clustal_out in file amenu.c */
return(nseqs);
}
sint palign1(void) /* a profile alignment */
{
sint i,j,temp;
sint entries;
sint *aligned, *group;
float dscore;
lint score;
info("Start of Initial Alignment");
/* calculate sequence weights according to branch lengths of the tree -
weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */
temp = INT_SCALE_FACTOR/nseqs;
for (i=0;i<nseqs;i++) seq_weight[i] = temp;
distance_tree = FALSE;
/* do the initial alignment......... */
group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
for(i=1; i<=profile1_nseqs; ++i)
group[i] = 1;
for(i=profile1_nseqs+1; i<=nseqs; ++i)
group[i] = 2;
entries = nseqs;
aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
for (i=1;i<=nseqs;i++) aligned[i] = 1;
score = prfalign(group, aligned);
info("Sequences:%d Score:%d",(pint)entries,(pint)score);
group=ckfree((void *)group);
aligned=ckfree((void *)aligned);
for (i=1;i<=nseqs;i++) {
for (j=i+1;j<=nseqs;j++) {
dscore = countid(i,j);
tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;
tmat[j][i] = tmat[i][j];
}
}
return(nseqs);
}
float countid(sint s1, sint s2)
{
char c1,c2;
sint i;
sint count,total;
float score;
count = total = 0;
for (i=1;i<=seqlen_array[s1] && i<=seqlen_array[s2];i++) {
c1 = seq_array[s1][i];
c2 = seq_array[s2][i];
if ((c1>=0) && (c1<max_aa)) {
total++;
if (c1 == c2) count++;
}
}
if(total==0) score=0;
else
score = 100.0 * (float)count / (float)total;
return(score);
}
sint palign2(char *p1_tree_name,char *p2_tree_name) /* a profile alignment */
{
sint i,j,sum,entries,status;
lint score;
sint *aligned, *group;
sint *maxid,*p1_weight,*p2_weight;
sint dscore;
info("Start of Multiple Alignment");
/* get the phylogenetic trees from *.ph */
if (profile1_nseqs >= 2)
{
status = read_tree(p1_tree_name, (sint)0, profile1_nseqs);
if (status == 0) return(0);
}
/* calculate sequence weights according to branch lengths of the tree -
weights in global variable seq_weight normalised to sum to 100 */
p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) );
calc_seq_weights((sint)0, profile1_nseqs, p1_weight);
/* clear the memory for the phylogenetic tree */
if (profile1_nseqs >= 2)
clear_tree(NULL);
if (nseqs-profile1_nseqs >= 2)
{
status = read_tree(p2_tree_name, profile1_nseqs, nseqs);
if (status == 0) return(0);
}
p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) );
calc_seq_weights(profile1_nseqs,nseqs, p2_weight);
/* clear the memory for the phylogenetic tree */
if (nseqs-profile1_nseqs >= 2)
clear_tree(NULL);
/* convert tmat distances to similarities */
for (i=1;i<nseqs;i++)
for (j=i+1;j<=nseqs;j++) {
tmat[i][j]=100.0-tmat[i][j]*100.0;
tmat[j][i]=tmat[i][j];
}
/* weight sequences with max percent identity with other profile*/
maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
for (i=0;i<profile1_nseqs;i++) {
maxid[i] = 0;
for (j=profile1_nseqs+1;j<=nseqs;j++)
if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
seq_weight[i] = maxid[i]*p1_weight[i];
}
for (i=profile1_nseqs;i<nseqs;i++) {
maxid[i] = -1;
for (j=1;j<=profile1_nseqs;j++)
if(maxid[i]<tmat[i+1][j]) maxid[i] = tmat[i+1][j];
seq_weight[i] = maxid[i]*p2_weight[i];
}
/*
Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
*/
sum = 0;
for (j=0;j<nseqs;j++)
sum += seq_weight[j];
if (sum == 0)
{
for (j=0;j<nseqs;j++)
seq_weight[j] = 1;
sum = j;
}
for (j=0;j<nseqs;j++)
{
seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum;
if (seq_weight[j] < 1) seq_weight[j] = 1;
}
if (debug > 1) {
fprintf(stdout,"new weights\n");
for (j=0;j<nseqs;j++) fprintf( stdout,"sequence %d: %d\n", j+1,seq_weight[j]);
}
/* do the alignment......... */
info("Aligning...");
group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));
for(i=1; i<=profile1_nseqs; ++i)
group[i] = 1;
for(i=profile1_nseqs+1; i<=nseqs; ++i)
group[i] = 2;
entries = nseqs;
aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
for (i=1;i<=nseqs;i++) aligned[i] = 1;
score = prfalign(group, aligned);
info("Sequences:%d Score:%d",(pint)entries,(pint)score);
group=ckfree((void *)group);
p1_weight=ckfree((void *)p1_weight);
p2_weight=ckfree((void *)p2_weight);
aligned=ckfree((void *)aligned);
maxid=ckfree((void *)maxid);
/* DES output_index = (int *)ckalloc( (nseqs+1) * sizeof (int)); */
for (i=1;i<=nseqs;i++) output_index[i] = i;
return(nseqs);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -