📄 malign.c
字号:
#include <stdio.h>#include <string.h>#include <ctype.h>#include <stdlib.h>#include "clustalw.h"/* * Prototypes *//* * Global Variables */extern double **tmat;extern Boolean no_weights;extern sint debug;extern sint max_aa;extern sint nseqs;extern sint profile1_nseqs;extern sint nsets;extern sint **sets;extern sint divergence_cutoff;extern sint *seq_weight;extern sint output_order, *output_index;extern Boolean distance_tree;extern char seqname[];extern sint *seqlen_array;extern char **seq_array;sint malign(sint istart,char *phylip_name) /* full progressive alignment*/{ static sint *aligned; static sint *group; static sint ix; sint *maxid, max, sum; sint *tree_weight; sint i,j,set,iseq=0; sint status,entries; lint score = 0; info("Start of Multiple Alignment");/* get the phylogenetic tree from *.ph */ if (nseqs >= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return((sint)0); }/* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight);/* recalculate tmat matrix as percent similarity matrix */ status = calc_similarities(nseqs); if (status == 0) return((sint)0);/* for each sequence, find the most closely related sequence */ maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); for (i=1;i<=nseqs;i++) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; }/* group the sequences according to their relative divergence */ if (istart == 0) { sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) ); for(i=0;i<=nseqs;i++) sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); create_sets((sint)0,nseqs); info("There are %d groups",(pint)nsets);/* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL);/* start the multiple alignments......... */ info("Aligning...");/* first pass, align closely related sequences first.... */ ix = 0; aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); for (i=0;i<=nseqs;i++) aligned[i] = 0; for(set=1;set<=nsets;++set) { entries=0; for (i=1;i<=nseqs;i++) { if ((sets[set][i] != 0) && (maxid[i] > divergence_cutoff)) { entries++; if (aligned[i] == 0) { if (output_order==INPUT) { ++ix; output_index[i] = i; } else output_index[++ix] = i; aligned[i] = 1; } } } if(entries > 0) score = prfalign(sets[set], aligned); else score=0.0;/* negative score means fatal error... exit now! */ if (score < 0) { return(-1); } if ((entries > 0) && (score > 0)) info("Group %d: Sequences:%4d Score:%d", (pint)set,(pint)entries,(pint)score); else info("Group %d: Delayed", (pint)set); } for (i=0;i<=nseqs;i++) sets[i]=ckfree((void *)sets[i]); sets=ckfree(sets); } else {/* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); ix = 0; for (i=1;i<=istart+1;i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; }/* second pass - align remaining, more divergent sequences..... *//* if not all sequences were aligned, for each unaligned sequence, find it's closest pair amongst the aligned sequences. */ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i<nseqs;i++) tree_weight[i] = seq_weight[i];/* if we haven't aligned any sequences, in the first pass - align thetwo most closely related sequences now */ if(ix==0) { max = -1; iseq = 0; for (i=1;i<=nseqs;i++) { for (j=i+1;j<=nseqs;j++) { if (max < tmat[i][j]) { max = tmat[i][j]; iseq = i; } } } aligned[iseq]=1; if (output_order == INPUT) { ++ix; output_index[iseq] = iseq; } else output_index[++ix] = iseq; } while (ix < nseqs) { for (i=1;i<=nseqs;i++) { if (aligned[i] == 0) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) maxid[i] = tmat[i][j]; } }/* find the most closely related sequence to those already aligned */ max = -1; iseq = 0; for (i=1;i<=nseqs;i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } }/* align this sequence to the existing alignment *//* weight sequences with percent identity with profile*//* OR...., multiply sequence weights from tree by percent identity with new sequence */ if(no_weights==FALSE) { for (j=0;j<nseqs;j++) if (aligned[j+1] != 0) seq_weight[j] = tree_weight[j] * tmat[j+1][iseq];/* Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR*/ sum = 0; for (j=0;j<nseqs;j++) if (aligned[j+1] != 0) sum += seq_weight[j]; if (sum == 0) { for (j=0;j<nseqs;j++) seq_weight[j] = 1; sum = j; } for (j=0;j<nseqs;j++) if (aligned[j+1] != 0) { seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum; if (seq_weight[j] < 1) seq_weight[j] = 1; } } entries = 0; for (j=1;j<=nseqs;j++) if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq==j) { group[j] = 2; entries++; } aligned[iseq] = 1; score = prfalign(group, aligned); info("Sequence:%d Score:%d",(pint)iseq,(pint)score); if (output_order == INPUT) { ++ix; output_index[iseq] = iseq; } else output_index[++ix] = iseq; } group=ckfree((void *)group); aligned=ckfree((void *)aligned); maxid=ckfree((void *)maxid); tree_weight=ckfree((void *)tree_weight); aln_score();/* make the rest (output stuff) into routine clustal_out in file amenu.c */ return(nseqs);}sint seqalign(sint istart,char *phylip_name) /* sequence alignment to existing profile */{ static sint *aligned, *tree_weight; static sint *group; static sint ix; sint *maxid, max; sint i,j,status,iseq; sint sum,entries; lint score = 0; info("Start of Multiple Alignment");/* get the phylogenetic tree from *.ph */ if (nseqs >= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return(0); }/* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i<nseqs;i++) tree_weight[i] = seq_weight[i];/* recalculate tmat matrix as percent similarity matrix */ status = calc_similarities(nseqs); if (status == 0) return((sint)0);/* for each sequence, find the most closely related sequence */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -