⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xscore.c

📁 是有关基因比对的经典算法的实现。这对于初学计算生物学的人是非常重要的算法。
💻 C
📖 第 1 页 / 共 2 页
字号:
 
 
void set_score_user_dnamatrix(ButtoN but)
{

	if(get_user_matrixname(score_dnamtrxname,score_dnamatrix,score_dna_xref,3,&score_dnamatnum,scorednamattext))
	{
		calc_colscores(residue_exceptions,TRUE);
		SetValue(score_dnamatrix_list,score_dnamatnum);
	}
}
 
void set_score_dnamatrix(GrouP g)
{
	int tmp;

        tmp = GetValue(g);
	if(tmp>0 && tmp<3)
	{
		score_dnamatnum=tmp;
	}
	else
	{
                if (score_dnamtrxname[0]=='\0')
		{
			get_user_matrixname(score_dnamtrxname,score_dnamatrix,score_dna_xref,3,&score_dnamatnum,scorednamattext);
		}
		else score_dnamatnum=3;
	}
	calc_colscores(residue_exceptions,TRUE);

	SetValue(score_dnamatrix_list,score_dnamatnum);
}
 
void set_segment_user_dnamatrix(ButtoN but)
{

	if(get_user_matrixname(segment_dnamtrxname,segment_dnamatrix,segment_dna_xref,3,&segment_dnamatnum,segmentdnamattext))
		calc_seg_exceptions();
	if(segment_exceptions) show_segment_exceptions();

	SetValue(seg_dnamatrix_list,segment_dnamatnum);
}

void set_segment_dnamatrix(GrouP g)
{
        int tmp;
 
        tmp = GetValue(g);
        if(tmp>0 && tmp<3)
        {
                segment_dnamatnum=tmp;
        }
        else
        {
                if (segment_dnamtrxname[0]=='\0')
                {
			get_user_matrixname(segment_dnamtrxname,segment_dnamatrix,segment_dna_xref,3,&segment_dnamatnum,segmentdnamattext);
                }
                else segment_dnamatnum=3;
        }
 
	calc_seg_exceptions();
        if(segment_exceptions) show_segment_exceptions();
 
        SetValue(seg_dnamatrix_list,segment_dnamatnum);
}

static void calc_colscores(Boolean update_seqs,Boolean update_scores)
{
	panel_data data;

	if(aln_mode==MULTIPLEM)
	{
		GetPanelExtra(seq_panel.seqs,&data);
		make_colscores(data);
		SetPanelExtra(seq_panel.seqs,&data);
		if(update_seqs) draw_seqs(seq_panel.seqs);
		if(update_scores) draw_colscores(seq_panel.seqs);
	}
	else
	{
		GetPanelExtra(prf_panel[0].seqs,&data);
		make_colscores(data);
		SetPanelExtra(prf_panel[0].seqs,&data);
		if(update_seqs) draw_seqs(prf_panel[0].seqs);
		if(update_scores) draw_colscores(prf_panel[0].seqs);
		GetPanelExtra(prf_panel[1].seqs,&data);
		make_colscores(data);
		SetPanelExtra(prf_panel[1].seqs,&data);
		if(update_seqs) draw_seqs(prf_panel[1].seqs);
		if(update_scores) draw_colscores(prf_panel[1].seqs);
	}
}
 
void calc_seg_exceptions(void)
{
	if(aln_mode==MULTIPLEM)
	{
		calc_panel_segment_exceptions(seq_panel.seqs);
	}
	else
	{
		calc_panel_segment_exceptions(prf_panel[0].seqs);
		calc_panel_segment_exceptions(prf_panel[1].seqs);
	}
}

void show_segment_exceptions(void)
{
	if(aln_mode==MULTIPLEM)
	{
		draw_seqs(seq_panel.seqs);
	}
	else
	{
		draw_seqs(prf_panel[0].seqs);
		draw_seqs(prf_panel[1].seqs);
	}
}

static void remove_short_segments(PaneL p)
{
	int i,j,k,start;
	panel_data data;

	GetPanelExtra(p,&data);
	if(data.nseqs<=0) return;

/* Reset all the exceptions - a value of 1 indicates an exception that
will be displayed. A value of -1 is used to remember exceptions that
are temporarily hidden in the display */
        for(i=0;i<data.nseqs;i++)
                for(j=0;j<data.ncols;j++)
			if(data.segment_exception[i][j] == -1)
				data.segment_exception[i][j] = 1;

        for(i=0;i<data.nseqs;i++)
        {
		start = -1;
                for(j=0;j<=data.ncols;j++)
                {
			if(start == -1)
			{
				if(data.segment_exception[i][j]==1)
					start=j;
			}
			else
			{
				if(j==data.ncols || data.segment_exception[i][j]==0)
				{
					if(j-start<length_cutoff)
						for(k=start;k<j;k++)
							data.segment_exception[i][k] = -1;
					start = -1;
				}
			}

		}
	}

	SetPanelExtra(p,&data);
}

static void calc_weights(PaneL p)
{
	int i,j;
	int status;
	sint *weight;
	float dscore;
	FILE *tree;
	panel_data data;

#ifdef UNIX
	char tree_name[FILENAMELEN]=".score.ph";
#else
	char tree_name[FILENAMELEN]="tmp.ph";
#endif

	GetPanelExtra(p,&data);
	if(data.nseqs<=0) return;

/* if sequence weights have been calculated before - don't bother
doing it again (it takes too long). data.seqweight is set to NULL when
 new sequences are loaded. */
	if(data.seqweight!=NULL) return;

	WatchCursor();
	info("Calculating sequence weights...");
/* count pairwise percent identities to make a phylogenetic tree */
	if(data.nseqs>=2)
	{
        	for (i=1;i<=data.nseqs;i++) {
                	for (j=i+1;j<=data.nseqs;j++) {
                        	dscore = countid(i+data.firstseq,j+data.firstseq);
                        	tmat[i][j] = (100.0 - dscore)/100.0;
                        	tmat[j][i] = tmat[i][j];
                	}
        	}

                if((tree = open_explicit_file(tree_name))==NULL) return;

		guide_tree(tree,data.firstseq+1,data.nseqs);

       		status = read_tree(tree_name, data.firstseq, data.firstseq+data.nseqs);
        	if (status == 0) return;
 
	}
 
	weight = (sint *) ckalloc( (data.firstseq+data.nseqs+1) * sizeof(sint) );
/* get the sequence weights */
 	calc_seq_weights(data.firstseq, data.firstseq+data.nseqs,weight);
	if(data.seqweight==NULL) data.seqweight=(sint *)ckalloc((data.nseqs+1) * sizeof(sint));
	for(i=data.firstseq;i<data.firstseq+data.nseqs;i++)
		data.seqweight[i-data.firstseq]=weight[i];

/* clear the memory for the phylogenetic tree */
   	if (data.nseqs >= 2)
	{
        	clear_tree(NULL);
		remove(tree_name);
	}
	ckfree(weight);
	SetPanelExtra(p,&data);
	info("Done.");
	ArrowCursor();
}

static void calc_panel_segment_exceptions(PaneL p)
{
        int i,j;
        float sum,prev_sum;
	float gscale;
	sint **profile;
	sint *weight,sweight;
	sint *gaps;
	sint maxres;
	int max=0,offset;
	short  *mat_xref, *matptr;
	sint matrix[NUMRES][NUMRES];
	float *fsum;
	float *bsum;
	float *pscore;
	panel_data data;
 
/* First, calculate sequence weights which will be used to build the
profile */
	calc_weights(p);

	GetPanelExtra(p,&data);
	if(data.nseqs<=0) return;
 
	WatchCursor();
	info("Calculating profile scores...");

        for(i=0;i<data.nseqs;i++)
                for(j=0;j<data.ncols;j++)
                     	data.segment_exception[i][j]=0;

/* get the comparison matrix for building the profile */
	if(dnaflag)
	{
		if (segment_dnamatnum==1)
		{
			matptr = swgapdnamt;
			mat_xref = def_dna_xref;
		}
		else if (segment_dnamatnum==2)
		{
			matptr = clustalvdnamt;
			mat_xref = def_dna_xref;
		}
		else
		{
			matptr = segment_dnamatrix;
			mat_xref = segment_dna_xref;
		}
/* get a positive matrix - then adjust it according to scale */
		maxres = get_matrix(matptr, mat_xref, matrix, FALSE, 100);
/* find the maximum value */
	for(i=0;i<=max_aa;i++)
		for(j=0;j<=max_aa;j++)
			if(matrix[i][j]>max) max=matrix[i][j];
/* subtract max*scale/2 from each matrix value */
	offset=(float)(max*segment_dnascale)/20.0;

	for(i=0;i<=max_aa;i++)
		for(j=0;j<=max_aa;j++)
			matrix[i][j]-=offset;
	}
	else
	{
		if (segment_matnum==1)
		{
			matptr = gon80mt;
			mat_xref = def_aa_xref;
		}
		else if (segment_matnum==2)
		{
			matptr = gon120mt;
			mat_xref = def_aa_xref;
		}
		else if (segment_matnum==3)
		{
			matptr = gon250mt;
			mat_xref = def_aa_xref;
		}
		else if (segment_matnum==4)
		{
			matptr = gon350mt;
			mat_xref = def_aa_xref;
		}
		else
		{
			matptr = segment_matrix;
			mat_xref = segment_aa_xref;
		}
/* get a negative matrix */
		maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100);
	}

	profile = (sint **) ckalloc( (data.ncols+2) * sizeof (sint *) );
	for(i=0; i<data.ncols+1; i++)
		profile[i] = (sint *) ckalloc( (LENCOL+2) * sizeof(sint) );

/* calculate the profile */
	gaps = (sint *) ckalloc( (data.ncols+1) * sizeof (sint) );
	for (j=1; j<=data.ncols; j++)
	{
		gaps[j-1] = 0;
		for(i=data.firstseq+1;i<data.firstseq+data.nseqs;i++)
			if (j<seqlen_array[i])
				if ((seq_array[i][j] < 0) || (seq_array[i][j] > max_aa))
					gaps[j-1]++;
	}
	weight = (sint *) ckalloc( (data.firstseq+data.nseqs+1) * sizeof(sint) );
	for(i=data.firstseq;i<data.firstseq+data.nseqs;i++)
		weight[i]=data.seqweight[i-data.firstseq];

	build_profile(data.ncols,data.firstseq,data.firstseq+data.nseqs,matrix,weight,profile);

	sweight=0;
        for(i=data.firstseq;i<data.firstseq+data.nseqs;i++)
		sweight+=weight[i];

/*Now, use the profile scores to mark segments of each sequence which score
badly. */

	fsum = (float *) ckalloc( (data.ncols+2) * sizeof (float) );
	bsum = (float *) ckalloc( (data.ncols+2) * sizeof (float) );
	pscore = (float *) ckalloc( (data.ncols+2) * sizeof (float) );
        for(i=data.firstseq+1;i<data.firstseq+data.nseqs+1;i++)
        {
/* In a forward phase, sum the profile scores. Mark negative sums as exceptions.
If the sum is positive, then it gets reset to 0. */
		sum=0.0;
                for(j=1;j<=seqlen_array[i];j++)
                {
           		gscale = (float)(data.nseqs-gaps[j-1]) / (float)data.nseqs;
			if(seq_array[i][j]<0 || seq_array[i][j]>=max_aa)
			{
				pscore[j-1]=0.0;
				sum=0.0;
			}
			else
                        	pscore[j-1]=(profile[j][seq_array[i][j]]-
			weight[i-1]*matrix[seq_array[i][j]][seq_array[i][j]])*gscale/sweight;
                        sum+=pscore[j-1];
			if(sum>0.0) sum=0.0;
			fsum[j-1]=sum;
                }
/* trim off any positive scoring residues from the end of the segments */
		prev_sum=0;
                for(j=seqlen_array[i]-1;j>=0;j--)
                {
			if(prev_sum>=0.0 && fsum[j]<0.0 && pscore[j]>=0.0)
				fsum[j]=0.0;
			prev_sum=fsum[j];
                }

/* Now, in a backward phase, do the same summing process. */
		sum=0.0;
                for(j=seqlen_array[i];j>=1;j--)
                {
			if(seq_array[i][j]<0 || seq_array[i][j]>=max_aa)
				sum=0;
			else
                        	sum+=pscore[j-1];
			if(sum>0.0) sum=0.0;
			bsum[j-1]=sum;
                }
/* trim off any positive scoring residues from the start of the segments */
		prev_sum=0;
                for(j=0;j<seqlen_array[i];j++)
                {
			if(prev_sum>=0.0 && bsum[j]<0.0 && pscore[j]>=0.0)
				bsum[j]=0.0;
			prev_sum=bsum[j];
                }
/*Mark residues as exceptions if they score negative in the forward AND backward directions. */
                for(j=1;j<=seqlen_array[i];j++)
			if(fsum[j-1]<0.0 && bsum[j-1]<0.0)
				if(seq_array[i][j]>=0 && seq_array[i][j]<max_aa)
				data.segment_exception[i-data.firstseq-1][j-1]=-1;
/*
if(i==5) {
fprintf(stderr,"%4d ",j);
fprintf(stderr,"\n");
for(j=0;j<seqlen_array[i];j++)
fprintf(stderr,"%4d ",(int)fsum[j]);
fprintf(stderr,"\n");
}
*/
        }
	for(i=0; i<data.ncols+1; i++)
		ckfree(profile[i]);
	ckfree(profile);
	ckfree(weight);
	ckfree(gaps);
	ckfree(pscore);
	ckfree(fsum);
	ckfree(bsum);

	SetPanelExtra(p,&data);

/* Finally, apply the length cutoff to the segments - removing segments shorter
than the cutoff */
	remove_short_segments(p);

	info("Done.");
	ArrowCursor();
}


void set_segment_dnascale(BaR bar, GraphiC p, Nlm_Int2 newval, Nlm_Int2 oldval)
{
        char str[FILENAMELEN];
	panel_data data;
 
        segment_dnascale = newval+1;
	calc_seg_exceptions();
	if(segment_exceptions) show_segment_exceptions();
	sprintf(str,"DNA Marking Scale:   %2d",segment_dnascale);
	SetTitle(segmentdnascaletext,str);
}

 
static void build_profile(int prf_length,int first_seq,int last_seq,sint matrix[NUMRES][NUMRES],sint *weight,sint **profile)
{
  sint **weighting, d, i, res; 
  sint r, pos;
  int f;

  weighting = (sint **) ckalloc( (NUMRES+2) * sizeof (sint *) );
  for (i=0;i<NUMRES+2;i++)
    weighting[i] = (sint *) ckalloc( (prf_length+2) * sizeof (sint) );

  for (r=0; r<prf_length; r++)
   {
      for (d=0; d<=max_aa; d++)
        {
            weighting[d][r] = 0;
            for (i=first_seq; i<last_seq; i++)
		if (r+1<seqlen_array[i+1])
               		if (d == seq_array[i+1][r+1]) weighting[d][r] += weight[i];
        }
      weighting[gap_pos1][r] = 0;
      for (i=first_seq; i<last_seq; i++)
	if (r+1<seqlen_array[i+1])
         if (gap_pos1 == seq_array[i+1][r+1]) weighting[gap_pos1][r] += weight[i];
      weighting[gap_pos2][r] = 0;
      for (i=first_seq; i<last_seq; i++)
	if (r+1<seqlen_array[i+1])
         if (gap_pos2 == seq_array[i+1][r+1]) weighting[gap_pos2][r] += weight[i];
   }

  for (pos=0; pos< prf_length; pos++)
    {
           for (res=0; res<=max_aa; res++)
             {
                f = 0;
                for (d=0; d<=max_aa; d++)
                     f += (weighting[d][pos] * matrix[d][res]);
                f += (weighting[gap_pos1][pos] * matrix[gap_pos1][res]);
                f += (weighting[gap_pos2][pos] * matrix[gap_pos2][res]);
                profile[pos+1][res] = f;
             }
           f = 0;
           for (d=0; d<=max_aa; d++)
                f += (weighting[d][pos] * matrix[d][gap_pos1]);
           f += (weighting[gap_pos1][pos] * matrix[gap_pos1][gap_pos1]);
           f += (weighting[gap_pos2][pos] * matrix[gap_pos2][gap_pos1]);
           profile[pos+1][gap_pos1] = f;
           f = 0;
           for (d=0; d<=max_aa; d++)
                f += (weighting[d][pos] * matrix[d][gap_pos2]);
           f += (weighting[gap_pos1][pos] * matrix[gap_pos1][gap_pos2]);
           f += (weighting[gap_pos2][pos] * matrix[gap_pos2][gap_pos2]);
           profile[pos+1][gap_pos2] = f;
    }

  for (i=0;i<=max_aa;i++)
    weighting[i]=ckfree((void *)weighting[i]);
  weighting=ckfree((void *)weighting);

}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -