blast_stat.c

来自「ncbi源码」· C语言 代码 · 共 2,179 行 · 第 1/5 页

C
2,179
字号
    {100, 10, (double) INT2_MAX, 0.0286, 0.041, 0.15, 0.19, -32},    {95, 10, (double) INT2_MAX, 0.0272, 0.030, 0.13, 0.21, -35},    {90, 10, (double) INT2_MAX, 0.0257, 0.022, 0.11, 0.24, -40},    {85, 10, (double) INT2_MAX, 0.0242, 0.017, 0.083, 0.29, -51},    {115, 9, (double) INT2_MAX, 0.0306, 0.061, 0.24, 0.13, -14},    {110, 9, (double) INT2_MAX, 0.0299, 0.053, 0.19, 0.16, -23},    {105, 9, (double) INT2_MAX, 0.0289, 0.043, 0.17, 0.17, -23},    {100, 9, (double) INT2_MAX, 0.0279, 0.036, 0.14, 0.20, -31},    {95, 9, (double) INT2_MAX, 0.0266, 0.028, 0.12, 0.23, -37},    {120, 8, (double) INT2_MAX, 0.0307, 0.062, 0.22, 0.14, -18},    {115, 8, (double) INT2_MAX, 0.0300, 0.053, 0.20, 0.15, -19},    {110, 8, (double) INT2_MAX, 0.0292, 0.046, 0.17, 0.17, -23},    {105, 8, (double) INT2_MAX, 0.0280, 0.035, 0.14, 0.20, -31},    {100, 8, (double) INT2_MAX, 0.0266, 0.026, 0.12, 0.23, -37},    {125, 7, (double) INT2_MAX, 0.0306, 0.058, 0.22, 0.14, -18},    {120, 7, (double) INT2_MAX, 0.0300, 0.052, 0.19, 0.16, -23},    {115, 7, (double) INT2_MAX, 0.0292, 0.044, 0.17, 0.17, -24},    {110, 7, (double) INT2_MAX, 0.0279, 0.032, 0.14, 0.20, -31},    {105, 7, (double) INT2_MAX, 0.0267, 0.026, 0.11, 0.24, -41},    {120,10,5, 0.0298, 0.049, 0.19, 0.16, -21},    {115,10,5, 0.0290, 0.042, 0.16, 0.18, -25},    {110,10,5, 0.0279, 0.033, 0.13, 0.21, -32},    {105,10,5, 0.0264, 0.024, 0.10, 0.26, -46},    {100,10,5, 0.0250, 0.018, 0.081, 0.31, -56},    {125,10,4, 0.0301, 0.053, 0.18, 0.17, -25},    {120,10,4, 0.0292, 0.043, 0.15, 0.20, -33},    {115,10,4, 0.0282, 0.035, 0.13, 0.22, -36},    {110,10,4, 0.0270, 0.027, 0.11, 0.25, -41},    {105,10,4, 0.0254, 0.020, 0.079, 0.32, -60},    {130,10,3, 0.0300, 0.051, 0.17, 0.18, -27},    {125,10,3, 0.0290, 0.040, 0.13, 0.22, -38},    {120,10,3, 0.0278, 0.030, 0.11, 0.25, -44},    {115,10,3, 0.0267, 0.025, 0.092, 0.29, -52},    {110,10,3, 0.0252, 0.018, 0.070, 0.36, -70},    {135,10,2, 0.0292, 0.040, 0.13, 0.22, -35},    {130,10,2, 0.0283, 0.034, 0.10, 0.28, -51},    {125,10,2, 0.0269, 0.024, 0.077, 0.35, -71},    {120,10,2, 0.0253, 0.017, 0.059, 0.43, -90},    {115,10,2, 0.0234, 0.011, 0.043, 0.55, -121},    {100,14,3, 0.0258, 0.023, 0.087, 0.33, -59},    {105,13,3, 0.0263, 0.024, 0.085, 0.31, -57},    {110,12,3, 0.0271, 0.028, 0.093, 0.29, -54},    {115,11,3, 0.0275, 0.030, 0.10, 0.27, -49},    {125,9,3, 0.0283, 0.034, 0.12, 0.23, -38},    {130,8,3, 0.0287, 0.037, 0.12, 0.23, -40},    {125,7,3, 0.0287, 0.036, 0.12, 0.24, -44},    {140,6,3, 0.0285, 0.033, 0.12, 0.23, -40},    {105,14,3, 0.0270, 0.028, 0.10, 0.27, -46},    {110,13,3, 0.0279, 0.034, 0.10, 0.27, -50},    {115,12,3, 0.0282, 0.035, 0.12, 0.24, -42},    {120,11,3, 0.0286, 0.037, 0.12, 0.24, -44},};static Int4 blosum62_20_prefs[BLOSUM62_20_VALUES_MAX] = {BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_BEST,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL,BLAST_MATRIX_NOMINAL};/*	Allocates memory for the BlastScoreBlk*.*/BlastScoreBlk*BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts){	BlastScoreBlk* sbp;	sbp = (BlastScoreBlk*) calloc(1, sizeof(BlastScoreBlk));	if (sbp != NULL)	{		sbp->alphabet_code = alphabet;		if (alphabet != BLASTNA_SEQ_CODE)			sbp->alphabet_size = BLASTAA_SIZE;		else			sbp->alphabet_size = BLASTNA_SIZE;/* set the alphabet type (protein or not). */		switch (alphabet) {			case BLASTAA_SEQ_CODE:				sbp->protein_alphabet = TRUE;				break;			case BLASTNA_SEQ_CODE:				sbp->protein_alphabet = FALSE;				break;			default:				break;		}		sbp->matrix_struct = BlastMatrixAllocate(sbp->alphabet_size);		if (sbp->matrix_struct == NULL)		{			sbp = BlastScoreBlkFree(sbp);			return sbp;		}		sbp->matrix = sbp->matrix_struct->matrix;		sbp->maxscore = (Int4 *) calloc(BLAST_MATRIX_SIZE, sizeof(Int4));        sbp->scale_factor = 1.0;		sbp->number_of_contexts = number_of_contexts;		sbp->sfp = (Blast_ScoreFreq**)          calloc(sbp->number_of_contexts, sizeof(Blast_ScoreFreq*));		sbp->kbp_std = (Blast_KarlinBlk**)         calloc(sbp->number_of_contexts, sizeof(Blast_KarlinBlk*));		sbp->kbp_gap_std = (Blast_KarlinBlk**)         calloc(sbp->number_of_contexts, sizeof(Blast_KarlinBlk*));		sbp->kbp_psi = (Blast_KarlinBlk**)         calloc(sbp->number_of_contexts, sizeof(Blast_KarlinBlk*));		sbp->kbp_gap_psi = (Blast_KarlinBlk**)         calloc(sbp->number_of_contexts, sizeof(Blast_KarlinBlk*));	}	return sbp;}Blast_ScoreFreq*Blast_ScoreFreqDestruct(Blast_ScoreFreq* sfp){	if (sfp == NULL)		return NULL;	if (sfp->sprob0 != NULL)		sfree(sfp->sprob0);	sfree(sfp);	return sfp;}/*	Deallocates the Karlin Block.*/Blast_KarlinBlk*Blast_KarlinBlkDestruct(Blast_KarlinBlk* kbp){	sfree(kbp);	return kbp;}static SBLASTMatrixStructure*BlastMatrixDestruct(SBLASTMatrixStructure* matrix_struct){	if (matrix_struct == NULL)		return NULL;	sfree(matrix_struct);	return matrix_struct;}BlastScoreBlk*BlastScoreBlkFree(BlastScoreBlk* sbp){    Int4 index, rows;    if (sbp == NULL)        return NULL;        for (index=0; index<sbp->number_of_contexts; index++) {        if (sbp->sfp)            sbp->sfp[index] = Blast_ScoreFreqDestruct(sbp->sfp[index]);        if (sbp->kbp_std)            sbp->kbp_std[index] = Blast_KarlinBlkDestruct(sbp->kbp_std[index]);        if (sbp->kbp_gap_std)            sbp->kbp_gap_std[index] = Blast_KarlinBlkDestruct(sbp->kbp_gap_std[index]);        if (sbp->kbp_psi)            sbp->kbp_psi[index] = Blast_KarlinBlkDestruct(sbp->kbp_psi[index]);        if (sbp->kbp_gap_psi)            sbp->kbp_gap_psi[index] = Blast_KarlinBlkDestruct(sbp->kbp_gap_psi[index]);    }    if (sbp->kbp_ideal)        sbp->kbp_ideal = Blast_KarlinBlkDestruct(sbp->kbp_ideal);    sfree(sbp->sfp);    sfree(sbp->kbp_std);    sfree(sbp->kbp_psi);    sfree(sbp->kbp_gap_std);    sfree(sbp->kbp_gap_psi);    sbp->matrix_struct = BlastMatrixDestruct(sbp->matrix_struct);    sfree(sbp->maxscore);    sbp->comments = ListNodeFreeData(sbp->comments);    sfree(sbp->name);    sfree(sbp->ambiguous_res);        /* Removing posMatrix and posFreqs if any */    rows = sbp->query_length + 1;    if(sbp->posMatrix != NULL) {        for (index=0; index < rows; index++) {            sfree(sbp->posMatrix[index]);        }        sfree(sbp->posMatrix);    }        if(sbp->posFreqs != NULL) {        for (index = 0; index < rows; index++) {            sfree(sbp->posFreqs[index]);        }        sfree(sbp->posFreqs);    }        sfree(sbp);        return NULL;}/* 	Set the ambiguous residue (e.g, 'N', 'X') in the BlastScoreBlk*.	Convert from ncbieaa to sbp->alphabet_code (i.e., ncbistdaa) first.*/Int2BLAST_ScoreSetAmbigRes(BlastScoreBlk* sbp, char ambiguous_res){	Int2 index;	Uint1* ambig_buffer;	if (sbp == NULL)		return 1;	if (sbp->ambig_occupy >= sbp->ambig_size)	{		sbp->ambig_size += 5;		ambig_buffer = (Uint1 *) calloc(sbp->ambig_size, sizeof(Uint1));		for (index=0; index<sbp->ambig_occupy; index++)		{			ambig_buffer[index] = sbp->ambiguous_res[index];		}		sfree(sbp->ambiguous_res);		sbp->ambiguous_res = ambig_buffer;	}	if (sbp->alphabet_code == BLASTAA_SEQ_CODE)	{		sbp->ambiguous_res[sbp->ambig_occupy] =          AMINOACID_TO_NCBISTDAA[toupper(ambiguous_res)];	}	else {      if (sbp->alphabet_code == BLASTNA_SEQ_CODE)         sbp->ambiguous_res[sbp->ambig_occupy] =             IUPACNA_TO_BLASTNA[toupper(ambiguous_res)];      else if (sbp->alphabet_code == NCBI4NA_SEQ_CODE)         sbp->ambiguous_res[sbp->ambig_occupy] =             IUPACNA_TO_NCBI4NA[toupper(ambiguous_res)];   }	(sbp->ambig_occupy)++;		return 0;}/* 	Fill in the matrix for blastn using the penaly and rewards	The query sequence alphabet is blastna, the subject sequence	is ncbi2na.  The alphabet blastna is defined in blastkar.h	and the first four elements of blastna are identical to ncbi2na.	The query is in the first index, the subject is the second.        if matrix==NULL, it is allocated and returned.*/static Int4 **BlastScoreBlkMatCreateEx(Int4 **matrix,Int4 penalty,                                        Int4 reward){	Int2	index1, index2, degen;	Int2 degeneracy[BLASTNA_SIZE+1];	        if(!matrix) {            SBLASTMatrixStructure* matrix_struct;            matrix_struct =BlastMatrixAllocate((Int2) BLASTNA_SIZE);            matrix = matrix_struct->matrix;        }	for (index1 = 0; index1<BLASTNA_SIZE; index1++) /* blastna */		for (index2 = 0; index2<BLASTNA_SIZE; index2++) /* blastna */			matrix[index1][index2] = 0;	/* In blastna the 1st four bases are A, C, G, and T, exactly as it is ncbi2na. */	/* ncbi4na gives them the value 1, 2, 4, and 8.  */	/* Set the first four bases to degen. one */	for (index1=0; index1<NUMBER_NON_AMBIG_BP; index1++)		degeneracy[index1] = 1;	for (index1=NUMBER_NON_AMBIG_BP; index1<BLASTNA_SIZE; index1++) /* blastna */	{		degen=0;		for (index2=0; index2<NUMBER_NON_AMBIG_BP; index2++) /* ncbi2na */		{			if (BLASTNA_TO_NCBI4NA[index1] & BLASTNA_TO_NCBI4NA[index2])				degen++;		}		degeneracy[index1] = degen;	}	for (index1=0; index1<BLASTNA_SIZE; index1++) /* blastna */	{		for (index2=index1; index2<BLASTNA_SIZE; index2++) /* blastna */		{			if (BLASTNA_TO_NCBI4NA[index1] & BLASTNA_TO_NCBI4NA[index2])			{ /* round up for positive scores, down for negatives. */				matrix[index1][index2] = BLAST_Nint( (double) ((degeneracy[index2]-1)*penalty + reward))/degeneracy[index2];				if (index1 != index2)				{				      matrix[index2][index1] = matrix[index1][index2];				}			}			else			{				matrix[index1][index2] = penalty;				matrix[index2][index1] = penalty;			}		}	}        /* The value of 15 is a gap, which is a sentinel between strands in            the ungapped extension algorithm */	for (index1=0; index1<BLASTNA_SIZE; index1++) /* blastna */           matrix[BLASTNA_SIZE-1][index1] = INT4_MIN / 2;	for (index1=0; index1<BLASTNA_SIZE; index1++) /* blastna */           matrix[index1][BLASTNA_SIZE-1] = INT4_MIN / 2;	return matrix;}/* 	Fill in the matrix for blastn using the penaly and rewards on	the BlastScoreBlk*.	The query sequence alphabet is blastna, the subject sequence	is ncbi2na.  The alphabet blastna is defined in blastkar.h	and the first four elements of blastna are identical to ncbi2na.	The query is in the first index, the subject is the second.*/static Int2 BlastScoreBlkMatCreate(BlastScoreBlk* sbp){   sbp->matrix = BlastScoreBlkMatCreateEx(sbp->matrix,sbp->penalty, sbp->reward);	sbp->mat_dim1 = BLASTNA_SIZE;	sbp->mat_dim2 = BLASTNA_SIZE;	return 0;}/* 	Read in the matrix from the FILE *fp.	This function ASSUMES that the matrices are in the ncbistdaa	format.  BLAST should be able to use any alphabet, though it	is expected that ncbistdaa will be used.*/static Int2BlastScoreBlkMatRead(BlastScoreBlk* sbp, FILE *fp){    char	buf[512+3];    char	temp[512];    char*	cp,* lp;    char		ch;    Int4 **	matrix;    Int4 *	m;    Int4	score;    Uint4	a1cnt = 0, a2cnt = 0;    char    a1chars[BLAST_MAX_ALPHABET], a2chars[BLAST_MAX_ALPHABET];    long	lineno = 0;    double	xscore;    register int	index1, index2;    Int2 status;        matrix = sbp->matrix;	        if (sbp->alphabet_code != BLASTNA_SEQ_CODE) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?