⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blast_stat.c

📁 ncbi源码
💻 C
📖 第 1 页 / 共 5 页
字号:
        for (index1 = 0; index1 < sbp->alphabet_size; index1++)            for (index2 = 0; index2 < sbp->alphabet_size; index2++)                matrix[index1][index2] = BLAST_SCORE_MIN;    } else {        /* Fill-in all the defaults ambiguity and normal codes */        status=BlastScoreBlkMatCreate(sbp);         if(status != 0)	{        	return status;	}    }        /* Read the residue names for the second alphabet */    while (fgets(buf, sizeof(buf), fp) != NULL) {        ++lineno;        if (strchr(buf, '\n') == NULL) {            return 2;        }        if (buf[0] == COMMENT_CHR) {            /* save the comment line in a linked list */            *strchr(buf, '\n') = NULLB;            ListNodeCopyStr(&sbp->comments, 0, buf+1);            continue;        }        if ((cp = strchr(buf, COMMENT_CHR)) != NULL)            *cp = NULLB;        lp = (char*)strtok(buf, TOKSTR);        if (lp == NULL) /* skip blank lines */            continue;        while (lp != NULL) {           if (sbp->alphabet_code == BLASTAA_SEQ_CODE)              ch = AMINOACID_TO_NCBISTDAA[toupper(*lp)];           else if (sbp->alphabet_code == BLASTNA_SEQ_CODE) {              ch = IUPACNA_TO_BLASTNA[toupper(*lp)];           } else {              ch = *lp;           }            a2chars[a2cnt++] = ch;            lp = (char*)strtok(NULL, TOKSTR);        }                break;	/* Exit loop after reading one line. */    }        if (a2cnt <= 1) {         return 2;    }    if (sbp->alphabet_code != BLASTNA_SEQ_CODE) {        sbp->mat_dim2 = a2cnt;    }    while (fgets(buf, sizeof(buf), fp) != NULL)  {        ++lineno;        if ((cp = strchr(buf, '\n')) == NULL) {            return 2;        }        if ((cp = strchr(buf, COMMENT_CHR)) != NULL)            *cp = NULLB;        if ((lp = (char*)strtok(buf, TOKSTR)) == NULL)            continue;        ch = *lp;        cp = (char*) lp;        if ((cp = strtok(NULL, TOKSTR)) == NULL) {            return 2;        }        if (a1cnt >= DIM(a1chars)) {            return 2;        }        if (sbp->alphabet_code == BLASTAA_SEQ_CODE) {           ch = AMINOACID_TO_NCBISTDAA[toupper(ch)];        } else {            if (sbp->alphabet_code == BLASTNA_SEQ_CODE) {                ch = IUPACNA_TO_BLASTNA[toupper(ch)];            }        }        a1chars[a1cnt++] = ch;        m = &matrix[(int)ch][0];        index2 = 0;        while (cp != NULL) {            if (index2 >= (int) a2cnt) {                return 2;            }            strcpy(temp, cp);                        if (strcasecmp(temp, "na") == 0)  {                score = BLAST_SCORE_1MIN;            } else  {                if (sscanf(temp, "%lg", &xscore) != 1) {                    return 2;                }				/*xscore = MAX(xscore, BLAST_SCORE_1MIN);*/                if (xscore > BLAST_SCORE_1MAX || xscore < BLAST_SCORE_1MIN) {                    return 2;                }                xscore += (xscore >= 0. ? 0.5 : -0.5);                score = (Int4)xscore;            }                        m[(int)a2chars[index2++]] = score;                        cp = strtok(NULL, TOKSTR);        }    }        if (a1cnt <= 1) {        return 2;    }        if (sbp->alphabet_code != BLASTNA_SEQ_CODE) {        sbp->mat_dim1 = a1cnt;    }        return 0;}static Int2BlastScoreBlkMaxScoreSet(BlastScoreBlk* sbp){	Int4 score, maxscore;	Int4 ** matrix; 	Int2 index1, index2;	sbp->loscore = BLAST_SCORE_1MAX;        sbp->hiscore = BLAST_SCORE_1MIN;	matrix = sbp->matrix;	for (index1=0; index1<sbp->alphabet_size; index1++)	{		maxscore=BLAST_SCORE_MIN;		for (index2=0; index2<sbp->alphabet_size; index2++)		{			score = matrix[index1][index2];			if (score <= BLAST_SCORE_MIN || score >= BLAST_SCORE_MAX)				continue;			if (score > maxscore)			{				maxscore = score;			}			if (sbp->loscore > score)				sbp->loscore = score;			if (sbp->hiscore < score)				sbp->hiscore = score;		}		sbp->maxscore[index1] = maxscore;	}/* If the lo/hi-scores are BLAST_SCORE_MIN/BLAST_SCORE_MAX, (i.e., forgaps), then use other scores. */	if (sbp->loscore < BLAST_SCORE_1MIN)		sbp->loscore = BLAST_SCORE_1MIN;	if (sbp->hiscore > BLAST_SCORE_1MAX)		sbp->hiscore = BLAST_SCORE_1MAX;	return 0;}Int2BlastScoreBlkMatrixLoad(BlastScoreBlk* sbp){    Int2 status = 0;    SNCBIPackedScoreMatrix* psm;    Int4** matrix = sbp->matrix;    int i, j;   /* loop indices */    ASSERT(sbp);    if (strcasecmp(sbp->name, "BLOSUM62") == 0) {        psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum62;    } else if (strcasecmp(sbp->name, "BLOSUM45") == 0) {        psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum45;    } else if (strcasecmp(sbp->name, "BLOSUM80") == 0) {        psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum80;    } else if (strcasecmp(sbp->name, "PAM30") == 0) {        psm = (SNCBIPackedScoreMatrix*) &NCBISM_Pam30;    } else if (strcasecmp(sbp->name, "PAM70") == 0) {        psm = (SNCBIPackedScoreMatrix*) &NCBISM_Pam70;    } else {        return -1;    }    /* Initialize with BLAST_SCORE_MIN */    for (i = 0; i < sbp->alphabet_size; i++) {        for (j = 0; j < sbp->alphabet_size; j++) {            matrix[i][j] = BLAST_SCORE_MIN;        }    }    for (i = 0; i < sbp->alphabet_size; i++) {        for (j = 0; j < sbp->alphabet_size; j++) {            /* skip selenocysteine and gap */            if (i == AMINOACID_TO_NCBISTDAA['U'] ||                 i == AMINOACID_TO_NCBISTDAA['-'] ||                j == AMINOACID_TO_NCBISTDAA['U'] ||                 j == AMINOACID_TO_NCBISTDAA['-']) {                continue;            }            matrix[i][j] = NCBISM_GetScore((const SNCBIPackedScoreMatrix*) psm,                                           i, j);        }    }    /* Sets dimensions of matrix. */    sbp->mat_dim1 = sbp->mat_dim2 = sbp->alphabet_size;    return status;}Int2BLAST_ScoreBlkMatFill(BlastScoreBlk* sbp, char* matrix_path){    Int2 status = 0;        if (sbp->read_in_matrix) {                if (matrix_path && *matrix_path != NULLB) {            FILE *fp = NULL;            char* full_matrix_path = NULL;            int path_len = strlen(matrix_path);            int buflen = path_len + strlen(sbp->name);            full_matrix_path = (char*) malloc((buflen + 1) * sizeof(char));            if (!full_matrix_path) {                return -1;            }            strncpy(full_matrix_path, matrix_path, buflen);            strncat(full_matrix_path, sbp->name, buflen - path_len);            if ( (fp=fopen(full_matrix_path, "r")) == NULL) {               return -1;            }            sfree(full_matrix_path);            if ( (status=BlastScoreBlkMatRead(sbp, fp)) != 0) {               fclose(fp);               return status;            }            fclose(fp);        } else {            if ( (status = BlastScoreBlkMatrixLoad(sbp)) !=0) {                return status;            }        }    } else {       /* Nucleotide BLAST only! */       if ( (status=BlastScoreBlkMatCreate(sbp)) != 0)          return status;    }        if ( (status=BlastScoreBlkMaxScoreSet(sbp)) != 0)       return status;    return status;}Blast_ResFreq*Blast_ResFreqDestruct(Blast_ResFreq* rfp){	if (rfp == NULL)		return NULL;	if (rfp->prob0 != NULL)		sfree(rfp->prob0);	sfree(rfp);	return rfp;}/*	Allocates the Blast_ResFreq* and then fills in the frequencies	in the probabilities.*/ Blast_ResFreq*Blast_ResFreqNew(const BlastScoreBlk* sbp){	Blast_ResFreq*	rfp;	if (sbp == NULL)	{		return NULL;	}	rfp = (Blast_ResFreq*) calloc(1, sizeof(Blast_ResFreq));	if (rfp == NULL)		return NULL;	rfp->alphabet_code = sbp->alphabet_code;	rfp->prob0 = (double*) calloc(sbp->alphabet_size, sizeof(double));	if (rfp->prob0 == NULL) 	{		rfp = Blast_ResFreqDestruct(rfp);		return rfp;	}	rfp->prob = rfp->prob0 - sbp->alphabet_start;	return rfp;}typedef struct BLAST_LetterProb {		char	ch;		double	p;	} BLAST_LetterProb;#if 0/* Unused for right now, but do not remove *//*  M. O. Dayhoff amino acid background frequencies   */static BLAST_LetterProb	Dayhoff_prob[] = {		{ 'A', 87.13 },		{ 'C', 33.47 },		{ 'D', 46.87 },		{ 'E', 49.53 },		{ 'F', 39.77 },		{ 'G', 88.61 },		{ 'H', 33.62 },		{ 'I', 36.89 },		{ 'K', 80.48 },		{ 'L', 85.36 },		{ 'M', 14.75 },		{ 'N', 40.43 },		{ 'P', 50.68 },		{ 'Q', 38.26 },		{ 'R', 40.90 },		{ 'S', 69.58 },		{ 'T', 58.54 },		{ 'V', 64.72 },		{ 'W', 10.49 },		{ 'Y', 29.92 }	};/* Stephen Altschul amino acid background frequencies */static BLAST_LetterProb Altschul_prob[] = {		{ 'A', 81.00 },		{ 'C', 15.00 },		{ 'D', 54.00 },		{ 'E', 61.00 },		{ 'F', 40.00 },		{ 'G', 68.00 },		{ 'H', 22.00 },		{ 'I', 57.00 },		{ 'K', 56.00 },		{ 'L', 93.00 },		{ 'M', 25.00 },		{ 'N', 45.00 },		{ 'P', 49.00 },		{ 'Q', 39.00 },		{ 'R', 57.00 },		{ 'S', 68.00 },		{ 'T', 58.00 },		{ 'V', 67.00 },		{ 'W', 13.00 },		{ 'Y', 32.00 }	};#endif/* amino acid background frequencies from Robinson and Robinson */static BLAST_LetterProb Robinson_prob[] = {		{ 'A', 78.05 },		{ 'C', 19.25 },		{ 'D', 53.64 },		{ 'E', 62.95 },		{ 'F', 38.56 },		{ 'G', 73.77 },		{ 'H', 21.99 },		{ 'I', 51.42 },		{ 'K', 57.44 },		{ 'L', 90.19 },		{ 'M', 22.43 },		{ 'N', 44.87 },		{ 'P', 52.03 },		{ 'Q', 42.64 },		{ 'R', 51.29 },		{ 'S', 71.20 },		{ 'T', 58.41 },		{ 'V', 64.41 },		{ 'W', 13.30 },		{ 'Y', 32.16 }	};#define STD_AMINO_ACID_FREQS Robinson_probstatic BLAST_LetterProb	nt_prob[] = {		{ 'A', 25.00 },		{ 'C', 25.00 },		{ 'G', 25.00 },		{ 'T', 25.00 }	};/*	Normalize the frequencies to "norm".*/static Int2Blast_ResFreqNormalize(const BlastScoreBlk* sbp, Blast_ResFreq* rfp, double norm){	Int2	alphabet_stop, index;	double	sum = 0., p;	if (norm == 0.)		return 1;	alphabet_stop = sbp->alphabet_start + sbp->alphabet_size;	for (index=sbp->alphabet_start; index<alphabet_stop; index++)	{		p = rfp->prob[index];		if (p < 0.)			return 1;		sum += p;	}	if (sum <= 0.)		return 0;	for (index=sbp->alphabet_start; index<alphabet_stop; index++)	{		rfp->prob[index] /= sum;		rfp->prob[index] *= norm;	}	return 0;}Int2Blast_GetStdAlphabet(Uint1 alphabet_code, Uint1* residues, Uint4 residues_size){	Int2 index;	if (residues_size < DIM(STD_AMINO_ACID_FREQS))		return -2;	for (index=0; index<(int)DIM(STD_AMINO_ACID_FREQS); index++) 	{		if (alphabet_code == BLASTAA_SEQ_CODE)		{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -