📄 blast_stat.c
字号:
for (index1 = 0; index1 < sbp->alphabet_size; index1++) for (index2 = 0; index2 < sbp->alphabet_size; index2++) matrix[index1][index2] = BLAST_SCORE_MIN; } else { /* Fill-in all the defaults ambiguity and normal codes */ status=BlastScoreBlkMatCreate(sbp); if(status != 0) { return status; } } /* Read the residue names for the second alphabet */ while (fgets(buf, sizeof(buf), fp) != NULL) { ++lineno; if (strchr(buf, '\n') == NULL) { return 2; } if (buf[0] == COMMENT_CHR) { /* save the comment line in a linked list */ *strchr(buf, '\n') = NULLB; ListNodeCopyStr(&sbp->comments, 0, buf+1); continue; } if ((cp = strchr(buf, COMMENT_CHR)) != NULL) *cp = NULLB; lp = (char*)strtok(buf, TOKSTR); if (lp == NULL) /* skip blank lines */ continue; while (lp != NULL) { if (sbp->alphabet_code == BLASTAA_SEQ_CODE) ch = AMINOACID_TO_NCBISTDAA[toupper(*lp)]; else if (sbp->alphabet_code == BLASTNA_SEQ_CODE) { ch = IUPACNA_TO_BLASTNA[toupper(*lp)]; } else { ch = *lp; } a2chars[a2cnt++] = ch; lp = (char*)strtok(NULL, TOKSTR); } break; /* Exit loop after reading one line. */ } if (a2cnt <= 1) { return 2; } if (sbp->alphabet_code != BLASTNA_SEQ_CODE) { sbp->mat_dim2 = a2cnt; } while (fgets(buf, sizeof(buf), fp) != NULL) { ++lineno; if ((cp = strchr(buf, '\n')) == NULL) { return 2; } if ((cp = strchr(buf, COMMENT_CHR)) != NULL) *cp = NULLB; if ((lp = (char*)strtok(buf, TOKSTR)) == NULL) continue; ch = *lp; cp = (char*) lp; if ((cp = strtok(NULL, TOKSTR)) == NULL) { return 2; } if (a1cnt >= DIM(a1chars)) { return 2; } if (sbp->alphabet_code == BLASTAA_SEQ_CODE) { ch = AMINOACID_TO_NCBISTDAA[toupper(ch)]; } else { if (sbp->alphabet_code == BLASTNA_SEQ_CODE) { ch = IUPACNA_TO_BLASTNA[toupper(ch)]; } } a1chars[a1cnt++] = ch; m = &matrix[(int)ch][0]; index2 = 0; while (cp != NULL) { if (index2 >= (int) a2cnt) { return 2; } strcpy(temp, cp); if (strcasecmp(temp, "na") == 0) { score = BLAST_SCORE_1MIN; } else { if (sscanf(temp, "%lg", &xscore) != 1) { return 2; } /*xscore = MAX(xscore, BLAST_SCORE_1MIN);*/ if (xscore > BLAST_SCORE_1MAX || xscore < BLAST_SCORE_1MIN) { return 2; } xscore += (xscore >= 0. ? 0.5 : -0.5); score = (Int4)xscore; } m[(int)a2chars[index2++]] = score; cp = strtok(NULL, TOKSTR); } } if (a1cnt <= 1) { return 2; } if (sbp->alphabet_code != BLASTNA_SEQ_CODE) { sbp->mat_dim1 = a1cnt; } return 0;}static Int2BlastScoreBlkMaxScoreSet(BlastScoreBlk* sbp){ Int4 score, maxscore; Int4 ** matrix; Int2 index1, index2; sbp->loscore = BLAST_SCORE_1MAX; sbp->hiscore = BLAST_SCORE_1MIN; matrix = sbp->matrix; for (index1=0; index1<sbp->alphabet_size; index1++) { maxscore=BLAST_SCORE_MIN; for (index2=0; index2<sbp->alphabet_size; index2++) { score = matrix[index1][index2]; if (score <= BLAST_SCORE_MIN || score >= BLAST_SCORE_MAX) continue; if (score > maxscore) { maxscore = score; } if (sbp->loscore > score) sbp->loscore = score; if (sbp->hiscore < score) sbp->hiscore = score; } sbp->maxscore[index1] = maxscore; }/* If the lo/hi-scores are BLAST_SCORE_MIN/BLAST_SCORE_MAX, (i.e., forgaps), then use other scores. */ if (sbp->loscore < BLAST_SCORE_1MIN) sbp->loscore = BLAST_SCORE_1MIN; if (sbp->hiscore > BLAST_SCORE_1MAX) sbp->hiscore = BLAST_SCORE_1MAX; return 0;}Int2BlastScoreBlkMatrixLoad(BlastScoreBlk* sbp){ Int2 status = 0; SNCBIPackedScoreMatrix* psm; Int4** matrix = sbp->matrix; int i, j; /* loop indices */ ASSERT(sbp); if (strcasecmp(sbp->name, "BLOSUM62") == 0) { psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum62; } else if (strcasecmp(sbp->name, "BLOSUM45") == 0) { psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum45; } else if (strcasecmp(sbp->name, "BLOSUM80") == 0) { psm = (SNCBIPackedScoreMatrix*) &NCBISM_Blosum80; } else if (strcasecmp(sbp->name, "PAM30") == 0) { psm = (SNCBIPackedScoreMatrix*) &NCBISM_Pam30; } else if (strcasecmp(sbp->name, "PAM70") == 0) { psm = (SNCBIPackedScoreMatrix*) &NCBISM_Pam70; } else { return -1; } /* Initialize with BLAST_SCORE_MIN */ for (i = 0; i < sbp->alphabet_size; i++) { for (j = 0; j < sbp->alphabet_size; j++) { matrix[i][j] = BLAST_SCORE_MIN; } } for (i = 0; i < sbp->alphabet_size; i++) { for (j = 0; j < sbp->alphabet_size; j++) { /* skip selenocysteine and gap */ if (i == AMINOACID_TO_NCBISTDAA['U'] || i == AMINOACID_TO_NCBISTDAA['-'] || j == AMINOACID_TO_NCBISTDAA['U'] || j == AMINOACID_TO_NCBISTDAA['-']) { continue; } matrix[i][j] = NCBISM_GetScore((const SNCBIPackedScoreMatrix*) psm, i, j); } } /* Sets dimensions of matrix. */ sbp->mat_dim1 = sbp->mat_dim2 = sbp->alphabet_size; return status;}Int2BLAST_ScoreBlkMatFill(BlastScoreBlk* sbp, char* matrix_path){ Int2 status = 0; if (sbp->read_in_matrix) { if (matrix_path && *matrix_path != NULLB) { FILE *fp = NULL; char* full_matrix_path = NULL; int path_len = strlen(matrix_path); int buflen = path_len + strlen(sbp->name); full_matrix_path = (char*) malloc((buflen + 1) * sizeof(char)); if (!full_matrix_path) { return -1; } strncpy(full_matrix_path, matrix_path, buflen); strncat(full_matrix_path, sbp->name, buflen - path_len); if ( (fp=fopen(full_matrix_path, "r")) == NULL) { return -1; } sfree(full_matrix_path); if ( (status=BlastScoreBlkMatRead(sbp, fp)) != 0) { fclose(fp); return status; } fclose(fp); } else { if ( (status = BlastScoreBlkMatrixLoad(sbp)) !=0) { return status; } } } else { /* Nucleotide BLAST only! */ if ( (status=BlastScoreBlkMatCreate(sbp)) != 0) return status; } if ( (status=BlastScoreBlkMaxScoreSet(sbp)) != 0) return status; return status;}Blast_ResFreq*Blast_ResFreqDestruct(Blast_ResFreq* rfp){ if (rfp == NULL) return NULL; if (rfp->prob0 != NULL) sfree(rfp->prob0); sfree(rfp); return rfp;}/* Allocates the Blast_ResFreq* and then fills in the frequencies in the probabilities.*/ Blast_ResFreq*Blast_ResFreqNew(const BlastScoreBlk* sbp){ Blast_ResFreq* rfp; if (sbp == NULL) { return NULL; } rfp = (Blast_ResFreq*) calloc(1, sizeof(Blast_ResFreq)); if (rfp == NULL) return NULL; rfp->alphabet_code = sbp->alphabet_code; rfp->prob0 = (double*) calloc(sbp->alphabet_size, sizeof(double)); if (rfp->prob0 == NULL) { rfp = Blast_ResFreqDestruct(rfp); return rfp; } rfp->prob = rfp->prob0 - sbp->alphabet_start; return rfp;}typedef struct BLAST_LetterProb { char ch; double p; } BLAST_LetterProb;#if 0/* Unused for right now, but do not remove *//* M. O. Dayhoff amino acid background frequencies */static BLAST_LetterProb Dayhoff_prob[] = { { 'A', 87.13 }, { 'C', 33.47 }, { 'D', 46.87 }, { 'E', 49.53 }, { 'F', 39.77 }, { 'G', 88.61 }, { 'H', 33.62 }, { 'I', 36.89 }, { 'K', 80.48 }, { 'L', 85.36 }, { 'M', 14.75 }, { 'N', 40.43 }, { 'P', 50.68 }, { 'Q', 38.26 }, { 'R', 40.90 }, { 'S', 69.58 }, { 'T', 58.54 }, { 'V', 64.72 }, { 'W', 10.49 }, { 'Y', 29.92 } };/* Stephen Altschul amino acid background frequencies */static BLAST_LetterProb Altschul_prob[] = { { 'A', 81.00 }, { 'C', 15.00 }, { 'D', 54.00 }, { 'E', 61.00 }, { 'F', 40.00 }, { 'G', 68.00 }, { 'H', 22.00 }, { 'I', 57.00 }, { 'K', 56.00 }, { 'L', 93.00 }, { 'M', 25.00 }, { 'N', 45.00 }, { 'P', 49.00 }, { 'Q', 39.00 }, { 'R', 57.00 }, { 'S', 68.00 }, { 'T', 58.00 }, { 'V', 67.00 }, { 'W', 13.00 }, { 'Y', 32.00 } };#endif/* amino acid background frequencies from Robinson and Robinson */static BLAST_LetterProb Robinson_prob[] = { { 'A', 78.05 }, { 'C', 19.25 }, { 'D', 53.64 }, { 'E', 62.95 }, { 'F', 38.56 }, { 'G', 73.77 }, { 'H', 21.99 }, { 'I', 51.42 }, { 'K', 57.44 }, { 'L', 90.19 }, { 'M', 22.43 }, { 'N', 44.87 }, { 'P', 52.03 }, { 'Q', 42.64 }, { 'R', 51.29 }, { 'S', 71.20 }, { 'T', 58.41 }, { 'V', 64.41 }, { 'W', 13.30 }, { 'Y', 32.16 } };#define STD_AMINO_ACID_FREQS Robinson_probstatic BLAST_LetterProb nt_prob[] = { { 'A', 25.00 }, { 'C', 25.00 }, { 'G', 25.00 }, { 'T', 25.00 } };/* Normalize the frequencies to "norm".*/static Int2Blast_ResFreqNormalize(const BlastScoreBlk* sbp, Blast_ResFreq* rfp, double norm){ Int2 alphabet_stop, index; double sum = 0., p; if (norm == 0.) return 1; alphabet_stop = sbp->alphabet_start + sbp->alphabet_size; for (index=sbp->alphabet_start; index<alphabet_stop; index++) { p = rfp->prob[index]; if (p < 0.) return 1; sum += p; } if (sum <= 0.) return 0; for (index=sbp->alphabet_start; index<alphabet_stop; index++) { rfp->prob[index] /= sum; rfp->prob[index] *= norm; } return 0;}Int2Blast_GetStdAlphabet(Uint1 alphabet_code, Uint1* residues, Uint4 residues_size){ Int2 index; if (residues_size < DIM(STD_AMINO_ACID_FREQS)) return -2; for (index=0; index<(int)DIM(STD_AMINO_ACID_FREQS); index++) { if (alphabet_code == BLASTAA_SEQ_CODE) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -