📄 pssm_asn_subs.c
字号:
if (ABP == ASN_BIOSEQ_DESCR) { ABP_INC2; asnp->abp = get_astr_seqdescr(asnp, descr); ABP_INC2; /* skip nulls */ } else { descr[0] = '\0';} while (ABP == '\0') { ABP_INC2;} if (ABP != ASN_BIOSEQ_INST) { fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",ABP, asnp->abp[1]); return asnp->abp; } else { ABP_INC2; asnp->abp = get_astr_seqinst(asnp, query, nq); ABP_INC2; /* skip nulls */ } if (end_seq--) { ABP_INC2; } return asnp->abp;}unsigned char *get_pssm_freqs(struct asn_bstruct *asnp, double **freqs, int n_rows, int n_cols, int by_row) { int i_rows, i_cols; int in_seq = 0; double f_val; asnp->abp = chk_asn_buf(asnp,4); if (ABP == ASN_SEQ) { in_seq = 1; ABP_INC2; in_seq = 1; } if (!by_row) { for (i_cols = 0; i_cols < n_cols; i_cols++) { for (i_rows = 0; i_rows < n_rows; i_rows++) { asnp->abp = get_astr_packedfloat(asnp, &f_val); freqs[i_cols][i_rows] = f_val; } } } else { for (i_rows = 0; i_rows < n_rows; i_rows++) { for (i_cols = 0; i_cols < n_cols; i_cols++) { asnp->abp = get_astr_packedfloat(asnp, &f_val); freqs[i_rows][i_cols] = f_val; } } } if (in_seq) {asnp->abp +=2;} /* skip nulls */ ABP_INC2; return asnp->abp;}unsigned char *get_pssm_intermed(struct asn_bstruct *asnp, double **freqs, int n_rows, int n_cols, int by_row) { asnp->abp = chk_asn_buf(asnp,4); if (ABP == ASN_SEQ) { ABP_INC2; if (ABP == ASN_PSSM_FREQS) { ABP_INC2; asnp->abp = get_pssm_freqs(asnp, freqs, n_rows, n_cols, by_row); } asnp->abp +=2; /* skip nulls */ } ABP_INC2; return asnp->abp;}#define ASN_PSSM_PARAMS 161#define ASN_PSSM_PARAMS_PSEUDOCNT 160#define ASN_PSSM_PARAMS_RPSPARAMS 161#define ASN_PSSM_RPSPARAMS_MATRIX 160#define ASN_PSSM_RPSPARAMS_GAPOPEN 161#define ASN_PSSM_RPSPARAMS_GAPEXT 162unsigned char *get_pssm_rpsparams(struct asn_bstruct *asnp, char *matrix, int *gap_open, int *gap_ext) { int end_seq=0; asnp->abp = chk_asn_buf(asnp,4); if (ABP == ASN_SEQ) { ABP_INC2; end_seq++; } if (ABP == ASN_PSSM_RPSPARAMS_MATRIX) { ABP_INC2; asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2; } if (ABP == ASN_PSSM_RPSPARAMS_GAPOPEN) { ABP_INC2; asnp->abp = get_astr_int(asnp, gap_open)+2; } if (ABP == ASN_PSSM_RPSPARAMS_GAPEXT) { ABP_INC2; asnp->abp = get_astr_int(asnp, gap_ext)+2; } if (end_seq) { chk_asn_buf(asnp,end_seq * 2); } while (end_seq-- > 0) { ABP_INC2; } return asnp->abp;}unsigned char *get_pssm_params(struct asn_bstruct *asnp, int *pseudo_cnts, char *matrix, int *gap_open, int *gap_ext) { int end_seq=0; asnp->abp = chk_asn_buf(asnp,6); if (ABP == ASN_SEQ) { ABP_INC2; end_seq++; } if (ABP == ASN_PSSM_PARAMS_PSEUDOCNT) { ABP_INC2; asnp->abp = get_astr_int(asnp, pseudo_cnts)+2; } if (ABP == ASN_PSSM_PARAMS_RPSPARAMS) { ABP_INC2; asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open, gap_ext); ABP_INC2; } while (end_seq-- > 0) { ABP_INC2; } return asnp->abp;}unsigned char *get_pssm2_scores(struct asn_bstruct *asnp, int *have_scores ) { int end_seq=0; if (have_scores != NULL) *have_scores = 0; if (ABP == ASN_SEQ) { end_seq++; ABP_INC2; } if (ABP == '\0') { /* no scores */ if (end_seq) ABP_INC2; } else { *have_scores = 1; } return asnp->abp;}unsigned char *get_pssm2_intermed(struct asn_bstruct *asnp, double ***freqs, int n_rows, int n_cols) { int i; double **my_freqs; if ((my_freqs = (double **) calloc(n_cols, sizeof(double *)))==NULL) { fprintf(stderr, " cannot allocate freq cols - %d\n", n_cols); exit(1); } if ((my_freqs[0] = (double *) calloc(n_cols * n_rows, sizeof(double)))==NULL) { fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", n_rows, n_cols); exit(1); } for (i=1; i < n_cols; i++) { my_freqs[i] = my_freqs[i-1] + n_rows; } *freqs = my_freqs; chk_asn_buf(asnp, 8); return get_pssm_freqs(asnp, my_freqs, n_rows, n_cols, 0);}intparse_pssm2_asn(struct asn_bstruct *asnp, int *gi, char *name, char *acc, char *descr, unsigned char **query, int *nq, int *n_rows, int *n_cols, double ***freqs, int *pseudo_cnts, char *matrix, double *lambda_p) { int is_protein; int have_rows=0, have_cols=0; int have_scores=0; chk_asn_buf(asnp, 32); /* first get the query */ if (memcmp(asnp->abp, "\241\2000\200",4) != 0) { asn_error("parse_pssm2_asn","ASN_PSSM2_QUERY",ASN_PSSM2_QUERY,asnp,4); return -1; } else { asnp->abp+=4; asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4; } /* finish up the nulls */ /* perhaps we have parsed correctly and do not need this */ /* while (ABP == '\0') { ABP_INC2;} */ if (memcmp(asnp->abp, "\242\2000\200",4) != 0) { asn_error("parse_pssm2_asn","ASN_PSSM2_MATRIX",ASN_PSSM2_MATRIX,asnp,4); return -1; } else { asnp->abp+=4; if (ABP == ASN_PSSM_IS_PROT) { ABP_INC2; asnp->abp = get_astr_bool(asnp, &is_protein)+2; } if (ABP == ASN_PSSM2_MATRIX_NAME) { ABP_INC2; asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2; } if (ABP == ASN_PSSM2_NCOLS) { ABP_INC2; asnp->abp = get_astr_int(asnp, n_cols)+2; have_cols = 1; } if (ABP == ASN_PSSM2_NROWS) { ABP_INC2; asnp->abp = get_astr_int(asnp, n_rows)+2; have_rows = 1; } if (ABP == ASN_PSSM2_SCORES) { /* right now, this is always empty */ ABP_INC2; asnp->abp = get_pssm2_scores(asnp, &have_scores) + 2; if (have_scores) return 0; } if (ABP == ASN_PSSM2_KARLIN_K) { ABP_INC2; asnp->abp = get_astr_packedfloat(asnp,lambda_p) + 2; } if (ABP == ASN_PSSM2_FREQS) { asnp->abp += 4; asnp->abp = get_pssm2_intermed(asnp, freqs, *n_rows, *n_cols) + 4; } } return 1;}intparse_pssm_asn(FILE *afd, int *gi, char *name, char *acc, char *descr, unsigned char **query, int *nq, int *n_rows, int *n_cols, double ***freqs, int *pseudo_cnts, char *matrix, int *gap_open, int *gap_ext, double *lambda_p) { int is_protein, pssm_version; int i; int have_rows, have_cols, by_col; double **my_freqs; struct asn_bstruct *asnp; asnp = new_asn_bstruct(ASN_BUF); asnp->fd = afd; asnp->len = ASN_BUF; asnp->abp = asnp->buf_max = asnp->buf + ASN_BUF; chk_asn_buf(asnp, 32); if (memcmp(asnp->abp, "0\200\240\200",4) != 0) { fprintf(stderr, "improper PSSM header -"); return -1; } else {asnp->abp+=4;} if (ABP == ASN_IS_INT) { asnp->abp = get_astr_int(asnp, &pssm_version)+2; if (pssm_version != 2) { fprintf(stderr, "PSSM2 version mismatch: %d\n",pssm_version); return -1; } *gap_open = *gap_ext = 0; return parse_pssm2_asn(asnp, gi, name, acc, descr, query, nq, n_rows, n_cols, freqs, pseudo_cnts, matrix, lambda_p); } if (ABP == ASN_SEQ) { asnp->abp += 2; } if (ABP == ASN_PSSM_IS_PROT ) { ABP_INC2; asnp->abp = get_astr_bool(asnp, &is_protein)+2; } if (ABP == ASN_PSSM_NROWS ) { ABP_INC2; asnp->abp = get_astr_int(asnp, n_rows)+2; if (*n_rows > 0) { have_rows = 1; } else { fprintf(stderr, " bad n_row count\n"); exit(1); } } if (ABP == ASN_PSSM_NCOLS ) { ABP_INC2; asnp->abp = get_astr_int(asnp, n_cols)+2; if (*n_cols > 0) { have_cols = 1; } else { fprintf(stderr, " bad n_row count\n"); exit(1); } } if (ABP == ASN_PSSM_BYCOL ) { ABP_INC2; asnp->abp = get_astr_bool(asnp, &by_col)+2; } /* we have read everything up to the query n_cols gives us the query length, which we can allocate; */ if (ABP == ASN_PSSM_QUERY ) { asnp->abp+=4; /* skip token and CHOICE */ asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4; *nq = *n_cols; } /* finish up the nulls */ while (ABP == '\0') { asnp->abp += 2;} if (ABP == ASN_PSSM_INTERMED_DATA) { if (!have_rows || !have_cols) { fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n", have_rows, have_cols); return -1; } if ((my_freqs = (double **) calloc(*n_cols, sizeof(double *)))==NULL) { fprintf(stderr, " cannot allocate freq cols - %d\n", *n_cols); return -1; } if ((my_freqs[0] = (double *) calloc(*n_cols * *n_rows, sizeof(double)))==NULL) { fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", *n_rows, *n_cols); return -1; } for (i=1; i < *n_cols; i++) { my_freqs[i] = my_freqs[i-1] + *n_rows; } *freqs = my_freqs; ABP_INC2; asnp->abp = get_pssm_intermed(asnp, my_freqs, *n_rows, *n_cols, by_col); asnp->abp += 4; } if (ABP == ASN_PSSM_PARAMS ) { ABP_INC2; asnp->abp = get_pssm_params(asnp, pseudo_cnts, matrix, gap_open, gap_ext) + 2; } else if (ABP == 0) {ABP_INC2;} free_asn_bstruct(asnp); return 1;}intparse_pssm_asn_fa( FILE *fd, int *n_rows_p, int *n_cols_p, unsigned char **query, double ***freq2d, char *matrix, int *gap_open_p, int *gap_extend_p, double *lambda_p) { int qi, rj; int gi; double tmp_freqs[COMPO_LARGEST_ALPHABET]; char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR]; int nq; int pseudo_cnts, ret_val; /* parse the file */ ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq, n_rows_p, n_cols_p, freq2d, &pseudo_cnts, matrix, gap_open_p, gap_extend_p, lambda_p); if (ret_val <=0) return ret_val; for (qi = 0; qi < *n_cols_p; qi++) { for (rj = 0; rj < *n_rows_p; rj++) { tmp_freqs[rj] = (*freq2d)[qi][rj];} for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) { (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]]; } } return 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -