⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pssm_asn_subs.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 3 页
字号:
  if (ABP == ASN_BIOSEQ_DESCR) {    ABP_INC2;    asnp->abp = get_astr_seqdescr(asnp, descr);    ABP_INC2; 		/* skip nulls */  }  else { descr[0] = '\0';}  while (ABP == '\0') { ABP_INC2;}  if (ABP != ASN_BIOSEQ_INST) {    fprintf(stderr, "Bioseq - missing ID tag: %2x %2x\n",ABP, asnp->abp[1]);    return asnp->abp;  }  else {     ABP_INC2;    asnp->abp = get_astr_seqinst(asnp, query, nq);    ABP_INC2; 		/* skip nulls */  }  if (end_seq--) {    ABP_INC2;  }  return asnp->abp;}unsigned char *get_pssm_freqs(struct asn_bstruct *asnp,	       double **freqs,	       int n_rows,  	       int n_cols,	       int by_row) {  int i_rows, i_cols;  int in_seq = 0;  double f_val;  asnp->abp = chk_asn_buf(asnp,4);  if (ABP == ASN_SEQ) {    in_seq = 1;    ABP_INC2;    in_seq = 1;  }  if (!by_row) {    for (i_cols = 0; i_cols < n_cols; i_cols++) {      for (i_rows = 0; i_rows < n_rows; i_rows++) {	asnp->abp = get_astr_packedfloat(asnp, &f_val);	freqs[i_cols][i_rows] = f_val;      }    }  }  else {    for (i_rows = 0; i_rows < n_rows; i_rows++) {      for (i_cols = 0; i_cols < n_cols; i_cols++) {	asnp->abp = get_astr_packedfloat(asnp, &f_val);	freqs[i_rows][i_cols] = f_val;      }    }  }  if (in_seq) {asnp->abp +=2;}	/* skip nulls */  ABP_INC2;  return asnp->abp;}unsigned char *get_pssm_intermed(struct asn_bstruct *asnp,		  double **freqs,		  int n_rows,		  int n_cols,		  int by_row) {  asnp->abp = chk_asn_buf(asnp,4);  if (ABP == ASN_SEQ) {    ABP_INC2;    if (ABP == ASN_PSSM_FREQS) {      ABP_INC2;      asnp->abp = get_pssm_freqs(asnp, freqs, n_rows, n_cols, by_row);    }    asnp->abp +=2;	/* skip nulls */  }  ABP_INC2;  return asnp->abp;}#define ASN_PSSM_PARAMS 161#define ASN_PSSM_PARAMS_PSEUDOCNT 160#define ASN_PSSM_PARAMS_RPSPARAMS 161#define ASN_PSSM_RPSPARAMS_MATRIX 160#define ASN_PSSM_RPSPARAMS_GAPOPEN 161#define ASN_PSSM_RPSPARAMS_GAPEXT 162unsigned char *get_pssm_rpsparams(struct asn_bstruct *asnp,	       char *matrix,	       int *gap_open,	       int *gap_ext) {  int end_seq=0;  asnp->abp = chk_asn_buf(asnp,4);  if (ABP == ASN_SEQ) {    ABP_INC2;    end_seq++;  }  if (ABP == ASN_PSSM_RPSPARAMS_MATRIX) {    ABP_INC2;    asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2;  }  if (ABP == ASN_PSSM_RPSPARAMS_GAPOPEN) {    ABP_INC2;    asnp->abp = get_astr_int(asnp, gap_open)+2;  }  if (ABP == ASN_PSSM_RPSPARAMS_GAPEXT) {    ABP_INC2;    asnp->abp = get_astr_int(asnp, gap_ext)+2;  }  if (end_seq) { chk_asn_buf(asnp,end_seq * 2); }  while (end_seq-- > 0) { ABP_INC2; }  return asnp->abp;}unsigned char *get_pssm_params(struct asn_bstruct *asnp,	       int *pseudo_cnts,	       char *matrix,	       int *gap_open,	       int *gap_ext) {  int end_seq=0;  asnp->abp = chk_asn_buf(asnp,6);  if (ABP == ASN_SEQ) {    ABP_INC2;    end_seq++;  }  if (ABP == ASN_PSSM_PARAMS_PSEUDOCNT) {    ABP_INC2;    asnp->abp = get_astr_int(asnp, pseudo_cnts)+2;  }  if (ABP == ASN_PSSM_PARAMS_RPSPARAMS) {    ABP_INC2;    asnp->abp = get_pssm_rpsparams(asnp, matrix, gap_open, gap_ext);    ABP_INC2;  }  while (end_seq-- > 0) { ABP_INC2; }  return asnp->abp;}unsigned char *get_pssm2_scores(struct asn_bstruct *asnp,		 int *have_scores		 ) {  int end_seq=0;  if (have_scores != NULL) *have_scores = 0;  if (ABP == ASN_SEQ) {    end_seq++;    ABP_INC2;  }  if (ABP == '\0') {	/* no scores */    if (end_seq) ABP_INC2;  }  else {    *have_scores = 1;  }  return asnp->abp;}unsigned char *get_pssm2_intermed(struct asn_bstruct *asnp,		   double ***freqs,		   int n_rows,		   int n_cols) {  int i;  double **my_freqs;  if ((my_freqs = (double **) calloc(n_cols, sizeof(double *)))==NULL) {    fprintf(stderr, " cannot allocate freq cols - %d\n", n_cols);    exit(1);  }  if ((my_freqs[0] = (double *) calloc(n_cols * n_rows, sizeof(double)))==NULL) {    fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", n_rows, n_cols);    exit(1);  }  for (i=1; i < n_cols; i++) {    my_freqs[i] = my_freqs[i-1] + n_rows;  }  *freqs = my_freqs;  chk_asn_buf(asnp, 8);  return get_pssm_freqs(asnp, my_freqs, n_rows, n_cols, 0);}intparse_pssm2_asn(struct asn_bstruct *asnp,		int *gi,		char *name,		char *acc,		char *descr,		unsigned char **query, 		int *nq,		int *n_rows,		int *n_cols,		double ***freqs,		int *pseudo_cnts,		char *matrix, 		double *lambda_p) {  int is_protein;  int have_rows=0, have_cols=0;  int have_scores=0;  chk_asn_buf(asnp, 32);  /* first get the query */  if (memcmp(asnp->abp, "\241\2000\200",4) != 0) {    asn_error("parse_pssm2_asn","ASN_PSSM2_QUERY",ASN_PSSM2_QUERY,asnp,4);    return -1;  }  else {    asnp->abp+=4;    asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4;  }  /* finish up the nulls */  /* perhaps we have parsed correctly and do not need this */  /*   while (ABP == '\0') { ABP_INC2;} */  if (memcmp(asnp->abp, "\242\2000\200",4) != 0) {    asn_error("parse_pssm2_asn","ASN_PSSM2_MATRIX",ASN_PSSM2_MATRIX,asnp,4);    return -1;  }  else {    asnp->abp+=4;    if (ABP == ASN_PSSM_IS_PROT) {      ABP_INC2;      asnp->abp = get_astr_bool(asnp, &is_protein)+2;    }    if (ABP == ASN_PSSM2_MATRIX_NAME) {      ABP_INC2;      asnp->abp = get_astr_str(asnp, matrix, MAX_SSTR) + 2;    }    if (ABP ==   ASN_PSSM2_NCOLS) {      ABP_INC2;      asnp->abp = get_astr_int(asnp, n_cols)+2;      have_cols = 1;    }        if (ABP ==  ASN_PSSM2_NROWS) {      ABP_INC2;      asnp->abp = get_astr_int(asnp, n_rows)+2;      have_rows = 1;    }    if (ABP == ASN_PSSM2_SCORES) {      /* right now, this is always empty */      ABP_INC2;      asnp->abp = get_pssm2_scores(asnp, &have_scores) + 2;      if (have_scores) return 0;    }    if (ABP == ASN_PSSM2_KARLIN_K) {      ABP_INC2;      asnp->abp = get_astr_packedfloat(asnp,lambda_p) + 2;    }    if (ABP == ASN_PSSM2_FREQS) {      asnp->abp += 4;      asnp->abp = get_pssm2_intermed(asnp, freqs, *n_rows, *n_cols) + 4;    }  }  return 1;}intparse_pssm_asn(FILE *afd,	       int *gi,	       char *name,	       char *acc,	       char *descr,	       unsigned char **query, 	       int *nq,	       int *n_rows,	       int *n_cols,	       double ***freqs,	       int *pseudo_cnts,	       char *matrix,	       int *gap_open,	       int *gap_ext,	       double *lambda_p) {  int is_protein, pssm_version;  int i;  int have_rows, have_cols, by_col;  double **my_freqs;  struct asn_bstruct *asnp;  asnp = new_asn_bstruct(ASN_BUF);  asnp->fd = afd;  asnp->len = ASN_BUF;  asnp->abp = asnp->buf_max = asnp->buf + ASN_BUF;  chk_asn_buf(asnp, 32);  if (memcmp(asnp->abp, "0\200\240\200",4) != 0) {    fprintf(stderr, "improper PSSM header -");    return -1;  }  else {asnp->abp+=4;}  if (ABP == ASN_IS_INT) {    asnp->abp = get_astr_int(asnp, &pssm_version)+2;    if (pssm_version != 2) {      fprintf(stderr, "PSSM2 version mismatch: %d\n",pssm_version);      return -1;    }    *gap_open = *gap_ext = 0;    return parse_pssm2_asn(asnp, gi, name, acc, descr,			   query, nq,			   n_rows, n_cols, freqs,			   pseudo_cnts, matrix,			   lambda_p);  }  if (ABP == ASN_SEQ) { asnp->abp += 2;  }  if (ABP == ASN_PSSM_IS_PROT ) {    ABP_INC2;    asnp->abp = get_astr_bool(asnp, &is_protein)+2;  }  if (ABP == ASN_PSSM_NROWS ) {    ABP_INC2;    asnp->abp = get_astr_int(asnp, n_rows)+2;    if (*n_rows > 0) { have_rows = 1; }    else {      fprintf(stderr, " bad n_row count\n");      exit(1);    }  }  if (ABP == ASN_PSSM_NCOLS ) {    ABP_INC2;    asnp->abp = get_astr_int(asnp, n_cols)+2;    if (*n_cols > 0) {      have_cols = 1;    }    else {      fprintf(stderr, " bad n_row count\n");      exit(1);    }  }  if (ABP == ASN_PSSM_BYCOL ) {    ABP_INC2;    asnp->abp = get_astr_bool(asnp, &by_col)+2;  }  /* we have read everything up to the query      n_cols gives us the query length, which we can allocate;  */  if (ABP == ASN_PSSM_QUERY ) {    asnp->abp+=4;	/* skip token and CHOICE */    asnp->abp = get_astr_bioseq(asnp, gi, name, acc, descr, query, nq) + 4;    *nq = *n_cols;  }  /* finish up the nulls */  while (ABP == '\0') { asnp->abp += 2;}  if (ABP == ASN_PSSM_INTERMED_DATA) {    if (!have_rows || !have_cols) {      fprintf(stderr, " cannot allocate freq - missing rows/cols - %d/%d\n",	      have_rows, have_cols);      return -1;    }    if ((my_freqs = (double **) calloc(*n_cols, sizeof(double *)))==NULL) {      fprintf(stderr, " cannot allocate freq cols - %d\n", *n_cols);      return -1;    }    if ((my_freqs[0] = (double *) calloc(*n_cols * *n_rows, sizeof(double)))==NULL) {      fprintf(stderr, " cannot allocate freq rows * cols - %d * %d\n", *n_rows, *n_cols);      return -1;    }    for (i=1; i < *n_cols; i++) {      my_freqs[i] = my_freqs[i-1] + *n_rows;    }    *freqs = my_freqs;    ABP_INC2;    asnp->abp = get_pssm_intermed(asnp, my_freqs, *n_rows, *n_cols, by_col);    asnp->abp += 4;  }  if (ABP == ASN_PSSM_PARAMS ) {      ABP_INC2;      asnp->abp = get_pssm_params(asnp, pseudo_cnts, matrix, gap_open, gap_ext) + 2;  }  else if (ABP == 0) {ABP_INC2;}  free_asn_bstruct(asnp);  return 1;}intparse_pssm_asn_fa( FILE *fd, 		   int *n_rows_p, int *n_cols_p,		   unsigned char **query,		   double ***freq2d,		   char *matrix,		   int *gap_open_p,		   int *gap_extend_p,		   double *lambda_p) {  int qi, rj;  int gi;  double tmp_freqs[COMPO_LARGEST_ALPHABET];  char name[MAX_SSTR], acc[MAX_SSTR], descr[MAX_STR];  int  nq;  int pseudo_cnts, ret_val;  /* parse the file */  ret_val = parse_pssm_asn(fd, &gi, name, acc, descr, query, &nq,			   n_rows_p, n_cols_p, freq2d,			   &pseudo_cnts, matrix, gap_open_p, gap_extend_p,			   lambda_p);  if (ret_val <=0) return ret_val;  for (qi = 0; qi < *n_cols_p; qi++) {    for (rj = 0; rj < *n_rows_p; rj++) { tmp_freqs[rj] = (*freq2d)[qi][rj];}    for (rj = 0; rj < COMPO_NUM_TRUE_AA; rj++) {      (*freq2d)[qi][rj] = tmp_freqs[pssm_aa_order[rj]];    }  }  return 1;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -