⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pssm_asn_subs.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequence database
💻 C
📖 第 1 页 / 共 3 页
字号:
/* pssm_asn_subs.c *//* $Name: fa35_03_06 $ - $Id: pssm_asn_subs.c,v 1.26 2008/03/04 14:20:50 wrp Exp $ *//* copyright (C) 2005 by William R. Pearson and the U. of Virginia *//* read_asn_dest modified 26-Jul-2007 to skip over text/bytes if dest is NULL *//* this code is designed to parse the ASN.1 binary encoded scoremat   object produced by blastpgp -C file.ckpt_asn -u 2 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "defs.h"int parse_pssm_asn();int parse_pssm2_asn();intparse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols,		  unsigned char **query, double ***freqs,		  char *matrix, int *gap_open, int *gap_extend,		  double *lambda);#define COMPO_NUM_TRUE_AA 20/**positions of true characters in protein alphabet*//*static int trueCharPositions[COMPO_NUM_TRUE_AA] = {  1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22};*/#define COMPO_LARGEST_ALPHABET 28/*static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};static int alphaConvert[COMPO_LARGEST_ALPHABET] = {  (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15,  16, 19,   17, (-1), 18, (-1), (-1), (-1), (-1), (-1)};*/int pssm_aa_order[20] = { 1,  /*A*/			  16, /*R*/			  13, /*N*/			   4, /*D*/			   3, /*C*/			  15, /*Q*/			   5, /*E*/			   7, /*G*/			   8, /*H*/			   9, /*I*/			  11, /*L*/			  10, /*K*/			  12, /*M*/			   6, /*F*/			  14, /*P*/			  17, /*S*/			  18, /*T*/			  20, /*W*/			  22, /*Y*/			  19}; /*V*/#define ABP *asnp->abp#define ABPP asnp->abp#define ABP_INC2 asnp->abp += 2#define ASN_SEQ 48#define ASN_SET 48#define ASN_SEQOF 49#define ASN_SETOF 49#define ASN_PSSM_QUERY 166#define ASN_PSSM2_VERSION 160#define ASN_PSSM2_QUERY 161#define ASN_PSSM2_MATRIX 162#define ASN_PSSM_IS_PROT 160#define ASN_PSSM_NROWS 162#define ASN_PSSM_NCOLS 163#define ASN_PSSM_BYCOL 165#define ASN_PSSM_INTERMED_DATA 167#define ASN_PSSM_FREQS 162#define ASN_PSSM2_IS_PROTEIN 160#define ASN_PSSM2_MATRIX_NAME 161#define ASN_PSSM2_MATRIX_COMMENT 162	/* not used */#define 
ASN_PSSM2_NCOLS 163#define ASN_PSSM2_NROWS 164#define ASN_PSSM2_SCORES 165#define ASN_PSSM2_KARLIN_K 166#define ASN_PSSM2_FREQS 167#define ASN_IS_STR 26#define ASN_IS_SSTR 65#define ASN_IS_INT  2#define ASN_IS_BOOL 1#define ASN_IS_OCTSTR 4#define ASN_IS_OCTSSTR 65#define ASN_IS_REAL 0#define ASN_IS_ENUM 10#define ASN_IS_ENUM0 1#define ASN_OBJ_INT 160#define ASN_OBJ_STR 161struct asn_bstruct {  FILE *fd;  unsigned char *buf;  unsigned char *abp;  unsigned char *buf_max;  int len;};#define ASN_BUF 4096void *new_asn_bstruct(int buf_siz) {  struct asn_bstruct *asnp;  if ((asnp=calloc(1,sizeof(struct asn_bstruct)))==NULL) {    fprintf(stderr, "cannot allocate asn_bstruct\n");    exit(1);  }  if ((asnp->buf = (unsigned char *)calloc(buf_siz, sizeof(char))) == NULL ) {    fprintf(stderr, " cannot allocate asn_buf (%d)\n",buf_siz);    exit(1);  }  return asnp;}voidfree_asn_bstruct(struct asn_bstruct *asnp) {  if (asnp == NULL) return;  if (asnp->buf != NULL) free(asnp->buf);  free(asnp);}unsigned char *chk_asn_buf(struct asn_bstruct *asnp, int v) {  int new_buf;    if (v > ASN_BUF) {    fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n",	    v, ASN_BUF);    exit(1);  }  if (asnp->abp + v > asnp->buf_max) {    /* move down the left over stuff */    asnp->len = asnp->buf_max - asnp->abp;    memmove(asnp->buf, asnp->abp, asnp->len);    asnp->abp = asnp->buf;    new_buf = ASN_BUF - asnp->len;        if (asnp->fd && !feof(asnp->fd) && 	(new_buf=fread(asnp->buf + asnp->len, sizeof(char), new_buf, asnp->fd)) != 0) {      asnp->len += new_buf;    }    asnp->buf_max = asnp->buf + asnp->len;    if (asnp->len < v) {      fprintf(stderr, " Unable to read %d bytes\n",v);      exit(1);    }  }  /* otherwise, v bytes are currently in the buffer */  return asnp->abp;}unsigned char *asn_error(char *func, char *token, int tval, 	  struct asn_bstruct *asnp, int len) {  int i;  fprintf(stderr," %s %s [%0x]:",func, token, tval);  for (i=0; i<len; i++) {    
fprintf(stderr," %0x",asnp->abp[i]);  }  fprintf(stderr,"\n");  return asnp->abp;}/*    read_asn_dest reads v bytes into oct_str if v <= o_len - otherwise   fails - the correct size buffer must be pre-allocated read_asn_dest   is required for ASN data entities that are longer than ASN_BUF   (1024)      skip over if oct_str==NULL;*/unsigned char *read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) {  int new_buf;  unsigned char *oct_ptr;    if (oct_str != NULL && v > o_len) {    fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n",	    v, o_len);    exit(1);  }  if (asnp->abp + v <= asnp->buf_max) {    if (oct_str != NULL) memmove(oct_str, asnp->abp, v);    return asnp->abp+v;  }  else {    /* move down the left over stuff */    asnp->len = asnp->buf_max - asnp->abp;    if (oct_str != NULL)  memmove(oct_str, asnp->abp, asnp->len);    oct_ptr = oct_str+asnp->len;    v -= asnp->len;    asnp->abp = asnp->buf;    new_buf = ASN_BUF;        while ((new_buf=fread(asnp->buf, sizeof(char), new_buf, asnp->fd)) != 0) {      asnp->len = new_buf;      asnp->buf_max = asnp->buf + asnp->len;      if (v <= new_buf) {	/* we have it all this time */	if (oct_str != NULL)  memmove(oct_ptr, asnp->buf, v);	asnp->len -= v;	asnp->abp = asnp->buf + v;	break;      }      else {	/* we need to read some more */	if (oct_str != NULL)  memmove(oct_ptr, asnp->buf, new_buf);	v -= new_buf;	new_buf = ASN_BUF;      }    }  }  return asnp->buf + v;}unsigned char *get_astr_bool(struct asn_bstruct *asnp, int *val) {  int v_len, v;  asnp->abp = chk_asn_buf(asnp,5);  v = 0;  if (*asnp->abp++ != 1) { /* check for int */    fprintf(stderr," bool missing\n");  }  else {    v_len = *asnp->abp++;    if (v_len != 1) {      fprintf(stderr, "boolean length != 1 : %d\n", v_len);      v = *asnp->abp++;    }    else { v = *asnp->abp++;}  }  *val = v;  return asnp->abp;}unsigned char *get_astr_int(struct asn_bstruct *asnp,	    int *val) {  int v_len, v;  v = 0;  
asnp->abp = chk_asn_buf(asnp,8);  if (*asnp->abp++ != ASN_IS_INT) { /* check for int */    return asn_error("get_astr_int", "ASN_IS_INT", ASN_IS_INT, asnp, 4);  }  else {    v_len = *asnp->abp++;    while (v_len-- > 0) {      v *= 256;      v += *asnp->abp++;    }  }  *val = v;  return asnp->abp;}unsigned char *get_astr_real(struct asn_bstruct *asnp,	    double *val) {  int v_len, v;  asnp->abp = chk_asn_buf(asnp,16);  *val = 0.0;  if (ABP != '\0') {    fprintf(stderr," float missing\n");    return asnp->abp;  }  else {    sscanf((char *)ABPP,"%lg",val);    while (ABP) { asnp->abp++;}	/* get to EOS */    asnp->abp++;  }  return asnp->abp;}unsigned char *get_astr_enum(struct asn_bstruct *asnp, int *val) {  int v_len, v;  asnp->abp = chk_asn_buf(asnp,5);  v = 0;  if (*asnp->abp++ != ASN_IS_ENUM) { /* check for int */    fprintf(stderr," enum missing\n");  }  else {    v_len = *asnp->abp++;    while (v_len-- > 0) { v *= 256;  v += *asnp->abp++; }  }  *val = v;  return asnp->abp;}unsigned char *get_astr_packedfloat(struct asn_bstruct *asnp, double *val) {  int v_len, v;  char tmp_str[64];  asnp->abp = chk_asn_buf(asnp,2);  v = 0;  if (*asnp->abp++ != 9) { /* check for packed float */    fprintf(stderr," float missing\n");    *val = 0;    return asnp->abp;  }  else {    v_len = *asnp->abp++;    if (v_len > 63) {      fprintf(stderr," real string too long: %d\n",v_len);    }    asnp->abp = chk_asn_buf(asnp,v_len);    if (v_len == 2  && *asnp->abp == '\0' && *(asnp->abp+1)=='0') {      ABP_INC2;      *val = 0.0;    }    else {	/* copy and scan it */      if (*asnp->abp != '\0') {	fprintf(stderr, " packedfloat - expected 0, got %d\n", *asnp->abp);	*val = -1.0;	return asnp->abp;      }      asnp->abp++;      strncpy(tmp_str, (char *)asnp->abp, sizeof(tmp_str)-1);      tmp_str[v_len-1] = '\0';      tmp_str[63] = '\0';      sscanf(tmp_str,"%lg",val);      asnp->abp += v_len-1;    }  }  return asnp->abp;}unsigned char *get_astr_str(struct asn_bstruct *asnp, char *text, int 
t_len) {  int v_len, tv_len;  asnp->abp = chk_asn_buf(asnp,2);  if (text != NULL) text[0] = '\0';  if (ABP != ASN_IS_STR  && ABP != ASN_IS_SSTR) { /* check for str */    return asn_error("get_astr_str", "ASN_IS_STR", ASN_IS_STR, asnp, 4);  }  asnp->abp++;  v_len = *asnp->abp++;  if (v_len > 128) { /* need to read the length from the next bytes */    tv_len = v_len &0x7f;    asnp->abp = chk_asn_buf(asnp,tv_len);    for (v_len =0; tv_len; tv_len--) { v_len = (v_len << 8) + *asnp->abp++; }  }  /* read v_len bytes */  if (v_len < t_len) { /* the string fits in the buffer */    asnp->abp = read_asn_dest(asnp,v_len, (unsigned char *)text, t_len);  }  else {	/* it does not fit, fill the buffer and skip */    if (t_len > 0)       asnp->abp = read_asn_dest(asnp,t_len, (unsigned char *)text, t_len);    asnp->abp = read_asn_dest(asnp,v_len - t_len, NULL, 0);  }  if (text != NULL && t_len > 0) {text[min(v_len,t_len)]='\0';}  return asnp->abp;}unsigned char *get_astr_octstr(struct asn_bstruct *asnp,	       unsigned char *oct_str,	       int o_len) {  int q_len, v_len;  asnp->abp = chk_asn_buf(asnp,2);  if (ABP == ASN_IS_OCTSTR || ABP == ASN_IS_OCTSSTR) {    ABPP++;    /* get length  of length */    if (ABP > 128) {      v_len = *asnp->abp++ & 0x7f;      asnp->abp = chk_asn_buf(asnp,v_len);      q_len = 0;      while (v_len-- > 0) {	q_len *= 256;	q_len += *asnp->abp++;      }    }    else {      q_len = *asnp->abp++ & 0x7f;    }    if (q_len < o_len) { /* the string fits in the buffer */      asnp->abp = read_asn_dest(asnp,q_len, oct_str, o_len);    }    else {	/* it does not fit, fill the buffer and skip */      asnp->abp = read_asn_dest(asnp,o_len, oct_str, o_len);      asnp->abp = read_asn_dest(asnp,q_len - o_len, NULL, 0);    }    if (oct_str != NULL && o_len > 0) oct_str[min(q_len,o_len)]='\0';    /*    asnp->abp += 2; */	/* skip characters and NULL's */  }  return asnp->abp;}/* something to try to skip over stuff we don't want */unsigned char *get_astr_junk(struct 
asn_bstruct *asnp) {  int seq_cnt = 0;  int tmp;  char string[256];  while (ABP) {    if ( ABP  == ASN_SEQ) { ABP_INC2; seq_cnt++;}    else if ( ABP == ASN_IS_BOOL ) {      ABP_INC2;      ABPP = get_astr_int(asnp, &tmp) + 2;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -