📄 pssm_asn_subs.c
字号:
/* pssm_asn_subs.c *//* $Name: fa35_03_06 $ - $Id: pssm_asn_subs.c,v 1.26 2008/03/04 14:20:50 wrp Exp $ *//* copyright (C) 2005 by William R. Pearson and the U. of Virginia *//* read_asn_dest modified 26-Jul-2007 to skip over text/bytes if dest is NULL *//* this code is designed to parse the ASN.1 binary encoded scoremat object produced by blastpgp -C file.ckpt_asn -u 2 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "defs.h"int parse_pssm_asn();int parse_pssm2_asn();intparse_pssm_asn_fa(FILE *afd, int *n_rows, int *n_cols, unsigned char **query, double ***freqs, char *matrix, int *gap_open, int *gap_extend, double *lambda);#define COMPO_NUM_TRUE_AA 20/**positions of true characters in protein alphabet*//*static int trueCharPositions[COMPO_NUM_TRUE_AA] = { 1,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,22};*/#define COMPO_LARGEST_ALPHABET 28/*static char ncbieaatoa[COMPO_LARGEST_ALPHABET] = {"-ABCDEFGHIJKLMNOPQRSTUVWXYZ"};static int alphaConvert[COMPO_LARGEST_ALPHABET] = { (-1), 0, (-1), 4, 3, 6, 13, 7, 8, 9, 11, 10, 12, 2, 14, 5, 1, 15, 16, 19, 17, (-1), 18, (-1), (-1), (-1), (-1), (-1)};*/int pssm_aa_order[20] = { 1, /*A*/ 16, /*R*/ 13, /*N*/ 4, /*D*/ 3, /*C*/ 15, /*Q*/ 5, /*E*/ 7, /*G*/ 8, /*H*/ 9, /*I*/ 11, /*L*/ 10, /*K*/ 12, /*M*/ 6, /*F*/ 14, /*P*/ 17, /*S*/ 18, /*T*/ 20, /*W*/ 22, /*Y*/ 19}; /*V*/#define ABP *asnp->abp#define ABPP asnp->abp#define ABP_INC2 asnp->abp += 2#define ASN_SEQ 48#define ASN_SET 48#define ASN_SEQOF 49#define ASN_SETOF 49#define ASN_PSSM_QUERY 166#define ASN_PSSM2_VERSION 160#define ASN_PSSM2_QUERY 161#define ASN_PSSM2_MATRIX 162#define ASN_PSSM_IS_PROT 160#define ASN_PSSM_NROWS 162#define ASN_PSSM_NCOLS 163#define ASN_PSSM_BYCOL 165#define ASN_PSSM_INTERMED_DATA 167#define ASN_PSSM_FREQS 162#define ASN_PSSM2_IS_PROTEIN 160#define ASN_PSSM2_MATRIX_NAME 161#define ASN_PSSM2_MATRIX_COMMENT 162 /* not used */#define ASN_PSSM2_NCOLS 163#define ASN_PSSM2_NROWS 164#define ASN_PSSM2_SCORES 165#define ASN_PSSM2_KARLIN_K 166#define ASN_PSSM2_FREQS 167#define ASN_IS_STR 26#define ASN_IS_SSTR 65#define ASN_IS_INT 2#define ASN_IS_BOOL 1#define ASN_IS_OCTSTR 4#define ASN_IS_OCTSSTR 65#define ASN_IS_REAL 0#define ASN_IS_ENUM 10#define ASN_IS_ENUM0 1#define ASN_OBJ_INT 160#define ASN_OBJ_STR 161struct asn_bstruct { FILE *fd; unsigned char *buf; unsigned char *abp; unsigned char *buf_max; int len;};#define ASN_BUF 4096void *new_asn_bstruct(int buf_siz) { struct asn_bstruct *asnp; if ((asnp=calloc(1,sizeof(struct asn_bstruct)))==NULL) { fprintf(stderr, "cannot allocate asn_bstruct\n"); exit(1); } if ((asnp->buf = (unsigned char *)calloc(buf_siz, sizeof(char))) == NULL ) { fprintf(stderr, " cannot allocate asn_buf (%d)\n",buf_siz); exit(1); } return asnp;}voidfree_asn_bstruct(struct asn_bstruct *asnp) { if (asnp == NULL) return; if (asnp->buf != NULL) free(asnp->buf); free(asnp);}unsigned char *chk_asn_buf(struct asn_bstruct *asnp, int v) { int new_buf; if (v > ASN_BUF) { fprintf(stderr," attempt to read %d bytes ASN.1 data > buffer size (%d)\n", v, ASN_BUF); exit(1); } if (asnp->abp + v > asnp->buf_max) { /* move down the left over stuff */ asnp->len = asnp->buf_max - asnp->abp; memmove(asnp->buf, asnp->abp, asnp->len); asnp->abp = asnp->buf; new_buf = ASN_BUF - asnp->len; if (asnp->fd && !feof(asnp->fd) && (new_buf=fread(asnp->buf + asnp->len, sizeof(char), new_buf, asnp->fd)) != 0) { asnp->len += new_buf; } asnp->buf_max = asnp->buf + asnp->len; if (asnp->len < v) { fprintf(stderr, " Unable to read %d bytes\n",v); exit(1); } } /* otherwise, v bytes are currently in the buffer */ return asnp->abp;}unsigned char *asn_error(char *func, char *token, int tval, struct asn_bstruct *asnp, int len) { int i; fprintf(stderr," %s %s [%0x]:",func, token, tval); for (i=0; i<len; i++) { fprintf(stderr," %0x",asnp->abp[i]); } fprintf(stderr,"\n"); return asnp->abp;}/* read_asn_dest reads v bytes into oct_str if v <= o_len - otherwise fails - the correct size buffer must be pre-allocated read_asn_dest is required for ASN data entities that are longer than ASN_BUF (1024) skip over if oct_str==NULL;*/unsigned char *read_asn_dest(struct asn_bstruct *asnp, int v, unsigned char *oct_str, int o_len) { int new_buf; unsigned char *oct_ptr; if (oct_str != NULL && v > o_len) { fprintf(stderr, " read_asn_dest - cannot read %d bytes into %d buffer\n", v, o_len); exit(1); } if (asnp->abp + v <= asnp->buf_max) { if (oct_str != NULL) memmove(oct_str, asnp->abp, v); return asnp->abp+v; } else { /* move down the left over stuff */ asnp->len = asnp->buf_max - asnp->abp; if (oct_str != NULL) memmove(oct_str, asnp->abp, asnp->len); oct_ptr = oct_str+asnp->len; v -= asnp->len; asnp->abp = asnp->buf; new_buf = ASN_BUF; while ((new_buf=fread(asnp->buf, sizeof(char), new_buf, asnp->fd)) != 0) { asnp->len = new_buf; asnp->buf_max = asnp->buf + asnp->len; if (v <= new_buf) { /* we have it all this time */ if (oct_str != NULL) memmove(oct_ptr, asnp->buf, v); asnp->len -= v; asnp->abp = asnp->buf + v; break; } else { /* we need to read some more */ if (oct_str != NULL) memmove(oct_ptr, asnp->buf, new_buf); v -= new_buf; new_buf = ASN_BUF; } } } return asnp->buf + v;}unsigned char *get_astr_bool(struct asn_bstruct *asnp, int *val) { int v_len, v; asnp->abp = chk_asn_buf(asnp,5); v = 0; if (*asnp->abp++ != 1) { /* check for int */ fprintf(stderr," bool missing\n"); } else { v_len = *asnp->abp++; if (v_len != 1) { fprintf(stderr, "boolean length != 1 : %d\n", v_len); v = *asnp->abp++; } else { v = *asnp->abp++;} } *val = v; return asnp->abp;}unsigned char *get_astr_int(struct asn_bstruct *asnp, int *val) { int v_len, v; v = 0; asnp->abp = chk_asn_buf(asnp,8); if (*asnp->abp++ != ASN_IS_INT) { /* check for int */ return asn_error("get_astr_int", "ASN_IS_INT", ASN_IS_INT, asnp, 4); } else { v_len = *asnp->abp++; while (v_len-- > 0) { v *= 256; v += *asnp->abp++; } } *val = v; return asnp->abp;}unsigned char *get_astr_real(struct asn_bstruct *asnp, double *val) { int v_len, v; asnp->abp = chk_asn_buf(asnp,16); *val = 0.0; if (ABP != '\0') { fprintf(stderr," float missing\n"); return asnp->abp; } else { sscanf((char *)ABPP,"%lg",val); while (ABP) { asnp->abp++;} /* get to EOS */ asnp->abp++; } return asnp->abp;}unsigned char *get_astr_enum(struct asn_bstruct *asnp, int *val) { int v_len, v; asnp->abp = chk_asn_buf(asnp,5); v = 0; if (*asnp->abp++ != ASN_IS_ENUM) { /* check for int */ fprintf(stderr," enum missing\n"); } else { v_len = *asnp->abp++; while (v_len-- > 0) { v *= 256; v += *asnp->abp++; } } *val = v; return asnp->abp;}unsigned char *get_astr_packedfloat(struct asn_bstruct *asnp, double *val) { int v_len, v; char tmp_str[64]; asnp->abp = chk_asn_buf(asnp,2); v = 0; if (*asnp->abp++ != 9) { /* check for packed float */ fprintf(stderr," float missing\n"); *val = 0; return asnp->abp; } else { v_len = *asnp->abp++; if (v_len > 63) { fprintf(stderr," real string too long: %d\n",v_len); } asnp->abp = chk_asn_buf(asnp,v_len); if (v_len == 2 && *asnp->abp == '\0' && *(asnp->abp+1)=='0') { ABP_INC2; *val = 0.0; } else { /* copy and scan it */ if (*asnp->abp != '\0') { fprintf(stderr, " packedfloat - expected 0, got %d\n", *asnp->abp); *val = -1.0; return asnp->abp; } asnp->abp++; strncpy(tmp_str, (char *)asnp->abp, sizeof(tmp_str)-1); tmp_str[v_len-1] = '\0'; tmp_str[63] = '\0'; sscanf(tmp_str,"%lg",val); asnp->abp += v_len-1; } } return asnp->abp;}unsigned char *get_astr_str(struct asn_bstruct *asnp, char *text, int t_len) { int v_len, tv_len; asnp->abp = chk_asn_buf(asnp,2); if (text != NULL) text[0] = '\0'; if (ABP != ASN_IS_STR && ABP != ASN_IS_SSTR) { /* check for str */ return asn_error("get_astr_str", "ASN_IS_STR", ASN_IS_STR, asnp, 4); } asnp->abp++; v_len = *asnp->abp++; if (v_len > 128) { /* need to read the length from the next bytes */ tv_len = v_len &0x7f; asnp->abp = chk_asn_buf(asnp,tv_len); for (v_len =0; tv_len; tv_len--) { v_len = (v_len << 8) + *asnp->abp++; } } /* read v_len bytes */ if (v_len < t_len) { /* the string fits in the buffer */ asnp->abp = read_asn_dest(asnp,v_len, (unsigned char *)text, t_len); } else { /* it does not fit, fill the buffer and skip */ if (t_len > 0) asnp->abp = read_asn_dest(asnp,t_len, (unsigned char *)text, t_len); asnp->abp = read_asn_dest(asnp,v_len - t_len, NULL, 0); } if (text != NULL && t_len > 0) {text[min(v_len,t_len)]='\0';} return asnp->abp;}unsigned char *get_astr_octstr(struct asn_bstruct *asnp, unsigned char *oct_str, int o_len) { int q_len, v_len; asnp->abp = chk_asn_buf(asnp,2); if (ABP == ASN_IS_OCTSTR || ABP == ASN_IS_OCTSSTR) { ABPP++; /* get length of length */ if (ABP > 128) { v_len = *asnp->abp++ & 0x7f; asnp->abp = chk_asn_buf(asnp,v_len); q_len = 0; while (v_len-- > 0) { q_len *= 256; q_len += *asnp->abp++; } } else { q_len = *asnp->abp++ & 0x7f; } if (q_len < o_len) { /* the string fits in the buffer */ asnp->abp = read_asn_dest(asnp,q_len, oct_str, o_len); } else { /* it does not fit, fill the buffer and skip */ asnp->abp = read_asn_dest(asnp,o_len, oct_str, o_len); asnp->abp = read_asn_dest(asnp,q_len - o_len, NULL, 0); } if (oct_str != NULL && o_len > 0) oct_str[min(q_len,o_len)]='\0'; /* asnp->abp += 2; */ /* skip characters and NULL's */ } return asnp->abp;}/* something to try to skip over stuff we don't want */unsigned char *get_astr_junk(struct asn_bstruct *asnp) { int seq_cnt = 0; int tmp; char string[256]; while (ABP) { if ( ABP == ASN_SEQ) { ABP_INC2; seq_cnt++;} else if ( ABP == ASN_IS_BOOL ) { ABP_INC2; ABPP = get_astr_int(asnp, &tmp) + 2;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -