⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ncbl2_mlib.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequence database
💻 C
📖 第 1 页 / 共 3 页
字号:
      else {	if (acc[0] != '\0') sprintf(descr,"%s ",acc);	else descr[0] = '\0';	if (name[0] != '\0' && strcmp(name,"BL_ORD_ID")!=0) sprintf(descr+strlen(descr),"%s ", name);      }      if (my_db == 0 || m_fd->pref_db < 0) {	if (!have_descr) {	  strncpy(str,descr,cnt-1);	  have_descr = 1;	}	else {	  strncat(str,"\001",cnt-strlen(str)-1);	  strncat(str,descr,cnt-strlen(str)-1);	}	strncat(str,title,cnt-strlen(str)-1);	if (strlen(str) >= cnt-1) break;      }      else if (m_fd->pref_db == my_db) {	have_descr = 1;	strncpy(str,descr,cnt-1);	strncat(str,title,cnt-strlen(str)-1);	break;      }    } while (abp);    if (!have_descr) {      strncpy(str,descr,cnt-1);      strncat(str,descr,cnt-strlen(str)-1);    }    if (have_my_buff) free(my_buff);  }  str[cnt-1]='\0';  bp = str;  while((bp=strchr(bp,'\001'))!=NULL) {*bp++=' ';}  if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[libpos],0);  m_fd->lpos = lib_cnt;  m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];}unsigned int bl2_uint4_cvt(unsigned int val){  unsigned int res;#ifdef IS_BIG_ENDIAN  return val;#else /* it better be LITTLE_ENDIAN */  res = ((val&255)*256)+ ((val>>8)&255);  res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);  return res;#endif}  unsigned int bl2_long4_cvt(long val){  int val4;  unsigned int res;#ifdef IS_BIG_ENDIAN  val4 = val;  return val4;#else /* it better be LITTLE_ENDIAN */  res = ((val&255)*256)+ ((val>>8)&255);  res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);  return res;#endif}  int64_t bl2_long8_cvt(int64_t val){  int64_t res;#ifdef IS_BIG_ENDIAN  return val;#else /* it better be LITTLE_ENDIAN */  res = ((val&255)*256)+ ((val>>8)&255);  res = (res<<16) + (((val>>16)&255)*256) + ((val>>24)&255);#ifdef BIG_LIB64  res = (res<<16) + (((val>>32)&255)*256) + ((val>>40)&255);  res = (res<<16) + (((val>>48)&255)*256) + ((val>>56)&255);#else  fprintf(stderr,"Cannot use bl2_long8_cvt without 64-bit longs\n");  exit(1);#endif  return res;#endif}  void src_int4_read(FILE *fd,  int 
*val){#ifdef IS_BIG_ENDIAN  fread((char *)val,(size_t)4,(size_t)1,fd);#else  unsigned char b[4];  fread((char *)&b[0],(size_t)1,(size_t)4,fd);  *val = 0;  *val = (int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8)	  +(int)b[3];#endif}void src_long4_read(FILE *fd,  long *valp){  int val4;#ifdef IS_BIG_ENDIAN  fread(&val4,(size_t)4,(size_t)1,fd);  *valp = val4;#else  unsigned char b[4];  fread((char *)&b[0],(size_t)1,(size_t)4,fd);  val4 = 0;  val4 = (int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8)	  +(int)b[3];  *valp = val4;#endif}void src_uint4_read(FILE *fd,  unsigned int *valp){#ifdef IS_BIG_ENDIAN  fread(valp,(size_t)4,(size_t)1,fd);#else  unsigned char b[4];  fread((char *)&b[0],(size_t)1,(size_t)4,fd);  *valp = 0;  *valp = (unsigned int)((int)((int)(b[0]<<8)+(int)b[1]<<8)+(int)b[2]<<8)	  +(int)b[3];#endif}voidsrc_long8_read(FILE *fd,  int64_t *val){#ifdef IS_BIG_ENDIAN  fread((void *)val,(size_t)8,(size_t)1,fd);#else  unsigned char b[8];  fread((char *)&b[0],(size_t)1,(size_t)8,fd);  *val = 0;  *val = (long)((((((long)((long)(b[0]<<8)+(long)b[1]<<8)+(long)b[2]<<8)		  +(long)b[3]<<8)+(long)b[4]<<8)+(long)b[5]<<8)		+(long)b[6]<<8)+(long)b[7];#endif}void ncbi_long8_read(FILE *fd,  int64_t *val){  unsigned char b[8];  fread((char *)&b[0],(size_t)1,(size_t)8,fd);  *val = 0;  *val = (long)((((((long)((long)(b[7]<<8)+(long)b[6]<<8)+(long)b[5]<<8)		  +(long)b[4]<<8)+(long)b[3]<<8)+(long)b[2]<<8)		+(long)b[1]<<8)+(long)b[0];}void src_char_read(FILE *fd, char *val){  fread(val,(size_t)1,(size_t)1,fd);}void src_fstr_read(FILE *fd, char *val,  int slen){  fread(val,(size_t)slen,(size_t)1,fd);}voidnewname(char *nname, char *oname, char *suff, int maxn){  strncpy(nname,oname,maxn-1);  strncat(nname,".",1);  strncat(nname,suff,maxn-strlen(nname));}/* these asn functions largely duplicate the pssm_asn_subs.c   functions, except that these functions assume that the entire ASN.1   entity is already in memory *//* these versions also use the old strategy of jumping 
over trailing   NULLs from within the function - which is dangerous and has been   abandoned  */#define ASN_SEQ 0x30#define ASN_IS_BOOL 1#define ASN_IS_INT 2#define ASN_IS_STR 26unsigned char *get_asn_int(unsigned char *abp, int *val) {  int v_len, v;  v = 0;  if (*abp++ != ASN_IS_INT) { /* check for int */    fprintf(stderr," int missing\n");  }  else {    v_len = *abp++;    while (v_len-- > 0) {      v *= 256;      v += *abp++;    }  }  *val = v;  return abp;}unsigned char *get_asn_text(unsigned char *abp, char *text, int t_len) {  int tch, at_len;  text[0] = '\0';  if (*abp++ != ASN_IS_STR) { /* check for str */    fprintf(stderr," str missing\n");  }  else {    if ((tch = *abp++) > 128) {	/* string length is in next bytes */      tch &= 0x7f;	/* get number of bytes for len */      at_len = 0;      while (tch-- > 0) { at_len = (at_len << 8) + *abp++;}    }    else {      at_len = tch;    }    if ( at_len < t_len-1) {      memcpy(text, abp, at_len);      text[at_len] = '\0';    }    else {      memcpy(text, abp, t_len-1);      text[t_len-1] = '\0';    }    abp += at_len + 2;  }  return abp;}/* something to try to skip over stuff we don't want */unsigned char *get_asn_junk(unsigned char *abp) {  int seq_cnt = 0;  int tmp;  char string[256];  while (*abp) {    if ( *abp  == ASN_SEQ) { abp += 2; seq_cnt++;}    else if ( *abp == ASN_IS_BOOL ) {abp = get_asn_int(abp, &tmp)+2;}    else if ( *abp == ASN_IS_INT ) {abp = get_asn_int(abp, &tmp)+2;}    else if ( *abp == ASN_IS_STR ) {abp = get_asn_text(abp, string, sizeof(string)-1);}  }  while (seq_cnt-- > 0) abp += 2;  return abp;}unsigned char *get_asn_textseq_id(unsigned char *abp, 		   char *name, char *acc){  char release[20], ver_str[10];  int version;  int seqcnt = 0;  ver_str[0]='\0';  if (*abp == ASN_SEQ) { abp += 2; seqcnt++;}  while (*abp) {    switch (*abp) {    case 0xa0:      abp = get_asn_text(abp+2, name, 20);      break;    case 0xa1:      abp = get_asn_text(abp+2, acc, 20);      break;    case 0xa2:      
abp = get_asn_text(abp+2, release, sizeof(release));      break;    case 0xa3:      abp = get_asn_int(abp+2, &version)+2;      sprintf(ver_str,".%d",version);      break;    default: abp += 2;    }  }  while (seqcnt-- > 0) abp += 4;  strncat(acc,ver_str,20-strlen(acc));  acc[19]='\0';  return abp;	/* skip 2 NULL's */}#define ASN_OBJID_INT 0xa0#define ASN_OBJID_STR 0xa1unsigned char *get_asn_object_id(unsigned char *abp, char *acc, int *iacc){  int seqcnt = 0;  *iacc = 0;  acc[0] = '\0';  if (*abp == ASN_SEQ) { abp += 2; seqcnt++;}  if (*abp == ASN_OBJID_INT) {    abp = get_asn_int(abp+2,iacc);  }  else if (*abp == ASN_OBJID_STR ) {    abp = get_asn_text(abp+2, acc, 20);    acc[19]='\0';  }  else {    fprintf(stderr, " object_id not str/int %d",*abp);    return abp;  }  abp += 2;  while (seqcnt-- > 0) abp += 2;  return abp;	/* skip 2 NULL's */}unsigned char *get_asn_dbtag(unsigned char *abp, char *name, char *str, int *id_p) {  if (*abp == ASN_SEQ) { abp += 2;}  if (*abp == 0xa0) {  /* get db */    abp = get_asn_text(abp+2, name, 20);  }  else {    fprintf(stderr," missing dbtag:db %d %d\n",abp[0],abp[1]);    abp += 2;  }  if (*abp == 0xa1) {  /* get tag */    abp += 2;    abp += 2; /* skip over id */    if (*abp == 2) abp = get_asn_int(abp,id_p)+2;    else abp = get_asn_text(abp+2, str, 20);  }  else {    fprintf(stderr," missing dbtag:tag %2x %2x\n",abp[0],abp[1]);    abp += 2;  }  return abp+2;	/* skip 2 NULL's */}unsigned char *get_asn_pdb_id(unsigned char *abp, char *acc, char *chain){  int ichain, seq_cnt=0;  if (*abp == ASN_SEQ) { abp += 2; seq_cnt++;}  while (*abp) {    switch (*abp) {    case 0: abp += 2; break;    case 0xa0:	/* mol-id */      abp = get_asn_text(abp+2, acc, 20);      break;    case 0xa1:      abp = get_asn_int(abp+2, &ichain)+2;      chain[0] = ichain;      chain[1] = '\0';      break;    case 0xa2:	/* ignore date - scan until NULL's */      while (*abp++) {}      abp += 2;		/* skip the NULL's */      break;    default: abp+=2;    }  }  
while (seq_cnt-- > 0) {abp += 4;}  return abp;}#define ASN_TYPE_MASK 31unsigned char*get_asn_seqid(unsigned char *abp,	       int *gi_p, int *db, char *acc, char *name) {  int db_type, itmp, seq_cnt=0, iacc;  *gi_p = 0;  if (*abp != ASN_SEQ) {    fprintf(stderr, "seqid - missing SEQ 1: %2x %2x\n",abp[0], abp[1]);    return abp;  }  else { abp += 2; seq_cnt++;}  db_type = (*abp & ASN_TYPE_MASK);  if (db_type == 11) { /* gi */    abp = get_asn_int(abp+2,gi_p)+2;  }    while (*abp == ASN_SEQ) {abp += 2; seq_cnt++;}  db_type = (*abp & ASN_TYPE_MASK);  if (db_type > 17) {db_type = 0;}  *db = db_type;  switch(db_type) {  case 0:     abp = get_asn_object_id(abp+2, acc, &iacc);    abp += 2;    break;  case 1:  case 2:    abp = get_asn_int(abp+2,&itmp)+2;    break;  case 11:    abp = get_asn_int(abp+2,&itmp)+2;    break;  case 4:  case 5:  case 6:  case 7:  case 9:  case 12:  case 13:  case 15:  case 16:  case 17:    abp = get_asn_textseq_id(abp+2,name,acc);    break;  case 10:    abp = get_asn_dbtag(abp+2,name,acc,&itmp);  case 14:    abp = get_asn_pdb_id(abp+2,acc,name);    break;  default: abp += 2;  }    while (seq_cnt-- > 0) { abp += 2;}  return abp; /* skip over 2 NULL's */}/*	-- from ncbi/api/fastadl.asn --Blast-def-line ::= SEQUENCE {	title VisibleString OPTIONAL,             -- simple title	seqid SEQUENCE OF Seq-id,                 -- Regular NCBI Seq-Id	taxid  INTEGER OPTIONAL,                  -- taxonomy id	memberships SEQUENCE OF INTEGER OPTIONAL, -- bit arrays	links SEQUENCE OF INTEGER OPTIONAL,       -- bit arrays	other-info SEQUENCE OF INTEGER OPTIONAL   -- for future use (probably genomic sequences)}	-- from ncbi/asn/seq_loc_mod.asn ----*** Sequence identifiers ********************************--*Seq-id ::= CHOICE {    local Object-id ,        -- local use    gibbsq INTEGER ,         -- Geninfo backbone seqid    gibbmt INTEGER ,         -- Geninfo backbone moltype    giim Giimport-id ,       -- Geninfo import id    genbank Textseq-id ,    embl Textseq-id ,   
 pir Textseq-id ,    swissprot Textseq-id ,    patent Patent-seq-id ,    other Textseq-id ,       -- catch all    general Dbtag ,          -- for other databases    gi INTEGER ,             -- GenInfo Integrated Database    ddbj Textseq-id ,        -- DDBJ    prf Textseq-id ,         -- PRF SEQDB    pdb PDB-seq-id ,         -- PDB sequence    tpg Textseq-id ,         -- Third Party Annot/Seq Genbank    tpe Textseq-id ,         -- Third Party Annot/Seq EMBL    tpd Textseq-id ,         -- Third Party Annot/Seq DDBJ    gpipe Textseq-id         -- Internal NCBI genome pipeline processing ID}*/#define ASN_FADL_TITLE 0xa0#define ASN_FADL_SEQID 0xa1#define ASN_FADL_TAXID 0xa2#define ASN_FADL_MEMBERS 0xa3#define ASN_FADL_LINKS 0xa4#define ASN_FADL_OTHER 0xa5unsigned char *parse_fastadl_asn(unsigned char *asn_buff, unsigned char *asn_max,		  int *gi_p, int *db, char *acc,		  char *name, char *title, int t_len, int *taxid_p) {  unsigned char *abp;  char tmp_db[4], tmp_acc[32], tmp_name[32];  int this_db;  int seq_cnt = 0;  int tmp_gi;  acc[0] = name[0] = db[0] = title[0] = '\0';  abp = asn_buff;  while ( abp < asn_max && *abp) {    if (*abp == ASN_SEQ) { abp += 2; seq_cnt++; }    else if (*abp == ASN_FADL_TITLE) {      abp = get_asn_text(abp+2, title, t_len);    }    else if (*abp == ASN_FADL_SEQID ) {      abp = get_asn_seqid(abp+2, gi_p, db, acc, name);      if (*db > 17) *db = 0;    }    else if (*abp == ASN_FADL_TAXID ) {      abp = get_asn_int(abp+2, taxid_p);      abp += 2;    }    else if (*abp == ASN_FADL_MEMBERS) {      abp = get_asn_junk(abp+2);      abp += 2;    }    else if (*abp == ASN_FADL_LINKS ) {      abp = get_asn_junk(abp+2);      abp += 2;    }    else if (*abp == ASN_FADL_OTHER ) {      abp = get_asn_junk(abp+2);      abp += 2;    }    else {      /*       fprintf(stderr, " Error - missing ASN.1 %2x:%2x:%2x:%2x\n", 	       abp[-2],abp[-1],abp[0],abp[1]); */      abp += 2;    }  }  while (abp < asn_max && *abp == '\0'  ) abp++;  if (abp >= asn_max) return 
NULL;  else return abp;}voidparse_pal(char *dname, char *msk_name,	  int *oid_seqs, int *max_oid,	  FILE *fd) {  char line[MAX_STR];  while (fgets(line,sizeof(line),fd)) {    if (line[0] == '#') continue;    if (strncmp(line, "DBLIST", 6)==0) {      sscanf(line+7,"%s",dname);    }    else if (strncmp(line, "OIDLIST", 7)==0) {      sscanf(line+8,"%s",msk_name);    }    else if (strncmp(line, "NSEQ", 4)==0) {      sscanf(line+5,"%d",oid_seqs);    }    else if (strncmp(line, "MAXOID", 6)==0) {      sscanf(line+7,"%d",max_oid);    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -