⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nmgetlib.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 4 页
字号:
    if ((lm_fd->sel_acc_p != NULL) &&	(sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) {      if (sel_status < 0) return (-1);      while (strchr((char *)lm_fd->lline,'\n')==NULL) {	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      }      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      goto start_seq;    }    else {      if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';      if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';    }    if (n_libstr > MAX_UID) {      tp = libstr;      while (*tp++) if (*tp == '\001' || *tp== '\t') *tp = ' ';    }    *libpos = lm_fd->lpos;    /* make certain we have the end of the line */    while (strchr((char *)lm_fd->lline,'\n')==NULL) {      if (strlen(lm_fd->lline)<MAX_STR/2) 	fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf);      else 	fgets(&lm_fd->lline[MAX_STR/2],MAX_STR/2,lm_fd->libf);    }    lm_fd->lline[MAX_STR-1]='\0';  }  lm_fd->lline[0]='\0';  while (seqb<seqm1 && fgets((char *)seqb,(size_t)(seqm-seqb),lm_fd->libf)!=NULL) {    if (*seqb=='>') goto new;    if (*seqb==';') {      if (strchr((char *)seqb,'\n')==NULL) goto cont;      continue;    }    /* removed - used for @P:1-n        if (l_limit) {       for (cp=seqp; seqp<seqm1 && rn < l_stop && (ic=ap[*cp++])<EL; )       if (ic < NA && ++rn > l_start) *seqp++ = (unsigned char)ic;       if (rn > l_stop) goto finish;       }       else {    */    seqp = seqb;    for (cp=seqp; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }    seqb = seqp;    if (*seqp==ES) goto done;    if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);  }  goto done; new:  strncpy(lm_fd->lline,(char *)seqp,MAX_STR);  lm_fd->lline[MAX_STR-1]='\0';  /* be certain to get complete line, if possible */  if (strchr(lm_fd->lline,'\n')==NULL)    fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf);  lm_fd->lline[MAX_STR-1]='\0';  if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL)    lm_fd->lline[strlen(lm_fd->lline)-1]='\n';  goto done;  /* removed - used for @P:1-nfinish:    while (lm_fd->lline[0]!='>' && 	  fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {     if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);   }   goto done;*/ cont:  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  seqm1 = seqp; done:  if (seqp>=seqm1) (*lcont)++;  else {    *lcont=0;  }  *seqp = EOSEQ;  /*  if ((int)(seqp-seq)==0) return 1; */  return (int)(seqp-seq);}voidaranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd){  char *bp;  if (lm_fd->libf != stdin) {    FSEEK(lm_fd->libf, seek, 0);    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);    if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') {      strncpy(str,lm_fd->lline+lm_fd->acc_off,cnt);      str[cnt-1]='\0';      if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';      if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';      /*	if ((bp = strchr(str,SFCHAR))!=NULL) *bp='\0';	else if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';	else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';	else str[cnt-1]='\0';      */      bp = str;      while (*bp++) if (*bp=='\001' || *bp=='\t') *bp=' ';    }    else {      str[0]='\0';    }  }  else str[0]='\0';}void lget_ann(struct lmf_str *, char *, int);intlgetlib(unsigned char *seq, int maxs,	char *libstr,	int n_libstr,	fseek_t *libpos,	int *lcont,	struct lmf_str *lm_fd,	long *l_off){  register unsigned char *cp, *seqp;  register int *ap;  unsigned char *seqm, *seqm1;  char *bp, *bp_gid;  *l_off = 1;  seqp = seq;  seqm = &seq[maxs-11];  seqm1 = seqm-1;  ap = lm_fd->sascii;  if (*lcont==0) {    while (lm_fd->lline[0]!='L' || lm_fd->lline[1]!='O' || 	   strncmp(lm_fd->lline,"LOCUS",5)) { /* find LOCUS */      lm_fd->lpos = FTELL(lm_fd->libf);      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      if (lm_fd->lfflag) getc(lm_fd->libf);    }    *libpos= lm_fd->lpos;    if (n_libstr <= 21) {      strncpy(libstr,&lm_fd->lline[12],12);      libstr[12]='\0';    }    else {      lget_ann(lm_fd,libstr,n_libstr);      fgets(lm_fd->lline,MAX_STR,lm_fd->libf);    }    while (lm_fd->lline[0]!='O' || lm_fd->lline[1]!='R' ||	   strncmp(lm_fd->lline,"ORIGIN",6)) { /* find ORIGIN */      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      if (lm_fd->lfflag) getc(lm_fd->libf);    }  }  else {    for (cp= lm_fd->cpsave; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }  }  lm_fd->lline[0]='\0';  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {    if (lm_fd->lfflag) getc(lm_fd->libf);    if (lm_fd->lline[0]=='/') goto new;    for (cp= (unsigned char *)&lm_fd->lline[10]; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }  }  goto done;new:  lm_fd->lpos = FTELL(lm_fd->libf);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  if (lm_fd->lfflag) getc(lm_fd->libf);done:  if (seqp>=seqm1) {    lm_fd->cpsave = cp;    (*lcont)++;  }  else *lcont=0;  *seqp = EOSEQ;  /*  if ((int)(seqp-seq)==0) return 1; */  return (int)(seqp-seq);}voidlget_ann(struct lmf_str *lm_fd, char *libstr, int n_libstr) {  char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];  /* copy in locus from lm_fd->lline */  strncpy(locus,&lm_fd->lline[12],sizeof(locus));  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';  /* get description */  fgets(desc,sizeof(desc),lm_fd->libf);  while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10))    fgets(desc,sizeof(desc),lm_fd->libf);  if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0';  /* get accession */  fgets(acc,sizeof(acc),lm_fd->libf);  while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) {    fgets(acc,sizeof(acc),lm_fd->libf);    if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0)      break;  }  if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';  if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';  /* get version */  fgets(ver,sizeof(ver),lm_fd->libf);  while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) {    fgets(ver,sizeof(ver),lm_fd->libf);    if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0)      break;  }  if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';      /* extract gi:123456 from version line */  bp_gid = strchr(&ver[12],':');  if (bp_gid != NULL) {    if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';    bp_gid++;  }  if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';      /* build up FASTA header line */  if (bp_gid != NULL) {    strncpy(libstr,"gi|",n_libstr-1);    strncat(libstr,bp_gid,n_libstr-4);    strncat(libstr,"|gb|",n_libstr-20);  }  else {libstr[0]='\0';}  /* if we have a version number, use it, otherwise accession, 	 otherwise locus/description */  if (ver[0]=='V') {    strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));    strncat(libstr,"|",n_libstr-1-strlen(libstr));  }  else if (acc[0]=='A') {    strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));    strncat(libstr," ",n_libstr-1-strlen(libstr));  }  strncat(libstr,locus,n_libstr-1-strlen(libstr));  strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));  libstr[n_libstr-1]='\0';}/* this code seeks to provide both the various accession numbers   necessary to identify the sequence, and also some description.   Unfortunately, the various contributors to Genbank use three   slightly different formats for including the accession number.(1)LOCUS       HSJ214M20  107422 bp    DNA             HTG       16-JUN-2000   DEFINITION  Homo sapiens chromosome 6 clone RP1-214M20 map p12.1-12.3, ***               SEQUENCING IN PROGRESS ***, in unordered pieces.   ACCESSION   AL121969(2)LOCUS       AL359201   117444 bp    DNA             HTG       15-JUN-2000   DEFINITION  Homo sapiens chromosome 1 clone RP4-671C13 map p13.2-21.1, ***               SEQUENCING IN PROGRESS ***, in unordered pieces.   ACCESSION   AL359201(3)LOCUS       BB067000      280 bp    mRNA            EST       19-JUN-2000   DEFINITION  BB067000 RIKEN full-length enriched, 15 days embryo male testis Mus               musculus cDNA clone 8030456L01 3', mRNA sequence.   ACCESSION   BB067000This makes it more difficult to both provide the accession number in astandard location and to conserve definition space*/voidlranlib(char *str,	int cnt,	fseek_t seek,	char *libstr,	struct lmf_str *lm_fd){  char *bp, acc[MAX_STR], desc[MAX_STR];  FSEEK(lm_fd->libf, seek, 0);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  if (lm_fd->lfflag) getc(lm_fd->libf);  lget_ann(lm_fd, str, cnt);  str[cnt-1]='\0';  FSEEK(lm_fd->libf,seek,0);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  if (lm_fd->lfflag) getc(lm_fd->libf);}intpgetlib(unsigned char *seq, int maxs,	char *libstr,	int n_libstr,	fseek_t *libpos,	int *lcont,	struct lmf_str *lm_fd,	long *l_off){  int ic;  register unsigned char *cp, *seqp;  register int *ap;  unsigned char *seqm, *seqm1;  *l_off = 1;  seqp = seq;  seqm = &seq[maxs-11];  seqm1 = seqm-1;  ap = lm_fd->sascii;  if (*lcont==0) {    while (lm_fd->lline[0]!='E' || lm_fd->lline[1]!='N' || strncmp(lm_fd->lline,"ENTRY",5))      { /* find ENTRY */	lm_fd->lpos = FTELL(lm_fd->libf);	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      }    strncpy(libstr,&lm_fd->lline[16],8);    libstr[8]='\0';    *libpos = lm_fd->lpos;    while (lm_fd->lline[2]!='Q' || lm_fd->lline[0]!='S' || strncmp(lm_fd->lline,"SEQUENCE",8))      { /* find SEQUENCE */	if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      }    fgets(lm_fd->lline,MAX_STR,lm_fd->libf); /* get the extra line */  }  else {    for (cp= lm_fd->cpsave; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }    if (*seqp==ES) goto done;  }  lm_fd->lline[0]='\0';  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {    if (lm_fd->lline[0]=='/') goto new;    for (cp= (unsigned char *)&lm_fd->lline[8]; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    };    if (*seqp==ES) goto done;  }  goto done;new:  lm_fd->lpos = FTELL(lm_fd->libf);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);done:  if (seqp>=seqm1) {    lm_fd->cpsave = cp;    (*lcont)++;  }  else *lcont=0;  *seqp = EOSEQ;  /*  if ((int)(seqp-seq)==0) return 1; */  return (int)(seqp-seq);}voidpranlib(char *str,	int cnt,	fseek_t seek,	char *libstr,	struct lmf_str *lm_fd){  char *bp;  FSEEK(lm_fd->libf, seek, 0);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  strncpy(str,&lm_fd->lline[16],8);  str[8]='\0';  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  while (lm_fd->lline[0]!='T' || lm_fd->lline[1]!='I' || strncmp(lm_fd->lline,"TITLE",5))    fgets(lm_fd->lline,MAX_STR,lm_fd->libf);  strncpy(&str[8],&lm_fd->lline[16],cnt-9);  str[cnt-9]='\0';  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';  FSEEK(lm_fd->libf,seek,0);  fgets(lm_fd->lline,MAX_STR,lm_fd->libf);}integetlib(unsigned char *seq, int maxs,	char *libstr,	int n_libstr,	fseek_t *libpos,	int *lcont,	struct lmf_str *lm_fd,	long *l_off){  int ll;  int ic;  register unsigned char *cp, *seqp;  register int *ap;  unsigned char *seqm, *seqm1;  int sel_status;  char id[11];  /* Holds Identifier */  *l_off=1;  seqp = seq;  seqm = &seq[maxs-11];  seqm1 = seqm-1;  ap = lm_fd->sascii;  if (*lcont==0) {  start_seq:    while (lm_fd->lline[0]!='I' || lm_fd->lline[1]!='D') { /* find ID */      lm_fd->lpos = FTELL(lm_fd->libf);      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      if (lm_fd->lfflag) getc(lm_fd->libf);    }    sscanf(&lm_fd->lline[5],"%s",id);    sprintf(libstr,"%-12.12s",id);    libstr[12]='\0';    if ((lm_fd->sel_acc_p != NULL) &&	(sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) {      if (sel_status < 0) return (-1);      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      goto start_seq;    }    *libpos = lm_fd->lpos;    while (lm_fd->lline[0]!='S' || lm_fd->lline[1]!='Q') { /* find ORIGIN */      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);      if (lm_fd->lfflag) getc(lm_fd->libf);    }    sscanf(&lm_fd->lline[14],"%ld",&lm_fd->gcg_len);  }  else {    for (cp= lm_fd->cpsave; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }    if (*seqp==ES) goto done;  }  lm_fd->lline[0]='\0';  while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) {    if (lm_fd->lfflag) getc(lm_fd->libf);    if (lm_fd->lline[0]=='/') goto new;    lm_fd->lline[70]='\0';    for (cp= (unsigned char *)&lm_fd->lline[5]; seqp<seqm1; ) {      if ((*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA &&	  (*seqp++=ap[*cp++])<NA) continue;      if (*(--seqp)>NA) break;    }    if (*seqp==ES) goto done;  }  goto done;new:	lm_fd->lpos = FTELL(lm_fd->libf);fgets(lm_fd->lline,MAX_STR,lm_fd->libf);if (lm_fd->lfflag) getc(lm_fd->libf);goto done;done:	if (seqp>=seqm1) {  lm_fd->cpsave = cp;  (*lcont)++;  lm_fd->gcg_len -= (long)(seqp-seq);}else *lcont=0;*seqp = EOSEQ;/* if ((int)(seqp-seq)==0) return 1; */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -