📄 nmgetlib.c
字号:
if ((lm_fd->sel_acc_p != NULL) && (sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) { if (sel_status < 0) return (-1); while (strchr((char *)lm_fd->lline,'\n')==NULL) { if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); } if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); goto start_seq; } else { if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0'; if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0'; } if (n_libstr > MAX_UID) { tp = libstr; while (*tp++) if (*tp == '\001' || *tp== '\t') *tp = ' '; } *libpos = lm_fd->lpos; /* make certain we have the end of the line */ while (strchr((char *)lm_fd->lline,'\n')==NULL) { if (strlen(lm_fd->lline)<MAX_STR/2) fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR/2,lm_fd->libf); else fgets(&lm_fd->lline[MAX_STR/2],MAX_STR/2,lm_fd->libf); } lm_fd->lline[MAX_STR-1]='\0'; } lm_fd->lline[0]='\0'; while (seqb<seqm1 && fgets((char *)seqb,(size_t)(seqm-seqb),lm_fd->libf)!=NULL) { if (*seqb=='>') goto new; if (*seqb==';') { if (strchr((char *)seqb,'\n')==NULL) goto cont; continue; } /* removed - used for @P:1-n if (l_limit) { for (cp=seqp; seqp<seqm1 && rn < l_stop && (ic=ap[*cp++])<EL; ) if (ic < NA && ++rn > l_start) *seqp++ = (unsigned char)ic; if (rn > l_stop) goto finish; } else { */ seqp = seqb; for (cp=seqp; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } seqb = seqp; if (*seqp==ES) goto done; if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf); } goto done; new: strncpy(lm_fd->lline,(char *)seqp,MAX_STR); lm_fd->lline[MAX_STR-1]='\0'; /* be certain to get complete line, if possible */ if (strchr(lm_fd->lline,'\n')==NULL) fgets(&lm_fd->lline[strlen(lm_fd->lline)],MAX_STR-strlen(lm_fd->lline),lm_fd->libf); lm_fd->lline[MAX_STR-1]='\0'; if (strchr(lm_fd->lline,'\n')==NULL && strchr((char *)seqp,'\n')!=NULL) lm_fd->lline[strlen(lm_fd->lline)-1]='\n'; goto done; /* removed - used for @P:1-nfinish: while (lm_fd->lline[0]!='>' && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) { if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf); } goto done;*/ cont: fgets(lm_fd->lline,MAX_STR,lm_fd->libf); seqm1 = seqp; done: if (seqp>=seqm1) (*lcont)++; else { *lcont=0; } *seqp = EOSEQ; /* if ((int)(seqp-seq)==0) return 1; */ return (int)(seqp-seq);}voidaranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd){ char *bp; if (lm_fd->libf != stdin) { FSEEK(lm_fd->libf, seek, 0); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); if (lm_fd->lline[0]=='>' || lm_fd->lline[0]==';') { strncpy(str,lm_fd->lline+lm_fd->acc_off,cnt); str[cnt-1]='\0'; if ((bp = strchr(str,'\r'))!=NULL) *bp='\0'; if ((bp = strchr(str,'\n'))!=NULL) *bp='\0'; /* if ((bp = strchr(str,SFCHAR))!=NULL) *bp='\0'; else if ((bp = strchr(str,'\001'))!=NULL) *bp='\0'; else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0'; else str[cnt-1]='\0'; */ bp = str; while (*bp++) if (*bp=='\001' || *bp=='\t') *bp=' '; } else { str[0]='\0'; } } else str[0]='\0';}void lget_ann(struct lmf_str *, char *, int);intlgetlib(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *lm_fd, long *l_off){ register unsigned char *cp, *seqp; register int *ap; unsigned char *seqm, *seqm1; char *bp, *bp_gid; *l_off = 1; seqp = seq; seqm = &seq[maxs-11]; seqm1 = seqm-1; ap = lm_fd->sascii; if (*lcont==0) { while (lm_fd->lline[0]!='L' || lm_fd->lline[1]!='O' || strncmp(lm_fd->lline,"LOCUS",5)) { /* find LOCUS */ lm_fd->lpos = FTELL(lm_fd->libf); if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); if (lm_fd->lfflag) getc(lm_fd->libf); } *libpos= lm_fd->lpos; if (n_libstr <= 21) { strncpy(libstr,&lm_fd->lline[12],12); libstr[12]='\0'; } else { lget_ann(lm_fd,libstr,n_libstr); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); } while (lm_fd->lline[0]!='O' || lm_fd->lline[1]!='R' || strncmp(lm_fd->lline,"ORIGIN",6)) { /* find ORIGIN */ if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); if (lm_fd->lfflag) getc(lm_fd->libf); } } else { for (cp= lm_fd->cpsave; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } } lm_fd->lline[0]='\0'; while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) { if (lm_fd->lfflag) getc(lm_fd->libf); if (lm_fd->lline[0]=='/') goto new; for (cp= (unsigned char *)&lm_fd->lline[10]; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } } goto done;new: lm_fd->lpos = FTELL(lm_fd->libf); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); if (lm_fd->lfflag) getc(lm_fd->libf);done: if (seqp>=seqm1) { lm_fd->cpsave = cp; (*lcont)++; } else *lcont=0; *seqp = EOSEQ; /* if ((int)(seqp-seq)==0) return 1; */ return (int)(seqp-seq);}voidlget_ann(struct lmf_str *lm_fd, char *libstr, int n_libstr) { char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120]; /* copy in locus from lm_fd->lline */ strncpy(locus,&lm_fd->lline[12],sizeof(locus)); if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0'; /* get description */ fgets(desc,sizeof(desc),lm_fd->libf); while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10)) fgets(desc,sizeof(desc),lm_fd->libf); if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0'; /* get accession */ fgets(acc,sizeof(acc),lm_fd->libf); while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) { fgets(acc,sizeof(acc),lm_fd->libf); if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0) break; } if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0'; if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0'; /* get version */ fgets(ver,sizeof(ver),lm_fd->libf); while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) { fgets(ver,sizeof(ver),lm_fd->libf); if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0) break; } if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0'; /* extract gi:123456 from version line */ bp_gid = strchr(&ver[12],':'); if (bp_gid != NULL) { if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0'; bp_gid++; } if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0'; /* build up FASTA header line */ if (bp_gid != NULL) { strncpy(libstr,"gi|",n_libstr-1); strncat(libstr,bp_gid,n_libstr-4); strncat(libstr,"|gb|",n_libstr-20); } else {libstr[0]='\0';} /* if we have a version number, use it, otherwise accession, otherwise locus/description */ if (ver[0]=='V') { strncat(libstr,&ver[12],n_libstr-1-strlen(libstr)); strncat(libstr,"|",n_libstr-1-strlen(libstr)); } else if (acc[0]=='A') { strncat(libstr,&acc[12],n_libstr-1-strlen(libstr)); strncat(libstr," ",n_libstr-1-strlen(libstr)); } strncat(libstr,locus,n_libstr-1-strlen(libstr)); strncat(libstr,&desc[11],n_libstr-1-strlen(libstr)); libstr[n_libstr-1]='\0';}/* this code seeks to provide both the various accession numbers necessary to identify the sequence, and also some description. Unfortunately, the various contributors to Genbank use three slightly different formats for including the accession number.(1)LOCUS HSJ214M20 107422 bp DNA HTG 16-JUN-2000 DEFINITION Homo sapiens chromosome 6 clone RP1-214M20 map p12.1-12.3, *** SEQUENCING IN PROGRESS ***, in unordered pieces. ACCESSION AL121969(2)LOCUS AL359201 117444 bp DNA HTG 15-JUN-2000 DEFINITION Homo sapiens chromosome 1 clone RP4-671C13 map p13.2-21.1, *** SEQUENCING IN PROGRESS ***, in unordered pieces. ACCESSION AL359201(3)LOCUS BB067000 280 bp mRNA EST 19-JUN-2000 DEFINITION BB067000 RIKEN full-length enriched, 15 days embryo male testis Mus musculus cDNA clone 8030456L01 3', mRNA sequence. ACCESSION BB067000This makes it more difficult to both provide the accession number in astandard location and to conserve definition space*/voidlranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd){ char *bp, acc[MAX_STR], desc[MAX_STR]; FSEEK(lm_fd->libf, seek, 0); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); if (lm_fd->lfflag) getc(lm_fd->libf); lget_ann(lm_fd, str, cnt); str[cnt-1]='\0'; FSEEK(lm_fd->libf,seek,0); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); if (lm_fd->lfflag) getc(lm_fd->libf);}intpgetlib(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *lm_fd, long *l_off){ int ic; register unsigned char *cp, *seqp; register int *ap; unsigned char *seqm, *seqm1; *l_off = 1; seqp = seq; seqm = &seq[maxs-11]; seqm1 = seqm-1; ap = lm_fd->sascii; if (*lcont==0) { while (lm_fd->lline[0]!='E' || lm_fd->lline[1]!='N' || strncmp(lm_fd->lline,"ENTRY",5)) { /* find ENTRY */ lm_fd->lpos = FTELL(lm_fd->libf); if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); } strncpy(libstr,&lm_fd->lline[16],8); libstr[8]='\0'; *libpos = lm_fd->lpos; while (lm_fd->lline[2]!='Q' || lm_fd->lline[0]!='S' || strncmp(lm_fd->lline,"SEQUENCE",8)) { /* find SEQUENCE */ if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); } fgets(lm_fd->lline,MAX_STR,lm_fd->libf); /* get the extra line */ } else { for (cp= lm_fd->cpsave; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } if (*seqp==ES) goto done; } lm_fd->lline[0]='\0'; while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) { if (lm_fd->lline[0]=='/') goto new; for (cp= (unsigned char *)&lm_fd->lline[8]; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; }; if (*seqp==ES) goto done; } goto done;new: lm_fd->lpos = FTELL(lm_fd->libf); fgets(lm_fd->lline,MAX_STR,lm_fd->libf);done: if (seqp>=seqm1) { lm_fd->cpsave = cp; (*lcont)++; } else *lcont=0; *seqp = EOSEQ; /* if ((int)(seqp-seq)==0) return 1; */ return (int)(seqp-seq);}voidpranlib(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *lm_fd){ char *bp; FSEEK(lm_fd->libf, seek, 0); fgets(lm_fd->lline,MAX_STR,lm_fd->libf); strncpy(str,&lm_fd->lline[16],8); str[8]='\0'; fgets(lm_fd->lline,MAX_STR,lm_fd->libf); while (lm_fd->lline[0]!='T' || lm_fd->lline[1]!='I' || strncmp(lm_fd->lline,"TITLE",5)) fgets(lm_fd->lline,MAX_STR,lm_fd->libf); strncpy(&str[8],&lm_fd->lline[16],cnt-9); str[cnt-9]='\0'; if ((bp = strchr(str,'\n'))!=NULL) *bp='\0'; FSEEK(lm_fd->libf,seek,0); fgets(lm_fd->lline,MAX_STR,lm_fd->libf);}integetlib(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *lm_fd, long *l_off){ int ll; int ic; register unsigned char *cp, *seqp; register int *ap; unsigned char *seqm, *seqm1; int sel_status; char id[11]; /* Holds Identifier */ *l_off=1; seqp = seq; seqm = &seq[maxs-11]; seqm1 = seqm-1; ap = lm_fd->sascii; if (*lcont==0) { start_seq: while (lm_fd->lline[0]!='I' || lm_fd->lline[1]!='D') { /* find ID */ lm_fd->lpos = FTELL(lm_fd->libf); if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); if (lm_fd->lfflag) getc(lm_fd->libf); } sscanf(&lm_fd->lline[5],"%s",id); sprintf(libstr,"%-12.12s",id); libstr[12]='\0'; if ((lm_fd->sel_acc_p != NULL) && (sel_status = (lm_fd->sel_acc_p)(libstr, 0, lm_fd->sel_local)) <= 0) { if (sel_status < 0) return (-1); if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); goto start_seq; } *libpos = lm_fd->lpos; while (lm_fd->lline[0]!='S' || lm_fd->lline[1]!='Q') { /* find ORIGIN */ if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); if (lm_fd->lfflag) getc(lm_fd->libf); } sscanf(&lm_fd->lline[14],"%ld",&lm_fd->gcg_len); } else { for (cp= lm_fd->cpsave; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } if (*seqp==ES) goto done; } lm_fd->lline[0]='\0'; while (seqp<seqm1 && fgets(lm_fd->lline,MAX_STR,lm_fd->libf)!=NULL) { if (lm_fd->lfflag) getc(lm_fd->libf); if (lm_fd->lline[0]=='/') goto new; lm_fd->lline[70]='\0'; for (cp= (unsigned char *)&lm_fd->lline[5]; seqp<seqm1; ) { if ((*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA) continue; if (*(--seqp)>NA) break; } if (*seqp==ES) goto done; } goto done;new: lm_fd->lpos = FTELL(lm_fd->libf);fgets(lm_fd->lline,MAX_STR,lm_fd->libf);if (lm_fd->lfflag) getc(lm_fd->libf);goto done;done: if (seqp>=seqm1) { lm_fd->cpsave = cp; (*lcont)++; lm_fd->gcg_len -= (long)(seqp-seq);}else *lcont=0;*seqp = EOSEQ;/* if ((int)(seqp-seq)==0) return 1; */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -