📄 nmgetlib.c
字号:
/* $Name: fa35_03_06 $ - $Id: nmgetlib.c,v 1.46 2007/11/28 13:06:01 wrp Exp $ *//* May, June 1987 - modified for rapid read of database copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson revised (split) version of nmgetaa.c -> renamed nmgetlib.c This version seeks to be a thread safe, no global, library reading program. While adjusting the routines in this file should be relatively easy, ncbl2_mlib.c and mysql_lib.c may be more difficult. nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides the same functions as nxgetaa.c if memory mapping is not used, mmgetaa.c provides the database reading functions if memory mapping is used. The decision to use memory mapping is made on a file-by-file basis. June 2, 1987 - added TFASTA March 30, 1988 - combined ffgetaa, fgetgb; April 8, 1988 - added PIRLIB format for unix Feb 4, 1989 - added universal subroutines for libraries December, 1995 - added range option file.name:1-1000 September, 1999 - added option for mmap()ed files using ".xin" *//* February 4, 1988 - this starts a major revision of the getaa routines. The goal is to be able to seach the following format libraries: 0 - normal FASTA format 1 - full Genbank flatfile format 2 - NBRF/PIR CODATA format 3 - EMBL/Swiss-prot format 4 - Intelligentics format 5 - NBRF/PIR VMS format 6 - GCG 2bit format 11 - NCBI setdb/blastp (1.3.2) AA/NT 12 - NCBI setdb/blastp (2.0) AA/NT 16 - mySQL queries see file altlib.h to confirm numbers*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "defs.h"#include "structs.h"#ifndef SFCHAR#define SFCHAR ':'#endif#define EOSEQ 0#include "uascii.h"/* #include "upam.h" */#define LFCHAR '\015' /* for MWC 5.5 */#include "altlib.h"#include <fcntl.h>#ifndef O_RAW#ifdef O_BINARY#define O_RAW O_BINARY#else#define O_RAW 0#endif /* O_BINARY */#endif /* O_RAW */#ifdef WIN32#define RBSTR "rb" /* read file in binary mode */#else#define RBSTR "r"#endifstruct lmf_str *load_mmap(FILE *, char *, int, int, struct lmf_str *);struct lmf_str *ncbl2_reopen(struct lmf_str *);struct lmf_str *ncbl2_openlib(char *, int);static struct lmf_str *last_m_fptr=NULL;int sel_acc_libstr(char *libstr, int gi, void *ptr);void *sel_acc_libstr_init(FILE *libf, int *acc_off, char fmt_term);int sel_acc_gi(char *libstr, int gi, void *ptr);void *sel_acc_gi_init(FILE *libf, int *acc_off, char fmt_term);int sel_hacc_libstr(char *libstr, int gi, void *ptr);void *sel_hacc_libstr_init(FILE *libf, int *acc_off, char fmt_term);int sel_hacc_gi(char *libstr, int gi, void *ptr);void *sel_hacc_gi_init(FILE *libf, int *acc_off, char fmt_term);#define MAX_ACC_TYPE 4int (*sel_acc_arr[MAX_ACC_TYPE+1])(char *libstr, int gi, void *ptr) = { NULL, sel_acc_libstr, sel_acc_gi, sel_hacc_libstr, sel_hacc_gi};void *(*sel_acc_init[MAX_ACC_TYPE+1])(FILE *libf, int *acc_off, char fmt_term) = { NULL, sel_acc_libstr_init, sel_acc_gi_init, sel_hacc_libstr_init, sel_hacc_gi_init};unsigned int hash_func(char *key);unsigned int fast_hash32 (unsigned int data);#ifdef MYSQL_DBstruct lmf_str *mysql_openlib(char *, int, int *);struct lmf_str *mysql_reopen(struct lmf_str *);#endif#ifdef PGSQL_DBstruct lmf_str *pgsql_openlib(char *, int, int *);struct lmf_str *pgsql_reopen(struct lmf_str *);#endifvoid closelib(struct lmf_str *m_fptr);extern void newname(char *nname, char *oname, char *suff, int maxn);/* a file name for openlib may include a library type suffix */struct lmf_str *openlib(char *lname, int ldnaseq, int *sascii, int outtty, struct lmf_str *om_fptr){ char rline[10],sname[MAX_FN], iname[MAX_FN]; char *bp, *bp1, *bp2; char opt_text[MAX_FN]; /* save text after ':' */ char f_line[MAX_STR]; int wcnt, opnflg; int libtype; int acc_ltype = 1; /* def type is 1, not zero, so that the acc is read */ struct lmf_str *acc_fptr; char af_name[MAX_FN]; FILE *libi=NULL; FILE *libf; int use_stdin; struct lmf_str *m_fptr=NULL; int acc_off; char fmt_term; /* this is currently unavailable - later it can return a value somewhere */ /* if (lname[0]=='#') {return -9;} */ if (om_fptr != NULL && om_fptr->mm_flg) { om_fptr->lpos = 0; return om_fptr; } wcnt = 0; /* number of times to ask for file name */ /* check to see if there is a file option ":1-100" */#ifndef WIN32 if ((bp=strchr(lname,':'))!=NULL && *(bp+1)!='\0') {#else if ((bp=strchr(lname+3,':'))!=NULL && *(bp+1)!='\0') {#endif strncpy(opt_text,bp+1,sizeof(opt_text)); opt_text[sizeof(opt_text)-1]='\0'; *bp = '\0'; } else opt_text[0]='\0'; if (lname[0] == '-' || lname[0] == '@') { use_stdin = 1; } else use_stdin=0; strncpy(sname,lname,sizeof(sname)); sname[sizeof(sname)-1]='\0'; /* check for library type */ if ((bp=strchr(sname,' '))!=NULL) { *bp='\0'; sscanf(bp+1,"%d",&libtype); if (libtype<0 || libtype >= LASTLIB) { fprintf(stderr,"\n invalid library type: %d (>%d)- resetting\n%s\n", libtype,LASTLIB,lname); libtype=0; } } else libtype=0; if (use_stdin && libtype !=0 ) { fprintf(stderr,"\n @/- STDIN libraries must be in FASTA format\n"); return NULL; } /* check to see if file can be open()ed? */ l1: if (libtype<=LASTTXT) { if (!use_stdin) { opnflg=((libf=fopen(sname,RBSTR))!=NULL); } else { libf=stdin; strncpy(sname,"STDIN",sizeof(sname)); sname[sizeof(sname)-1]='\0'; opnflg=1; } } else if (libtype==ACC_LIST) { /* open the file, read the first line, do an openlib on the first line - could openlib be recursive?? */ if (!use_stdin) { opnflg=((libf=fopen(sname,RBSTR))!=NULL); } else { libf=stdin; strncpy(sname,"STDIN",sizeof(sname)); sname[sizeof(sname)-1]='\0'; opnflg=1; } if (!opnflg) { fprintf(stderr, "Cannot open %s library\n",sname); return NULL; } else { /* read in the file line */ if (fgets(f_line, sizeof(f_line), libf)==NULL) { fprintf(stderr, "Cannot read ACC_LIST file line\n"); return NULL; } /* else parse the file line */ if (f_line[0] != '<') { fprintf(stderr, "missing < - %s\n",f_line); return NULL; } if ((bp=strchr(f_line+1,'\r'))!=NULL) {*bp = '\0';} if ((bp=strchr(f_line+1,'\n'))!=NULL) {*bp = '\0';} /* check for accession format */ if ((bp=strchr(f_line+1,':'))!=NULL) { *bp = '\0'; /* access string should be %d %d%c - acc_ltype, acc_off, fmt_term */ sscanf(bp+1,"%d %d%c",&acc_ltype, &acc_off, &fmt_term); /* blank terminator is default */ if (acc_off == 0) acc_off = 1; /* always skip the '>' */ if (fmt_term == '\0') fmt_term = ' '; if (acc_ltype > MAX_ACC_TYPE) {acc_ltype = MAX_ACC_TYPE;} } /* check that we can open the library file */ if ((acc_fptr = openlib(f_line+1, ldnaseq, sascii, outtty, NULL))==NULL) { fprintf(stderr, "Cannot open %s library for ACC_LIST\n",f_line+1); } else { /* note that sel_acc_arr[0] must be NULL */ acc_fptr->sel_acc_p = sel_acc_arr[acc_ltype]; acc_fptr->acc_off = acc_off; /* read in the data */ acc_fptr->sel_local = sel_acc_init[acc_ltype](libf, &acc_fptr->acc_off, fmt_term); return acc_fptr; } } }#ifdef NCBIBL13 else if (libtype==NCBIBL13) opnflg=(ncbl_openlib(sname,ldnaseq)!= -1);#endif#ifdef NCBIBL20 else if (libtype==NCBIBL20) { opnflg=((m_fptr=ncbl2_openlib(sname,ldnaseq))!=NULL); }#endif#ifdef MYSQL_DB /* a mySQL filename contains mySQL commands, not sequences */ else if (libtype==MYSQL_LIB) { opnflg=((m_fptr=mysql_openlib(sname,ldnaseq,sascii))!=NULL); }#endif#ifdef PGSQL_DB /* a mySQL filename contains mySQL commands, not sequences */ else if (libtype==PGSQL_LIB) { opnflg=((m_fptr=pgsql_openlib(sname,ldnaseq,sascii))!=NULL); }#endif if (!opnflg) { /* here if open failed */ if (outtty) { fprintf(stderr,"\n cannot open %s library\n",sname); fprintf(stderr," enter new file name or <RET> to quit "); fflush(stderr); if (fgets(sname,sizeof(sname),stdin)==NULL) return NULL; if ((bp=strchr(sname,'\n'))!=0) *bp='\0'; if (strlen(sname)==0) return NULL; if (++wcnt > 10) return NULL; strncpy(lname,sname,sizeof(lname)-1); lname[sizeof(lname)-1]='\0'; goto l1; } else return NULL; } /* !openflg */ if (libtype <= LASTTXT) { /* modify to re-use the om_fptr if it exists */ if (om_fptr != NULL) { m_fptr = om_fptr; } else { if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) { fprintf(stderr,"\n *** cannot allocate lmf_str (%ld) for %s\n", sizeof(struct lmf_str),sname); return NULL; } if ((m_fptr->lline = calloc(MAX_STR,sizeof(char)))==NULL) { fprintf(stderr,"\n *** cannot allocate lline (%d) for %s\n", MAX_STR,sname); return NULL; } } strncpy(m_fptr->lb_name,sname,MAX_FN); m_fptr->lb_name[MAX_FN-1]='\0'; strncpy(m_fptr->opt_text,opt_text,MAX_FN); m_fptr->opt_text[MAX_FN-1]='\0'; m_fptr->sascii = sascii; m_fptr->libf = libf; m_fptr->lb_type = libtype; m_fptr->acc_off = 1; /* default for FASTA format */ m_fptr->getlib = getliba[libtype]; m_fptr->ranlib = ranliba[libtype]; m_fptr->sel_acc_p = NULL; m_fptr->mm_flg = 0; m_fptr->tot_len = 0; m_fptr->max_len = 0; m_fptr->lib_aa = (ldnaseq==0); } last_m_fptr = m_fptr;#ifdef USE_MMAP /* check for possible mmap()ed files */ if (!use_stdin && (libtype <= LASTTXT) && (getlibam[libtype]!=NULL)) { /* this is a file we can mmap() */ /* look for .xin file */ newname(iname,sname,"xin",sizeof(iname)); if ((libi=fopen(iname,"r"))!=NULL) { /* have a *.xin file, use mmap */ if (load_mmap(libi,sname,libtype,ldnaseq,m_fptr)!=NULL) { fclose(libi); /* close index file */ m_fptr->lb_type = libtype; m_fptr->getlib = getlibam[libtype]; m_fptr->ranlib = ranlibam[libtype]; m_fptr->mm_flg = 1; return m_fptr; } fclose(libi); /* memory mapping failed, but still must close file */ } }#endif if (libtype <= LASTTXT) { m_fptr->lpos = 0; if (fgets(m_fptr->lline,MAX_STR,libf)==NULL) return NULL; } return m_fptr;}voidcloselib(struct lmf_str *m_fptr) {#ifdef MMAP if (m_fptr->mm_flag) {/* don't close memory mapped files close_mmap(m_fptr);*/ return; }#endif if (m_fptr->libf!=NULL && m_fptr->libf != stdin) { fclose(m_fptr->libf); m_fptr->libf = NULL; }#ifdef NCBIBL13 if (m_fptr->lb_type == NCBIBL13) ncbl_closelib(m_fptr);#endif#ifdef NCBIBL20 if (m_fptr->lb_type == NCBIBL20) ncbl2_closelib(m_fptr);#endif#ifdef MYSQL_DB if (m_fptr->lb_type == MYSQL_LIB) mysql_closelib(m_fptr);#endif}struct lmf_str *re_openlib(struct lmf_str *om_fptr, int outtty){ int opnflg; /* if the file mmap()ed and has been opened - use it and return */ if (om_fptr->mm_flg) { return om_fptr; }#ifdef MYSQL_DB /* if this is a mysql database - use it and return */ else if (om_fptr->lb_type == MYSQL_LIB) { return om_fptr; }#endif /* data is available, but file is closed or not memory mapped, open it */ /* no longer check to memory map - because we could not do it before */ opnflg = 1; if (om_fptr->lb_type<=LASTTXT && om_fptr->libf==NULL) opnflg=((om_fptr->libf=fopen(om_fptr->lb_name,RBSTR))!=NULL);#ifdef NCBIBL13 else if (om_fptr->lb_type==NCBIBL13) opnflg=(ncbl_openlib(om_fptr->lb_name,!om_fptr->lib_aa)!= -1);#endif#ifdef NCBIBL20 else if (om_fptr->lb_type==NCBIBL20) { opnflg=((om_fptr=ncbl2_openlib(om_fptr->lb_name,!om_fptr->lib_aa))!=NULL); }#endif#ifdef MYSQL_DB /* a mySQL filename contains mySQL commands, not sequences */ else if (om_fptr->lb_type==MYSQL_LIB) opnflg=(mysql_reopen(om_fptr)!=NULL);#endif if (!opnflg) { fprintf(stderr,"*** could not re_open %s\n",om_fptr->lb_name); return NULL; } /* use the old buffer for the opened text file */ om_fptr->mm_flg = 0; last_m_fptr = om_fptr; return om_fptr;}#ifdef SUPERFAMNUMstatic char tline[512];extern int nsfnum; /* number of superfamily numbers */extern int sfnum[10]; /* superfamily number from types 0 and 5 */extern int nsfnum_n;extern int sfnum_n[10];#endifvoid sf_sort(int *, int);intagetlib(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *lm_fd, long *l_off){ int i; register unsigned char *cp, *seqp, *seqb; register int *ap; unsigned char *seqm, *seqm1; /* int ic, l_start, l_stop, l_limit, rn; */ char *bp, *bp1, *bpa, *tp; int sel_status; seqp = seqb = seq; seqm = &seq[maxs-9]; seqm1 = seqm-1; ap = lm_fd->sascii; if (*lcont==0) { *l_off = 1; start_seq: while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') { if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf); if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1); }#ifdef SUPERFAMNUM strncpy(tline,lm_fd->lline+1,sizeof(tline)); tline[sizeof(tline)-1]='\0'; sfnum[0]=nsfnum=0; if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) { if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0'; if ((bp1=strchr(bp+1,SFCHAR))==NULL) {/* fprintf(stderr," second %c missing: %s\n",SFCHAR,libstr); */ } else { *bp1 = '\0'; i = 0; if ((tp = strtok(bp+1," \t"))!=NULL) { sfnum[i++] = atoi(tp); while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) { if (isdigit(*tp)) sfnum[i++] = atoi(tp); if (i>=9) break; } } sfnum[nsfnum=i]= 0; if (nsfnum>1) sf_sort(sfnum,nsfnum); else { if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr); } } } else { sfnum[0] = nsfnum = 0; }#endif /* get l_off coordinate from @C:123 */ if ((bp=strchr(lm_fd->lline,'@'))!=NULL && !strncmp(bp+1,"C:",2)) { sscanf(bp+3,"%ld",l_off); } strncpy(libstr,lm_fd->lline+lm_fd->acc_off,n_libstr-1); libstr[n_libstr-1]='\0';
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -