⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nmgetlib.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequence database
💻 C
📖 第 1 页 / 共 4 页
字号:
/* $Name: fa35_03_06 $ - $Id: nmgetlib.c,v 1.46 2007/11/28 13:06:01 wrp Exp $ *//*	May, June 1987	- modified for rapid read of database	copyright (c) 1987,1988,1989,1992,1995,2000 William R. Pearson	revised (split) version of nmgetaa.c -> renamed nmgetlib.c	This version seeks to be a thread safe, no global, library	reading program.  While adjusting the routines in this file	should be relatively easy, ncbl2_mlib.c and mysql_lib.c may be	more difficult.	nmgetlib.c and mmgetaa.c are used together.  nmgetlib.c provides	the same functions as nxgetaa.c if memory mapping is not used,	mmgetaa.c provides the database reading functions if memory	mapping is used. The decision to use memory mapping is made on	a file-by-file basis.	June 2, 1987 - added TFASTA	March 30, 1988 - combined ffgetaa, fgetgb;	April 8, 1988 - added PIRLIB format for unix	Feb 4, 1989 - added universal subroutines for libraries	December, 1995 - added range option file.name:1-1000	September, 1999 - added option for mmap()ed files using ".xin" *//*	February 4, 1988 - this starts a major revision of the getaa	routines.  
The goal is to be able to seach the following format	libraries:	0 - normal FASTA format	1 - full Genbank flatfile format	2 - NBRF/PIR CODATA format	3 - EMBL/Swiss-prot format	4 - Intelligentics format	5 - NBRF/PIR VMS format	6 - GCG 2bit format	11 - NCBI setdb/blastp (1.3.2) AA/NT	12 - NCBI setdb/blastp (2.0) AA/NT	16 - mySQL queries	see file altlib.h to confirm numbers*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "defs.h"#include "structs.h"#ifndef SFCHAR#define SFCHAR ':'#endif#define EOSEQ 0#include "uascii.h"/* #include "upam.h" */#define LFCHAR '\015'  /* for MWC 5.5 */#include "altlib.h"#include <fcntl.h>#ifndef O_RAW#ifdef O_BINARY#define O_RAW O_BINARY#else#define O_RAW 0#endif		/* O_BINARY */#endif		/* O_RAW */#ifdef WIN32#define RBSTR "rb"	/* read file in binary mode */#else#define RBSTR "r"#endifstruct lmf_str *load_mmap(FILE *, char *, int, int, struct lmf_str *);struct lmf_str *ncbl2_reopen(struct lmf_str *);struct lmf_str *ncbl2_openlib(char *, int);static struct lmf_str *last_m_fptr=NULL;int sel_acc_libstr(char *libstr, int gi, void *ptr);void *sel_acc_libstr_init(FILE *libf, int *acc_off, char fmt_term);int sel_acc_gi(char *libstr, int gi, void *ptr);void *sel_acc_gi_init(FILE *libf, int *acc_off, char fmt_term);int sel_hacc_libstr(char *libstr, int gi, void *ptr);void *sel_hacc_libstr_init(FILE *libf, int *acc_off, char fmt_term);int sel_hacc_gi(char *libstr, int gi, void *ptr);void *sel_hacc_gi_init(FILE *libf, int *acc_off, char fmt_term);#define MAX_ACC_TYPE 4int (*sel_acc_arr[MAX_ACC_TYPE+1])(char *libstr, int gi, void *ptr) = {  NULL, sel_acc_libstr, sel_acc_gi, sel_hacc_libstr, sel_hacc_gi};void *(*sel_acc_init[MAX_ACC_TYPE+1])(FILE *libf, int *acc_off, char fmt_term) = {  NULL, sel_acc_libstr_init, sel_acc_gi_init, sel_hacc_libstr_init, sel_hacc_gi_init};unsigned int hash_func(char *key);unsigned int fast_hash32 (unsigned int data);#ifdef MYSQL_DBstruct lmf_str *mysql_openlib(char *, int, int *);struct 
lmf_str *mysql_reopen(struct lmf_str *);#endif#ifdef PGSQL_DBstruct lmf_str *pgsql_openlib(char *, int, int *);struct lmf_str *pgsql_reopen(struct lmf_str *);#endifvoid closelib(struct lmf_str *m_fptr);extern void newname(char *nname, char *oname, char *suff, int maxn);/* a file name for openlib may include a library type suffix */struct lmf_str *openlib(char *lname, int ldnaseq, int *sascii,	int outtty, struct lmf_str *om_fptr){  char rline[10],sname[MAX_FN], iname[MAX_FN];  char *bp, *bp1, *bp2;  char opt_text[MAX_FN];	/* save text after ':' */  char f_line[MAX_STR];  int wcnt, opnflg;  int libtype;  int acc_ltype = 1;	/* def type is 1, not zero, so that the acc is read */  struct lmf_str *acc_fptr;  char af_name[MAX_FN];  FILE *libi=NULL;  FILE *libf;  int use_stdin;  struct lmf_str *m_fptr=NULL;  int acc_off;  char fmt_term;  /* this is currently unavailable - later it can return a value somewhere */  /*  if (lname[0]=='#') {return -9;}  */  if (om_fptr != NULL && om_fptr->mm_flg) {    om_fptr->lpos = 0;    return om_fptr;  }  wcnt = 0;	/* number of times to ask for file name */  /* check to see if there is a file option ":1-100" */#ifndef WIN32  if ((bp=strchr(lname,':'))!=NULL && *(bp+1)!='\0') {#else  if ((bp=strchr(lname+3,':'))!=NULL && *(bp+1)!='\0') {#endif    strncpy(opt_text,bp+1,sizeof(opt_text));    opt_text[sizeof(opt_text)-1]='\0';    *bp = '\0';  }  else opt_text[0]='\0';  if (lname[0] == '-' || lname[0] == '@') {    use_stdin = 1;  }  else use_stdin=0;  strncpy(sname,lname,sizeof(sname));  sname[sizeof(sname)-1]='\0';    /* check for library type */  if ((bp=strchr(sname,' '))!=NULL) {    *bp='\0';    sscanf(bp+1,"%d",&libtype);    if (libtype<0 || libtype >= LASTLIB) {      fprintf(stderr,"\n invalid library type: %d (>%d)- resetting\n%s\n",	      libtype,LASTLIB,lname);      libtype=0;    }  }  else libtype=0;  if (use_stdin && libtype !=0 ) {    fprintf(stderr,"\n @/- STDIN libraries must be in FASTA format\n");    return NULL;  }  /* check to 
see if file can be open()ed? */ l1:  if (libtype<=LASTTXT) {    if (!use_stdin) {      opnflg=((libf=fopen(sname,RBSTR))!=NULL);    }    else {      libf=stdin;      strncpy(sname,"STDIN",sizeof(sname));      sname[sizeof(sname)-1]='\0';      opnflg=1;    }  }   else if (libtype==ACC_LIST) {    /* open the file, read the first line, do an openlib on the first       line - could openlib be recursive?? */    if (!use_stdin) {      opnflg=((libf=fopen(sname,RBSTR))!=NULL);    }    else {      libf=stdin;      strncpy(sname,"STDIN",sizeof(sname));      sname[sizeof(sname)-1]='\0';      opnflg=1;    }    if (!opnflg) {      fprintf(stderr, "Cannot open %s library\n",sname);      return NULL;    }    else {      /* read in the file line */      if (fgets(f_line, sizeof(f_line), libf)==NULL) {	fprintf(stderr, "Cannot read ACC_LIST file line\n");	return NULL;      }      /* else parse the file line */      if (f_line[0] != '<') {	fprintf(stderr, "missing < - %s\n",f_line); return NULL;      }      if ((bp=strchr(f_line+1,'\r'))!=NULL) {*bp = '\0';}      if ((bp=strchr(f_line+1,'\n'))!=NULL) {*bp = '\0';}      /* check for accession format */      if ((bp=strchr(f_line+1,':'))!=NULL) {	*bp = '\0';	/* access string should be %d %d%c - acc_ltype, acc_off, fmt_term */	sscanf(bp+1,"%d %d%c",&acc_ltype, &acc_off, &fmt_term);	/* blank terminator is default */	if (acc_off == 0) acc_off = 1;	/* always skip the '>' */	if (fmt_term == '\0') fmt_term = ' ';	if (acc_ltype > MAX_ACC_TYPE) {acc_ltype = MAX_ACC_TYPE;}      }      /* check that we can open the library file */      if ((acc_fptr = openlib(f_line+1, ldnaseq, sascii, outtty, NULL))==NULL) {	fprintf(stderr, "Cannot open %s library for ACC_LIST\n",f_line+1);      }      else {	/* note that sel_acc_arr[0] must be NULL */	acc_fptr->sel_acc_p = sel_acc_arr[acc_ltype];	acc_fptr->acc_off = acc_off;	/* read in the data */	acc_fptr->sel_local = sel_acc_init[acc_ltype](libf, &acc_fptr->acc_off, fmt_term);	return acc_fptr;      }    }  
}#ifdef NCBIBL13  else if (libtype==NCBIBL13) opnflg=(ncbl_openlib(sname,ldnaseq)!= -1);#endif#ifdef NCBIBL20  else if (libtype==NCBIBL20) {    opnflg=((m_fptr=ncbl2_openlib(sname,ldnaseq))!=NULL);  }#endif#ifdef MYSQL_DB  /* a mySQL filename contains mySQL commands, not sequences */  else if (libtype==MYSQL_LIB) {    opnflg=((m_fptr=mysql_openlib(sname,ldnaseq,sascii))!=NULL);  }#endif#ifdef PGSQL_DB  /* a mySQL filename contains mySQL commands, not sequences */  else if (libtype==PGSQL_LIB) {    opnflg=((m_fptr=pgsql_openlib(sname,ldnaseq,sascii))!=NULL);  }#endif  if (!opnflg) {	/* here if open failed */    if (outtty) {      fprintf(stderr,"\n cannot open %s library\n",sname);      fprintf(stderr," enter new file name or <RET> to quit ");      fflush(stderr);      if (fgets(sname,sizeof(sname),stdin)==NULL) return NULL;      if ((bp=strchr(sname,'\n'))!=0) *bp='\0';      if (strlen(sname)==0) return NULL;      if (++wcnt > 10) return NULL;      strncpy(lname,sname,sizeof(lname)-1);      lname[sizeof(lname)-1]='\0';      goto l1;    }    else return NULL;  }	/* !openflg */  if (libtype <= LASTTXT) {    /* modify to re-use the om_fptr if it exists */    if (om_fptr != NULL) {      m_fptr = om_fptr;    }    else {      if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {	fprintf(stderr,"\n *** cannot allocate lmf_str (%ld) for %s\n",		sizeof(struct lmf_str),sname);	return NULL;      }      if ((m_fptr->lline = calloc(MAX_STR,sizeof(char)))==NULL) {	fprintf(stderr,"\n *** cannot allocate lline (%d) for %s\n",		MAX_STR,sname);	return NULL;      }    }    strncpy(m_fptr->lb_name,sname,MAX_FN);    m_fptr->lb_name[MAX_FN-1]='\0';    strncpy(m_fptr->opt_text,opt_text,MAX_FN);    m_fptr->opt_text[MAX_FN-1]='\0';    m_fptr->sascii = sascii;    m_fptr->libf = libf;    m_fptr->lb_type = libtype;    m_fptr->acc_off = 1;	/* default for FASTA format */    m_fptr->getlib = getliba[libtype];    m_fptr->ranlib = ranliba[libtype];    m_fptr->sel_acc_p = NULL;    m_fptr->mm_flg 
= 0;    m_fptr->tot_len = 0;    m_fptr->max_len = 0;    m_fptr->lib_aa = (ldnaseq==0);  }  last_m_fptr = m_fptr;#ifdef USE_MMAP  /* check for possible mmap()ed files */  if (!use_stdin && (libtype <= LASTTXT) && (getlibam[libtype]!=NULL)) {    /* this is a file we can mmap() */    /* look for .xin file */    newname(iname,sname,"xin",sizeof(iname));    if ((libi=fopen(iname,"r"))!=NULL) { /* have a *.xin file, use mmap */      if (load_mmap(libi,sname,libtype,ldnaseq,m_fptr)!=NULL) {	fclose(libi);	/* close index file */	m_fptr->lb_type = libtype;	m_fptr->getlib = getlibam[libtype];	m_fptr->ranlib = ranlibam[libtype];	m_fptr->mm_flg = 1;	return m_fptr;      }    fclose(libi);	/* memory mapping failed, but still must close file */    }  }#endif  if (libtype <= LASTTXT) {    m_fptr->lpos = 0;    if (fgets(m_fptr->lline,MAX_STR,libf)==NULL) return NULL;  }  return m_fptr;}voidcloselib(struct lmf_str *m_fptr) {#ifdef MMAP  if (m_fptr->mm_flag) {/* don't close memory mapped files    close_mmap(m_fptr);*/    return;  }#endif  if (m_fptr->libf!=NULL && m_fptr->libf != stdin) {    fclose(m_fptr->libf);    m_fptr->libf = NULL;  }#ifdef NCBIBL13  if (m_fptr->lb_type == NCBIBL13) ncbl_closelib(m_fptr);#endif#ifdef NCBIBL20  if (m_fptr->lb_type == NCBIBL20) ncbl2_closelib(m_fptr);#endif#ifdef MYSQL_DB  if (m_fptr->lb_type == MYSQL_LIB) mysql_closelib(m_fptr);#endif}struct lmf_str *re_openlib(struct lmf_str *om_fptr, int outtty){  int opnflg;  /* if the file mmap()ed and has been opened - use it and return */  if (om_fptr->mm_flg) {    return om_fptr;  }#ifdef MYSQL_DB  /* if this is a mysql database - use it and return */  else if (om_fptr->lb_type == MYSQL_LIB) {    return om_fptr;  }#endif  /* data is available, but file is closed or not memory mapped, open it */  /* no longer check to memory map - because we could not do it before */  opnflg = 1;  if (om_fptr->lb_type<=LASTTXT && om_fptr->libf==NULL)    opnflg=((om_fptr->libf=fopen(om_fptr->lb_name,RBSTR))!=NULL);#ifdef 
NCBIBL13  else if (om_fptr->lb_type==NCBIBL13)    opnflg=(ncbl_openlib(om_fptr->lb_name,!om_fptr->lib_aa)!= -1);#endif#ifdef NCBIBL20  else if (om_fptr->lb_type==NCBIBL20) {    opnflg=((om_fptr=ncbl2_openlib(om_fptr->lb_name,!om_fptr->lib_aa))!=NULL);  }#endif#ifdef MYSQL_DB  /* a mySQL filename contains mySQL commands, not sequences */  else if (om_fptr->lb_type==MYSQL_LIB)     opnflg=(mysql_reopen(om_fptr)!=NULL);#endif  if (!opnflg) {    fprintf(stderr,"*** could not re_open %s\n",om_fptr->lb_name);    return NULL;  }  /* use the old buffer for the opened text file */  om_fptr->mm_flg = 0;  last_m_fptr =  om_fptr;  return om_fptr;}#ifdef SUPERFAMNUMstatic char tline[512];extern int nsfnum;	/* number of superfamily numbers */extern int sfnum[10];	/* superfamily number from types 0 and 5 */extern int nsfnum_n;extern int sfnum_n[10];#endifvoid sf_sort(int *, int);intagetlib(unsigned char *seq, int maxs,	char *libstr, int n_libstr,	fseek_t *libpos,	int *lcont,	struct lmf_str *lm_fd,	long *l_off){  int i;  register unsigned char *cp, *seqp, *seqb;  register int *ap;  unsigned char *seqm, *seqm1;  /* int ic, l_start, l_stop, l_limit, rn; */  char *bp, *bp1, *bpa, *tp;  int sel_status;  seqp = seqb = seq;  seqm = &seq[maxs-9];  seqm1 = seqm-1;  ap = lm_fd->sascii;  if (*lcont==0) {    *l_off = 1;  start_seq:    while (lm_fd->lline[0]!='>' && lm_fd->lline[0]!=';') {      if (lm_fd->libf != stdin) lm_fd->lpos = FTELL(lm_fd->libf);      if (fgets(lm_fd->lline,MAX_STR,lm_fd->libf)==NULL) return (-1);    }#ifdef SUPERFAMNUM    strncpy(tline,lm_fd->lline+1,sizeof(tline));    tline[sizeof(tline)-1]='\0';    sfnum[0]=nsfnum=0;    if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {      if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';      if ((bp1=strchr(bp+1,SFCHAR))==NULL) {/*	fprintf(stderr," second %c missing: %s\n",SFCHAR,libstr); */      }      else {	*bp1 = '\0';	i = 0;	if ((tp = strtok(bp+1," \t"))!=NULL) {	  sfnum[i++] = atoi(tp);	  while ((tp = strtok((char 
*)NULL," \t")) != (char *)NULL) {	    if (isdigit(*tp)) sfnum[i++] = atoi(tp);	    if (i>=9) break;	  }	}	sfnum[nsfnum=i]= 0;	if (nsfnum>1) sf_sort(sfnum,nsfnum);	else {	  if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);	}      }    }    else {      sfnum[0] = nsfnum = 0;      }#endif    /* get l_off coordinate from @C:123 */    if ((bp=strchr(lm_fd->lline,'@'))!=NULL && !strncmp(bp+1,"C:",2)) {      sscanf(bp+3,"%ld",l_off);    }    strncpy(libstr,lm_fd->lline+lm_fd->acc_off,n_libstr-1);    libstr[n_libstr-1]='\0';

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -