⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ncbl2_mlib.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 3 页
字号:
      fprintf(stderr," cannot allocate sequence pointers\n");      goto error_r;    }  /*  for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->d_pos_arr[i]);  for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->s_pos_arr[i]);  */  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {    fprintf(stderr," error reading hdr offsets: %s\n",tname);    goto error_r;  }  for (i=0; i<=m_fptr->max_cnt; i++)#ifdef IS_BIG_ENDIAN    m_fptr->d_pos_arr[i] = f_pos_arr[i];#else    m_fptr->d_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif  if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {    fprintf(stderr," error reading seq offsets: %s\n",tname);    goto error_r;  }  for (i=0; i<=m_fptr->max_cnt; i++) {#ifdef IS_BIG_ENDIAN    m_fptr->s_pos_arr[i] = f_pos_arr[i];#else    m_fptr->s_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif  }  if (dbtype == NTFORMAT) {    /* allocate and ambiguity  offsets */    if ((m_fptr->a_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) {      fprintf(stderr," cannot allocate sequence pointers\n");      goto error_r;    }    /*    for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->a_pos_arr[i]);    */    if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) {      fprintf(stderr," error reading seq offsets: %s\n",tname);      goto error_r;    }    for (i=0; i<=m_fptr->max_cnt; i++) {#ifdef IS_BIG_ENDIAN      m_fptr->a_pos_arr[i] = f_pos_arr[i];#else      m_fptr->a_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif    }  }  /*  for (i=0; i < min(m_fptr->max_cnt,10); i++) {    fprintf(stderr,"%d: %d %d %d\n",i,m_fptr->s_pos_arr[i],m_fptr->a_pos_arr[i],m_fptr->d_pos_arr[i]);  }  */  /* all done with ifile, close it */  fclose(ifile);  free(f_pos_arr);  if (!m_fptr->mm_flg) {    tmp = fgetc(m_fptr->libf);    if (tmp!=NULLB)      fprintf(stderr," phase error: %d:%d found\n",0,tmp);  }  m_fptr->bl_lib_pos = 1;  amb_cnt = 0;  return m_fptr; error_r:  /* here if failure after m_fptr allocated */  free(m_fptr);  return NULL;}void ncbl2_closelib(struct lmf_str *m_fptr){  if (m_fptr->tmp_buf != NULL) {    free(m_fptr->tmp_buf);    m_fptr->tmp_buf_max = 0;  }  if (m_fptr->s_pos_arr !=NULL) {    free(m_fptr->s_pos_arr);    m_fptr->s_pos_arr = NULL;  }  if (m_fptr->a_pos_arr!=NULL) {    free(m_fptr->a_pos_arr);    m_fptr->a_pos_arr = NULL;  }  if (m_fptr->hfile !=NULL ) {    fclose(m_fptr->hfile); m_fptr->hfile=NULL;    free(m_fptr->d_pos_arr); m_fptr->d_pos_arr = NULL;  }  if (m_fptr->oid_list != NULL) {    free(m_fptr->oid_list); m_fptr->oid_list = NULL;    m_fptr->oid_seqs = m_fptr->max_oid = 0;  }#ifdef use_mmap  if (m_fptr->mm_flg) {    munmap(m_fptr->mmap_base,m_fptr->st_size);    m_fptr->mmap_fd = -1;  }  else #endif    if (m_fptr->libf !=NULL ) {fclose(m_fptr->libf); m_fptr->libf=NULL;}}intncbl2_getliba_o(unsigned char *seq,		int maxs,		char *libstr,		int n_libstr,		fseek_t *libpos,		int *lcont,		struct lmf_str *m_fd,		long *l_off){  int tpos;  unsigned int t_mask, t_shift, oid_mask;    /* get to the next valid pointer */    for ( tpos = m_fd->lpos ;tpos <= m_fd->max_oid; tpos++) {    t_mask = tpos / 32;    t_shift = 31 - (tpos % 32);    if ((oid_mask = m_fd->oid_list[t_mask])==0) {  continue; }    if ((bl2_uint4_cvt(oid_mask) & 0x1 << t_shift)) {      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);      m_fd->lpos = tpos;	/* already bumped up */      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];      return ncbl2_getliba(seq, maxs, libstr, n_libstr,			   libpos, lcont, m_fd, l_off);    }  }  return -1;}intncbl2_getliba(unsigned char *seq,	      int maxs,	      char *libstr,	      int n_libstr,	      fseek_t *libpos,	      int *lcont,	      struct lmf_str *m_fd,	      long *l_off){  unsigned char *sptr, *dptr;  int s_chunk, d_len, lib_cnt;  long seqcnt;  long tmp;  static long seq_len;  int sel_status;#if defined(DEBUG) || defined(PCOMPLIB)  int gi, my_db, taxid;  char acc[20], title[21], name[20];#endif    *l_off = 1; start_seq:  lib_cnt = m_fd->lpos;  *libpos = (fseek_t)m_fd->lpos;  if (*lcont==0) {    if (lib_cnt >= m_fd->max_cnt) return -1;	/* no more sequences */    seq_len = m_fd->s_pos_arr[lib_cnt+1] - m_fd->s_pos_arr[lib_cnt]; /* value is +1 off to get the NULL */    if (m_fd->mm_flg) m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lib_cnt];#if !defined(DEBUG) && !defined(PCOMPLIB)    libstr[0]='\0';#else    /* get the name from the header file */    fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);    if (m_fd->bl_format_ver == FORMATDBV3) {      d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);      fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);      libstr[d_len]='\0';    }    else {      d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);      fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile);      parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len,			&gi, &my_db, acc, name, title, 20, &taxid);      if (m_fd->sel_acc_p && (sel_status = m_fd->sel_acc_p(NULL, gi, m_fd->sel_local))<=0) {	if (sel_status < 0) return (-1);	m_fd->lpos++;	goto start_seq;      }      sprintf(libstr,"gi|%d",gi);    }#endif  }  if (seq_len <= maxs) { /* sequence fits */    seqcnt = seq_len;    m_fd->lpos++;    *lcont = 0;  }  else {		/* doesn't fit */    seqcnt = maxs-1;    (*lcont)++;  }   if (m_fd->mm_flg) sptr = (unsigned char *)m_fd->mmap_addr;  else {    if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,m_fd->libf))!=(size_t)seq_len) {      fprintf(stderr," could not read sequence record: %ld %ld != %ld\n",	      *libpos,tmp,seq_len);      goto error;     }    sptr = seq;  }  if (seq_len <= maxs) {seqcnt = --seq_len;}  /* everything is ready, set up dst. pointer, seq_len */  dptr = seq;  if (aa_b2toa[sptr[seq_len-1]]=='*') seq_len--;  /* if the two alphabets are the same, don't convert */  if (!aa_btof_null)  {    s_chunk = seqcnt/16;    while (s_chunk-- > 0) {      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];      *dptr++ = aa_btof[*sptr++];    }    while (dptr < seq+seqcnt) *dptr++ = aa_btof[*sptr++];  }  if (m_fd->mm_flg) m_fd->mmap_addr = (char *)sptr;  /* we didn't get it all, so reset for more */  if (*lcont) seq_len -= seqcnt;  seq[seqcnt]= EOSEQ;  return (seqcnt);  error:	fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);  fflush(stderr);  return (-1);}int ncbl2_getlibn_o(unsigned char *seq,		int maxs,		char *libstr,		int n_libstr,		fseek_t *libpos,		int *lcont,		struct lmf_str *m_fd,		long *l_off){  int tpos;  unsigned int t_mask, t_shift, oid_mask;    /* get to the next valid pointer */    for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) {    t_mask = tpos / 32;    t_shift = 31 - (tpos % 32);    if ((oid_mask = m_fd->oid_list[t_mask])==0) {  continue; }    if ((bl2_uint4_cvt(oid_mask) & 0x1 << t_shift)) {      if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0);      m_fd->lpos = tpos;	/* already bumped up */      m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos];      return ncbl2_getlibn(seq, maxs, libstr, n_libstr,			   libpos, lcont, m_fd, l_off);    }  }  return -1;}static char tmp_amb[4096];intncbl2_getlibn(unsigned char *seq,	      int maxs,	      char *libstr,	      int n_libstr,	      fseek_t *libpos,	      int *lcont,	      struct lmf_str *m_fd,	      long *l_off){  unsigned char *sptr, *tptr, stmp;  long seqcnt;  int s_chunk, lib_cnt;  size_t tmp;  char ch;  static long seq_len;  static int c_len,c_pad;  int c_len_set, d_len;  *l_off = 1;  lib_cnt = m_fd->lpos;  *libpos = (fseek_t)lib_cnt;  if (*lcont==0) {	/* not a continuation of previous */    if (lib_cnt >= m_fd->max_cnt) return (-1);    c_len = m_fd->a_pos_arr[lib_cnt]- m_fd->s_pos_arr[lib_cnt];    if (!m_fd->mm_flg) {      if (m_fd->bl_lib_pos != m_fd->s_pos_arr[lib_cnt]) { /* are we positioned to read? */	amb_cnt++;	if ((m_fd->bl_lib_pos - m_fd->s_pos_arr[lib_cnt]) < sizeof(tmp_amb)) {	  /* jump over amb_ray */	  fread(tmp_amb,(size_t)1,(size_t)(m_fd->s_pos_arr[lib_cnt]-m_fd->bl_lib_pos),m_fd->libf);	}	else {	/* fseek over amb_ray */	  fseek(m_fd->libf,m_fd->s_pos_arr[lib_cnt],0);	}	m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt];      }    }    else m_fd->mmap_addr = m_fd->mmap_base + m_fd->s_pos_arr[lib_cnt];#if !defined(DEBUG) && !defined(PCOMPLIB)    libstr[0]='\0';#else    /* get the name from the header file */    fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0);    d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1);    fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile);    libstr[d_len]='\0';#endif  }			/* end of *lcont==0 */  /* To avoid the situation where c_len <= 1; we must anticipate what     c_len will be after this pass.  If it will be <= 64, back off this     time so next time it will be > 64 */  seq_len = c_len*4;  if ((seq_len+4 > maxs) && (seq_len+4 - maxs  <= 256)) {    /* we won't be done but we will have less than 256 to go */    c_len -= 64; seq_len -= 256; c_len_set = 1; maxs -= 256;}  else c_len_set = 0;  /*  fprintf(stderr," lib_cnt: %d %d %d %d\n",lib_cnt,c_len,seq_len,maxs);  */  /* does the rest of the sequence fit? */  if (seq_len <= maxs-4 && !c_len_set) {    seqcnt = c_len;    if (!m_fd->mm_flg) {      if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,m_fd->libf))!=(size_t)seqcnt) {	fprintf(stderr,		" could not read sequence record: %s %lld %ld != %ld: %d\n",		libstr,*libpos,tmp,seqcnt,*seq);	goto error;       }      m_fd->bl_lib_pos += tmp;      sptr = seq + seqcnt;    }    else sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);    *lcont = 0;		/* this is the last chunk */    lib_cnt++;		/* increment to the next sequence */    /* the last byte is either '0' (no remainder) or the last 1-3 chars and the remainder */    c_pad = *(sptr-1);    c_pad &= 0x3;	/* get the last (low) 2 bits */    seq_len -= (4 - c_pad);	/* if the last 2 bits are 0, its a NULL byte */  }  else {	/* get the next chunk, but more to come */    seqcnt = ((maxs+3)/4)-1;    if (!m_fd->mm_flg) {      if ((tmp=fread(seq,(size_t)1,(size_t)(seqcnt),m_fd->libf))!=(size_t)(seqcnt)) {	fprintf(stderr," could not read sequence record: %lld %ld/%ld\n",		*libpos,tmp,seqcnt);	goto error;      }      m_fd->bl_lib_pos += tmp;      sptr = seq + seqcnt;    }    else {      sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt);      m_fd->mmap_addr += seqcnt;    }    seq_len = 4*seqcnt;    c_len -= seqcnt;    if (c_len_set) {c_len += 64; maxs += 256;}    (*lcont)++;/*  hopefully we don't need this because of c_len -= 64. *//*    if (c_len == 1) {#if !defined (USE_MMAP)      c_pad = fgetc(m_fd->libf);      *sptr=c_pad;#else      c_pad = *m_fd->mmap_addr++;      sptr = m_fd->mmap_addr;#endif      c_pad &= 0x3;      seq_len += c_pad;      seqcnt++;      lib_cnt++;      *lcont = 0;    }*/  }  /* point to the last packed byte and to the end of the array     seqcnt is the exact number of bytes read     tptr points to the destination, use multiple of 4 to simplify math     sptr points to the source, note that the last byte will be read 4 cycles     before it is written     */    tptr = seq + 4*seqcnt;  s_chunk = seqcnt/8;  while (s_chunk-- > 0) {    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;  }  while (tptr>seq) {    stmp = *--sptr;    *--tptr = (stmp&3) +1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;    *--tptr = ((stmp >>= 2)&3)+1;  }  /*    for (sptr=seq; sptr < seq+seq_len; sptr++) {    printf("%c",nt[*sptr]);    if ((int)(sptr-seq) % 60 == 59) printf("\n");    }    printf("\n");    */  m_fd->lpos = lib_cnt;  if (seqcnt*4 >= seq_len) {	/* there was enough room */    seq[seq_len]= EOSEQ;    /* printf("%d\n",seq_len); */    return seq_len;  }  else {				/* not enough room */    seq[seqcnt*4]=EOSEQ;    seq_len -= 4*seqcnt;    return (4*seqcnt);  }  error:	fprintf(stderr," error reading %s at %ld\n",libstr,*libpos);  fflush(stderr);  return (-1);}                /*   0     1     2     3    4     5     6    7		     8     9    10    11   12    13    14   15		     16    17 */static char*db_type_arr[] = {"lcl","gib","gim","gii","gb","emb","pir","sp",		  "pat","ref","gnl","gi","dbj","prf","pdb","tpg",		  "tpe","tpd"};voidncbl2_ranlib(char *str,	     int cnt,	     fseek_t libpos,	     char *libstr,	     struct lmf_str *m_fd){  int llen, lib_cnt;  char *bp;  unsigned char *my_buff=NULL;  char descr[2048];  unsigned char *abp;  int gi, taxid;  int my_db;  char db[5], acc[20], name[20];  char title[1024];  int have_my_buff=0;  int have_descr = 0;  lib_cnt = (int)libpos;  llen = m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt];  fseek(m_fd->hfile,m_fd->d_pos_arr[libpos],0);  if (m_fd->bl_format_ver == FORMATDBV3) {    if (llen >= cnt) llen = cnt-1;    fread(str,(size_t)1,(size_t)(llen),m_fd->hfile);  }  else {    if (llen >= m_fd->tmp_buf_max) {      if ((my_buff=(unsigned char *)calloc(llen,sizeof(char)))==NULL) {	fprintf(stderr," cannot allocate ASN.1 buffer: %d\n",llen);	my_buff = (unsigned char *)m_fd->tmp_buf;	llen = m_fd->tmp_buf_max;      }      else have_my_buff = 1;    }    else {       my_buff = (unsigned char *)m_fd->tmp_buf;    }    abp = my_buff;    fread(my_buff,(size_t)1,llen,m_fd->hfile);    do {      abp = parse_fastadl_asn(abp, my_buff+llen,			      &gi, &my_db, acc, name,			      title, sizeof(title), &taxid);      if (gi > 0) {	sprintf(descr,"gi|%d|%s|%s|%s ",gi,db_type_arr[my_db],acc,name);      }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -