📄 ncbl2_mlib.c
字号:
fprintf(stderr," cannot allocate sequence pointers\n"); goto error_r; } /* for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->d_pos_arr[i]); for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->s_pos_arr[i]); */ if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) { fprintf(stderr," error reading hdr offsets: %s\n",tname); goto error_r; } for (i=0; i<=m_fptr->max_cnt; i++)#ifdef IS_BIG_ENDIAN m_fptr->d_pos_arr[i] = f_pos_arr[i];#else m_fptr->d_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) { fprintf(stderr," error reading seq offsets: %s\n",tname); goto error_r; } for (i=0; i<=m_fptr->max_cnt; i++) {#ifdef IS_BIG_ENDIAN m_fptr->s_pos_arr[i] = f_pos_arr[i];#else m_fptr->s_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif } if (dbtype == NTFORMAT) { /* allocate and ambiguity offsets */ if ((m_fptr->a_pos_arr=(MM_OFF *)calloc((size_t)m_fptr->max_cnt+1,sizeof(MM_OFF)))==NULL) { fprintf(stderr," cannot allocate sequence pointers\n"); goto error_r; } /* for (i=0; i<=m_fptr->max_cnt; i++) src_uint4_read(ifile,&m_fptr->a_pos_arr[i]); */ if (fread(f_pos_arr,(size_t)4,m_fptr->max_cnt+1,ifile)!=m_fptr->max_cnt+1) { fprintf(stderr," error reading seq offsets: %s\n",tname); goto error_r; } for (i=0; i<=m_fptr->max_cnt; i++) {#ifdef IS_BIG_ENDIAN m_fptr->a_pos_arr[i] = f_pos_arr[i];#else m_fptr->a_pos_arr[i] = bl2_uint4_cvt(f_pos_arr[i]);#endif } } /* for (i=0; i < min(m_fptr->max_cnt,10); i++) { fprintf(stderr,"%d: %d %d %d\n",i,m_fptr->s_pos_arr[i],m_fptr->a_pos_arr[i],m_fptr->d_pos_arr[i]); } */ /* all done with ifile, close it */ fclose(ifile); free(f_pos_arr); if (!m_fptr->mm_flg) { tmp = fgetc(m_fptr->libf); if (tmp!=NULLB) fprintf(stderr," phase error: %d:%d found\n",0,tmp); } m_fptr->bl_lib_pos = 1; amb_cnt = 0; return m_fptr; error_r: /* here if failure after m_fptr allocated */ free(m_fptr); return NULL;}void ncbl2_closelib(struct lmf_str *m_fptr){ if (m_fptr->tmp_buf != NULL) { free(m_fptr->tmp_buf); m_fptr->tmp_buf_max = 0; } if (m_fptr->s_pos_arr !=NULL) { free(m_fptr->s_pos_arr); m_fptr->s_pos_arr = NULL; } if (m_fptr->a_pos_arr!=NULL) { free(m_fptr->a_pos_arr); m_fptr->a_pos_arr = NULL; } if (m_fptr->hfile !=NULL ) { fclose(m_fptr->hfile); m_fptr->hfile=NULL; free(m_fptr->d_pos_arr); m_fptr->d_pos_arr = NULL; } if (m_fptr->oid_list != NULL) { free(m_fptr->oid_list); m_fptr->oid_list = NULL; m_fptr->oid_seqs = m_fptr->max_oid = 0; }#ifdef use_mmap if (m_fptr->mm_flg) { munmap(m_fptr->mmap_base,m_fptr->st_size); m_fptr->mmap_fd = -1; } else #endif if (m_fptr->libf !=NULL ) {fclose(m_fptr->libf); m_fptr->libf=NULL;}}intncbl2_getliba_o(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ int tpos; unsigned int t_mask, t_shift, oid_mask; /* get to the next valid pointer */ for ( tpos = m_fd->lpos ;tpos <= m_fd->max_oid; tpos++) { t_mask = tpos / 32; t_shift = 31 - (tpos % 32); if ((oid_mask = m_fd->oid_list[t_mask])==0) { continue; } if ((bl2_uint4_cvt(oid_mask) & 0x1 << t_shift)) { if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0); m_fd->lpos = tpos; /* already bumped up */ m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos]; return ncbl2_getliba(seq, maxs, libstr, n_libstr, libpos, lcont, m_fd, l_off); } } return -1;}intncbl2_getliba(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ unsigned char *sptr, *dptr; int s_chunk, d_len, lib_cnt; long seqcnt; long tmp; static long seq_len; int sel_status;#if defined(DEBUG) || defined(PCOMPLIB) int gi, my_db, taxid; char acc[20], title[21], name[20];#endif *l_off = 1; start_seq: lib_cnt = m_fd->lpos; *libpos = (fseek_t)m_fd->lpos; if (*lcont==0) { if (lib_cnt >= m_fd->max_cnt) return -1; /* no more sequences */ seq_len = m_fd->s_pos_arr[lib_cnt+1] - m_fd->s_pos_arr[lib_cnt]; /* value is +1 off to get the NULL */ if (m_fd->mm_flg) m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lib_cnt];#if !defined(DEBUG) && !defined(PCOMPLIB) libstr[0]='\0';#else /* get the name from the header file */ fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0); if (m_fd->bl_format_ver == FORMATDBV3) { d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1); fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile); libstr[d_len]='\0'; } else { d_len = min(m_fd->tmp_buf_max,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1); fread(m_fd->tmp_buf,(size_t)1,(size_t)d_len,m_fd->hfile); parse_fastadl_asn((unsigned char *)m_fd->tmp_buf, (unsigned char *)m_fd->tmp_buf+d_len, &gi, &my_db, acc, name, title, 20, &taxid); if (m_fd->sel_acc_p && (sel_status = m_fd->sel_acc_p(NULL, gi, m_fd->sel_local))<=0) { if (sel_status < 0) return (-1); m_fd->lpos++; goto start_seq; } sprintf(libstr,"gi|%d",gi); }#endif } if (seq_len <= maxs) { /* sequence fits */ seqcnt = seq_len; m_fd->lpos++; *lcont = 0; } else { /* doesn't fit */ seqcnt = maxs-1; (*lcont)++; } if (m_fd->mm_flg) sptr = (unsigned char *)m_fd->mmap_addr; else { if ((tmp=fread(seq,(size_t)1,(size_t)seq_len,m_fd->libf))!=(size_t)seq_len) { fprintf(stderr," could not read sequence record: %ld %ld != %ld\n", *libpos,tmp,seq_len); goto error; } sptr = seq; } if (seq_len <= maxs) {seqcnt = --seq_len;} /* everything is ready, set up dst. pointer, seq_len */ dptr = seq; if (aa_b2toa[sptr[seq_len-1]]=='*') seq_len--; /* if the two alphabets are the same, don't convert */ if (!aa_btof_null) { s_chunk = seqcnt/16; while (s_chunk-- > 0) { *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; *dptr++ = aa_btof[*sptr++]; } while (dptr < seq+seqcnt) *dptr++ = aa_btof[*sptr++]; } if (m_fd->mm_flg) m_fd->mmap_addr = (char *)sptr; /* we didn't get it all, so reset for more */ if (*lcont) seq_len -= seqcnt; seq[seqcnt]= EOSEQ; return (seqcnt); error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos); fflush(stderr); return (-1);}int ncbl2_getlibn_o(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ int tpos; unsigned int t_mask, t_shift, oid_mask; /* get to the next valid pointer */ for (tpos = m_fd->lpos; tpos <= m_fd->max_oid; tpos++) { t_mask = tpos / 32; t_shift = 31 - (tpos % 32); if ((oid_mask = m_fd->oid_list[t_mask])==0) { continue; } if ((bl2_uint4_cvt(oid_mask) & 0x1 << t_shift)) { if (!m_fd->mm_flg) fseek(m_fd->libf,m_fd->s_pos_arr[tpos],0); m_fd->lpos = tpos; /* already bumped up */ m_fd->bl_lib_pos = m_fd->s_pos_arr[tpos]; return ncbl2_getlibn(seq, maxs, libstr, n_libstr, libpos, lcont, m_fd, l_off); } } return -1;}static char tmp_amb[4096];intncbl2_getlibn(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ unsigned char *sptr, *tptr, stmp; long seqcnt; int s_chunk, lib_cnt; size_t tmp; char ch; static long seq_len; static int c_len,c_pad; int c_len_set, d_len; *l_off = 1; lib_cnt = m_fd->lpos; *libpos = (fseek_t)lib_cnt; if (*lcont==0) { /* not a continuation of previous */ if (lib_cnt >= m_fd->max_cnt) return (-1); c_len = m_fd->a_pos_arr[lib_cnt]- m_fd->s_pos_arr[lib_cnt]; if (!m_fd->mm_flg) { if (m_fd->bl_lib_pos != m_fd->s_pos_arr[lib_cnt]) { /* are we positioned to read? */ amb_cnt++; if ((m_fd->bl_lib_pos - m_fd->s_pos_arr[lib_cnt]) < sizeof(tmp_amb)) { /* jump over amb_ray */ fread(tmp_amb,(size_t)1,(size_t)(m_fd->s_pos_arr[lib_cnt]-m_fd->bl_lib_pos),m_fd->libf); } else { /* fseek over amb_ray */ fseek(m_fd->libf,m_fd->s_pos_arr[lib_cnt],0); } m_fd->bl_lib_pos = m_fd->s_pos_arr[lib_cnt]; } } else m_fd->mmap_addr = m_fd->mmap_base + m_fd->s_pos_arr[lib_cnt];#if !defined(DEBUG) && !defined(PCOMPLIB) libstr[0]='\0';#else /* get the name from the header file */ fseek(m_fd->hfile,m_fd->d_pos_arr[lib_cnt],0); d_len = min(n_libstr-1,m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]-1); fread(libstr,(size_t)1,(size_t)d_len,m_fd->hfile); libstr[d_len]='\0';#endif } /* end of *lcont==0 */ /* To avoid the situation where c_len <= 1; we must anticipate what c_len will be after this pass. If it will be <= 64, back off this time so next time it will be > 64 */ seq_len = c_len*4; if ((seq_len+4 > maxs) && (seq_len+4 - maxs <= 256)) { /* we won't be done but we will have less than 256 to go */ c_len -= 64; seq_len -= 256; c_len_set = 1; maxs -= 256;} else c_len_set = 0; /* fprintf(stderr," lib_cnt: %d %d %d %d\n",lib_cnt,c_len,seq_len,maxs); */ /* does the rest of the sequence fit? */ if (seq_len <= maxs-4 && !c_len_set) { seqcnt = c_len; if (!m_fd->mm_flg) { if ((tmp=fread(seq,(size_t)1,(size_t)seqcnt,m_fd->libf))!=(size_t)seqcnt) { fprintf(stderr, " could not read sequence record: %s %lld %ld != %ld: %d\n", libstr,*libpos,tmp,seqcnt,*seq); goto error; } m_fd->bl_lib_pos += tmp; sptr = seq + seqcnt; } else sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt); *lcont = 0; /* this is the last chunk */ lib_cnt++; /* increment to the next sequence */ /* the last byte is either '0' (no remainder) or the last 1-3 chars and the remainder */ c_pad = *(sptr-1); c_pad &= 0x3; /* get the last (low) 2 bits */ seq_len -= (4 - c_pad); /* if the last 2 bits are 0, its a NULL byte */ } else { /* get the next chunk, but more to come */ seqcnt = ((maxs+3)/4)-1; if (!m_fd->mm_flg) { if ((tmp=fread(seq,(size_t)1,(size_t)(seqcnt),m_fd->libf))!=(size_t)(seqcnt)) { fprintf(stderr," could not read sequence record: %lld %ld/%ld\n", *libpos,tmp,seqcnt); goto error; } m_fd->bl_lib_pos += tmp; sptr = seq + seqcnt; } else { sptr = (unsigned char *)(m_fd->mmap_addr+seqcnt); m_fd->mmap_addr += seqcnt; } seq_len = 4*seqcnt; c_len -= seqcnt; if (c_len_set) {c_len += 64; maxs += 256;} (*lcont)++;/* hopefully we don't need this because of c_len -= 64. *//* if (c_len == 1) {#if !defined (USE_MMAP) c_pad = fgetc(m_fd->libf); *sptr=c_pad;#else c_pad = *m_fd->mmap_addr++; sptr = m_fd->mmap_addr;#endif c_pad &= 0x3; seq_len += c_pad; seqcnt++; lib_cnt++; *lcont = 0; }*/ } /* point to the last packed byte and to the end of the array seqcnt is the exact number of bytes read tptr points to the destination, use multiple of 4 to simplify math sptr points to the source, note that the last byte will be read 4 cycles before it is written */ tptr = seq + 4*seqcnt; s_chunk = seqcnt/8; while (s_chunk-- > 0) { stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; } while (tptr>seq) { stmp = *--sptr; *--tptr = (stmp&3) +1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; *--tptr = ((stmp >>= 2)&3)+1; } /* for (sptr=seq; sptr < seq+seq_len; sptr++) { printf("%c",nt[*sptr]); if ((int)(sptr-seq) % 60 == 59) printf("\n"); } printf("\n"); */ m_fd->lpos = lib_cnt; if (seqcnt*4 >= seq_len) { /* there was enough room */ seq[seq_len]= EOSEQ; /* printf("%d\n",seq_len); */ return seq_len; } else { /* not enough room */ seq[seqcnt*4]=EOSEQ; seq_len -= 4*seqcnt; return (4*seqcnt); } error: fprintf(stderr," error reading %s at %ld\n",libstr,*libpos); fflush(stderr); return (-1);} /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 */static char*db_type_arr[] = {"lcl","gib","gim","gii","gb","emb","pir","sp", "pat","ref","gnl","gi","dbj","prf","pdb","tpg", "tpe","tpd"};voidncbl2_ranlib(char *str, int cnt, fseek_t libpos, char *libstr, struct lmf_str *m_fd){ int llen, lib_cnt; char *bp; unsigned char *my_buff=NULL; char descr[2048]; unsigned char *abp; int gi, taxid; int my_db; char db[5], acc[20], name[20]; char title[1024]; int have_my_buff=0; int have_descr = 0; lib_cnt = (int)libpos; llen = m_fd->d_pos_arr[lib_cnt+1]-m_fd->d_pos_arr[lib_cnt]; fseek(m_fd->hfile,m_fd->d_pos_arr[libpos],0); if (m_fd->bl_format_ver == FORMATDBV3) { if (llen >= cnt) llen = cnt-1; fread(str,(size_t)1,(size_t)(llen),m_fd->hfile); } else { if (llen >= m_fd->tmp_buf_max) { if ((my_buff=(unsigned char *)calloc(llen,sizeof(char)))==NULL) { fprintf(stderr," cannot allocate ASN.1 buffer: %d\n",llen); my_buff = (unsigned char *)m_fd->tmp_buf; llen = m_fd->tmp_buf_max; } else have_my_buff = 1; } else { my_buff = (unsigned char *)m_fd->tmp_buf; } abp = my_buff; fread(my_buff,(size_t)1,llen,m_fd->hfile); do { abp = parse_fastadl_asn(abp, my_buff+llen, &gi, &my_db, acc, name, title, sizeof(title), &taxid); if (gi > 0) { sprintf(descr,"gi|%d|%s|%s|%s ",gi,db_type_arr[my_db],acc,name); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -