📄 mmgetaa.c
字号:
m_fd->lpos = lpos; } *seqp = EOSEQ; /* if ((int)(seqp-seq)==0) return 1; */ return (int)(seqp-seq);}voidaranlibm(char *str, int cnt, fseek_t libpos, char *libstr, struct lmf_str *m_fd){ char *bp; int llen; int lpos; lpos = (int) libpos; llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos]; if (llen >= cnt) llen = cnt-1; strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1,llen); str[llen]='\0'; if ((bp = strchr(str,'\r'))!=NULL) *bp='\0'; if ((bp = strchr(str,'\n'))!=NULL) *bp='\0'; bp = str; while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' '; m_fd->lpos = lpos;}/* there is no vgetlibm() because vgetlibm() and agetlibm() are identical - the difference in the two file formats relates to the location of the sequence, which is already available in spos_arr[]. however vranlibm must accomodate both type 5 and 6 files; type 6 has extra stuff after the seq_id.*/voidvranlibm(char *str, int cnt, fseek_t libpos, char *libstr, struct lmf_str *m_fd){ char *bp, *mp; int llen; int lpos; lpos = (int)libpos; llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos]; mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos]; strncpy(str,mp+4,20); str[20]='\0'; if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0'; else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' '; bp = strchr(mp,'\n'); llen -= (bp-mp)-5; if (llen > cnt-strlen(str)) llen = cnt-strlen(str)-1; strncat(str,bp+1,llen); if ((bp = strchr(str,'\n'))!=NULL) *bp='\0'; str[cnt-1]='\0'; m_fd->lpos = lpos;}voidclose_mmap(struct lmf_str *m_fd) { free(m_fd->s_pos_arr); free(m_fd->d_pos_arr); if (m_fd->mm_flg) { munmap(m_fd->mmap_base,m_fd->st_size); free(m_fd); } m_fd->mm_flg=0;} #ifndef min#define min(x,y) ((x) > (y) ? (y) : (x))#endifstatic int gcg_bton[4]={2,4,1,3};intgcg_getlibm(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ char dummy[20]; char gcg_date[6]; char gcg_type[10]; register unsigned char *cp, *seqp, stmp; register int *ap, lpos; unsigned char *seqm, *seqm1; long r_block, b_block, r_fact, r16_block; *l_off = 1; seqp = seq; seqm = &seq[maxs-9]; seqm1 = seqm-1; ap = m_fd->sascii; lpos = m_fd->lpos; if (*lcont==0) { if (lpos >= m_fd->max_cnt) return (-1); sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%s %s %s %s %ld\n", libstr,gcg_date,gcg_type,dummy,&(m_fd->gcg_len)); m_fd->gcg_binary = (gcg_type[0]=='2'); libstr[12]='\0'; *libpos = lpos; m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos]; } r_block = b_block = min((size_t)(seqm-seqp),m_fd->gcg_len); if (m_fd->gcg_binary) { r_block = (r_block+3)/4; } cp=(unsigned char *)m_fd->mmap_addr; if (!m_fd->gcg_binary) { r_fact = 1; r16_block = r_block/16; while (r16_block-- > 0) { *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; *seqp++ = ap[*cp++]; } while (seqp<seq+r_block) *seqp++ = ap[*cp++]; } else if (m_fd->gcg_binary) { r_fact = 4; r16_block = r_block/8; while(r16_block-- > 0) { stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; } while (seqp < seq+4*r_block) { stmp = *cp++; *seqp++ = gcg_bton[(stmp>>6) &3]; *seqp++ = gcg_bton[(stmp>>4) &3]; *seqp++ = gcg_bton[(stmp>>2) &3]; *seqp++ = gcg_bton[(stmp) &3]; } } if (r_fact * r_block >= m_fd->gcg_len) { *lcont = 0; m_fd->lpos++; } else { if (m_fd->gcg_binary) b_block = 4*r_block; m_fd->gcg_len -= b_block; (*lcont)++; } seq[b_block] = EOSEQ; /* if (b_block==0) return 1; else */ return b_block;}void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);intlgetlibm(unsigned char *seq, int maxs, char *libstr, int n_libstr, fseek_t *libpos, int *lcont, struct lmf_str *m_fd, long *l_off){ register unsigned char *cp, *seqp; register int *ap, lpos; unsigned char *seqm, *seqm1; *l_off = 1; seqp = seq; seqm = &seq[maxs-11]; seqm1 = seqm-1; lpos = m_fd->lpos; ap = m_fd->sascii; if (*lcont==0) { if (lpos >= m_fd->max_cnt) return (-1); if (n_libstr <= 21) { strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12); libstr[12]='\0'; } else { lget_ann_m(m_fd,libstr,n_libstr); } *libpos = lpos; m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos]; cp = (unsigned char *)m_fd->mmap_addr; } else cp = (unsigned char *)m_fd->mmap_addr; while (seqp<seqm1) { if (*cp=='/' && *(cp-1)=='\n') break; if ((*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA && (*seqp++=ap[*cp++])<NA) continue; --seqp; if (*cp=='\n' && *(cp+1)==' ') cp += 11; } if (seqp>=seqm1) { (*lcont)++; m_fd->mmap_addr = (char *)cp; } else { *lcont=0; m_fd->lpos++; } *seqp = EOSEQ; return (int)(seqp-seq);}voidlget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) { char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120]; /* copy in locus from lm_fd->lline */ strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus)); if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0'; /* get description */ mgets(desc,sizeof(desc),lm_fd); while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10)) mgets(desc,sizeof(desc),lm_fd); if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0'; /* get accession */ mgets(acc,sizeof(acc),lm_fd); while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) { mgets(acc,sizeof(acc),lm_fd); if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0) break; } if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0'; if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0'; /* get version */ mgets(ver,sizeof(ver),lm_fd); while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) { mgets(ver,sizeof(ver),lm_fd); if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0) break; } if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0'; /* extract gi:123456 from version line */ bp_gid = strchr(&ver[12],':'); if (bp_gid != NULL) { if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0'; bp_gid++; } if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0'; /* build up FASTA header line */ if (bp_gid != NULL) { strncpy(libstr,"gi|",n_libstr-1); strncat(libstr,bp_gid,n_libstr-4); strncat(libstr,"|gb|",n_libstr-20); } else {libstr[0]='\0';} /* if we have a version number, use it, otherwise accession, otherwise locus/description */ if (ver[0]=='V') { strncat(libstr,&ver[12],n_libstr-1-strlen(libstr)); strncat(libstr,"|",n_libstr-1-strlen(libstr)); } else if (acc[0]=='A') { strncat(libstr,&acc[12],n_libstr-1-strlen(libstr)); strncat(libstr," ",n_libstr-1-strlen(libstr)); } strncat(libstr,locus,n_libstr-1-strlen(libstr)); strncat(libstr,&desc[11],n_libstr-1-strlen(libstr)); libstr[n_libstr-1]='\0';}voidlranlibm(char *str, int cnt, fseek_t seek, char *libstr, struct lmf_str *m_fd){ char *bp, *llp; char acc[MAXLINE], desc[MAXLINE]; llp = m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[seek]; lget_ann_m(m_fd,str,cnt); str[cnt-1]='\0'; m_fd->lpos = seek;}static int check_status=0;voidcheck_mmap(struct lmf_str *m_fd,long ntt) { int i, seq_len, ok_stat; ok_stat = 1; if ( ++check_status > 5) return; fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt); for (i=0; i<m_fd->max_cnt; i++) { seq_len = m_fd->d_pos_arr[i+1] - m_fd->s_pos_arr[i]; if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) { fprintf(stderr,"%d:\t%ld\t%ld\t%ld\n", i,m_fd->d_pos_arr[i],m_fd->s_pos_arr[i], m_fd->d_pos_arr[i+1]-m_fd->s_pos_arr[i]); ok_stat=0; } } if (ok_stat) { if (check_status) fprintf(stderr," ** check_mmap OK %s %ld**\n", m_fd->lb_name,ntt); }}#ifdef DEBUG/* C H K 3 -- Compute a type-3 Kermit block check. *//* Calculate the 16-bit CRC of a null-terminated string using a byte-oriented tableless algorithm invented by Andy Lowry (Columbia University). The magic number 010201 is derived from the CRC-CCITT polynomial x^16+x^12+x^5+1. Note - this function could be adapted for strings containing imbedded 0's by including a length argument.*/longcrck(s,n) char *s; int n;{ unsigned int c, q; long crc = 0; while (n-->0) { c = *s++; /* if (parity)*/ c &= 0177; q = (crc ^ c) & 017; /* Low-order nibble */ crc = (crc >> 4) ^ (q * 010201); q = (crc ^ (c >> 4)) & 017; /* High order nibble */ crc = (crc >> 4) ^ (q * 010201); } return(crc);}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -