⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mmgetaa.c

📁 序列对齐 Compare a protein sequence to a protein sequence database or a DNA sequence to a DNA sequenc
💻 C
📖 第 1 页 / 共 2 页
字号:
    m_fd->lpos = lpos;  }  *seqp = EOSEQ;  /*   if ((int)(seqp-seq)==0) return 1; */  return (int)(seqp-seq);}voidaranlibm(char *str,	 int cnt,	 fseek_t libpos,	 char *libstr,	 struct lmf_str *m_fd){  char *bp;  int llen;  int lpos;  lpos = (int) libpos;  llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];  if (llen >= cnt) llen = cnt-1;  strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1,llen);  str[llen]='\0';  if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';  bp = str;  while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';  m_fd->lpos = lpos;}/* there is no vgetlibm() because vgetlibm() and agetlibm() are   identical - the difference in the two file formats relates to the   location of the sequence, which is already available in spos_arr[].   however vranlibm must accomodate both type 5 and 6 files;   type 6 has extra stuff after the seq_id.*/voidvranlibm(char *str,	 int cnt,	 fseek_t libpos,	 char *libstr,	 struct lmf_str *m_fd){  char *bp, *mp;  int llen;  int lpos;  lpos = (int)libpos;  llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];  mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos];    strncpy(str,mp+4,20);  str[20]='\0';  if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0';  else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' ';  bp = strchr(mp,'\n');  llen -= (bp-mp)-5;  if (llen >  cnt-strlen(str)) llen = cnt-strlen(str)-1;  strncat(str,bp+1,llen);  if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';  str[cnt-1]='\0';  m_fd->lpos = lpos;}voidclose_mmap(struct lmf_str *m_fd) {  free(m_fd->s_pos_arr);  free(m_fd->d_pos_arr);  if (m_fd->mm_flg) {    munmap(m_fd->mmap_base,m_fd->st_size);    free(m_fd);  }  m_fd->mm_flg=0;}  #ifndef min#define min(x,y) ((x) > (y) ? (y) : (x))#endifstatic int gcg_bton[4]={2,4,1,3};intgcg_getlibm(unsigned char *seq,	    int maxs,	    char *libstr,	    int n_libstr,	    fseek_t *libpos,	    int *lcont,	    struct lmf_str *m_fd,	    long *l_off){  char dummy[20];  char gcg_date[6];  char gcg_type[10];  register unsigned char *cp, *seqp, stmp;  register int *ap, lpos;  unsigned char *seqm, *seqm1;  long r_block, b_block, r_fact, r16_block;  *l_off = 1;  seqp = seq;  seqm = &seq[maxs-9];  seqm1 = seqm-1;  ap = m_fd->sascii;  lpos = m_fd->lpos;   if (*lcont==0) {    if (lpos >= m_fd->max_cnt) return (-1);    sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%s %s %s %s %ld\n",	   libstr,gcg_date,gcg_type,dummy,&(m_fd->gcg_len));    m_fd->gcg_binary = (gcg_type[0]=='2');    libstr[12]='\0';    *libpos = lpos;    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];  }  r_block = b_block = min((size_t)(seqm-seqp),m_fd->gcg_len);  if (m_fd->gcg_binary) {    r_block = (r_block+3)/4;  }  cp=(unsigned char *)m_fd->mmap_addr;   if (!m_fd->gcg_binary) {    r_fact = 1;    r16_block = r_block/16;    while (r16_block-- > 0) {      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];      *seqp++ = ap[*cp++];    }    while (seqp<seq+r_block) *seqp++ = ap[*cp++];  }  else if (m_fd->gcg_binary) {    r_fact = 4;    r16_block = r_block/8;    while(r16_block-- > 0) {      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];    }    while (seqp < seq+4*r_block) {      stmp = *cp++;      *seqp++ = gcg_bton[(stmp>>6) &3];      *seqp++ = gcg_bton[(stmp>>4) &3];      *seqp++ = gcg_bton[(stmp>>2) &3];      *seqp++ = gcg_bton[(stmp) &3];    }  }  if (r_fact * r_block >= m_fd->gcg_len) {    *lcont = 0;    m_fd->lpos++;  }  else {    if (m_fd->gcg_binary) b_block = 4*r_block;    m_fd->gcg_len -= b_block;    (*lcont)++;  }  seq[b_block] = EOSEQ;  /*   if (b_block==0) return 1; else */  return b_block;}void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);intlgetlibm(unsigned char *seq,	 int maxs,	 char *libstr,	 int n_libstr,	 fseek_t *libpos,	 int *lcont,	 struct lmf_str *m_fd,	 long *l_off){  register unsigned char *cp, *seqp;  register int *ap, lpos;  unsigned char *seqm, *seqm1;  *l_off = 1;  seqp = seq;  seqm = &seq[maxs-11];  seqm1 = seqm-1;  lpos = m_fd->lpos;  ap = m_fd->sascii;  if (*lcont==0) {    if (lpos >= m_fd->max_cnt) return (-1);    if (n_libstr <= 21) {      strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12);      libstr[12]='\0';    }    else {      lget_ann_m(m_fd,libstr,n_libstr);    }    *libpos = lpos;    m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];    cp = (unsigned char *)m_fd->mmap_addr;  }  else cp = (unsigned char *)m_fd->mmap_addr;  while (seqp<seqm1) {    if (*cp=='/' && *(cp-1)=='\n') break;    if ((*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA &&	(*seqp++=ap[*cp++])<NA) continue;    --seqp;    if (*cp=='\n' && *(cp+1)==' ') cp += 11;  }  if (seqp>=seqm1) {    (*lcont)++;    m_fd->mmap_addr = (char *)cp;  }  else {    *lcont=0;    m_fd->lpos++;  }  *seqp = EOSEQ;  return (int)(seqp-seq);}voidlget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) {  char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];  /* copy in locus from lm_fd->lline */  strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus));  if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';  /* get description */  mgets(desc,sizeof(desc),lm_fd);  while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10))    mgets(desc,sizeof(desc),lm_fd);  if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0';  /* get accession */  mgets(acc,sizeof(acc),lm_fd);  while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) {    mgets(acc,sizeof(acc),lm_fd);    if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0)      break;  }  if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';  if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';  /* get version */  mgets(ver,sizeof(ver),lm_fd);  while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) {    mgets(ver,sizeof(ver),lm_fd);    if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0)      break;  }  if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';      /* extract gi:123456 from version line */  bp_gid = strchr(&ver[12],':');  if (bp_gid != NULL) {    if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';    bp_gid++;  }  if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';      /* build up FASTA header line */  if (bp_gid != NULL) {    strncpy(libstr,"gi|",n_libstr-1);    strncat(libstr,bp_gid,n_libstr-4);    strncat(libstr,"|gb|",n_libstr-20);  }  else {libstr[0]='\0';}  /* if we have a version number, use it, otherwise accession, 	 otherwise locus/description */  if (ver[0]=='V') {    strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));    strncat(libstr,"|",n_libstr-1-strlen(libstr));  }  else if (acc[0]=='A') {    strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));    strncat(libstr," ",n_libstr-1-strlen(libstr));  }  strncat(libstr,locus,n_libstr-1-strlen(libstr));  strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));  libstr[n_libstr-1]='\0';}voidlranlibm(char *str,	 int cnt,	 fseek_t seek,	 char *libstr,	 struct lmf_str *m_fd){  char *bp, *llp;  char acc[MAXLINE], desc[MAXLINE];  llp = m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[seek];  lget_ann_m(m_fd,str,cnt);  str[cnt-1]='\0';  m_fd->lpos = seek;}static int check_status=0;voidcheck_mmap(struct lmf_str *m_fd,long ntt) {  int i, seq_len, ok_stat;    ok_stat = 1;  if ( ++check_status > 5) return;  fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt);  for (i=0; i<m_fd->max_cnt; i++) {    seq_len = m_fd->d_pos_arr[i+1] - m_fd->s_pos_arr[i];    if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {      fprintf(stderr,"%d:\t%ld\t%ld\t%ld\n",	      i,m_fd->d_pos_arr[i],m_fd->s_pos_arr[i],	      m_fd->d_pos_arr[i+1]-m_fd->s_pos_arr[i]);      ok_stat=0;    }  }  if (ok_stat) {    if (check_status) fprintf(stderr," ** check_mmap OK %s %ld**\n",			      m_fd->lb_name,ntt);  }}#ifdef DEBUG/*  C H K 3  --  Compute a type-3 Kermit block check.  *//* Calculate the 16-bit CRC of a null-terminated string using a byte-oriented tableless algorithm invented by Andy Lowry (Columbia University).  The magic number 010201 is derived from the CRC-CCITT polynomial x^16+x^12+x^5+1. Note - this function could be adapted for strings containing imbedded 0's by including a length argument.*/longcrck(s,n)    char *s; int n;{    unsigned int c, q;    long crc = 0;    while (n-->0) {	c = *s++;	/* if (parity)*/	c &= 0177;	q = (crc ^ c) & 017;		/* Low-order nibble */	crc = (crc >> 4) ^ (q * 010201);	q = (crc ^ (c >> 4)) & 017;	/* High order nibble */	crc = (crc >> 4) ^ (q * 010201);    }    return(crc);}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -