⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sqio.c

📁 hmmer源程序
💻 C
📖 第 1 页 / 共 4 页
字号:
    }  /* Unambiguous decisions:   */  if (nother)         return kOtherSeq;  if (namino == nseq) return kAmino;  if (ndna   == nseq) return kDNA;  if (nrna   == nseq) return kRNA;  /* Ambiguous decisions:   */  if (namino == 0)    return kRNA; /* it's nucleic acid, but seems mixed RNA/DNA */  return kAmino;		   /* some amino acid seen; others probably short seqs, some 				      of which may be entirely ACGT (ala,cys,gly,thr). We 				      could be a little more sophisticated: U would be a giveaway				      that we're not in protein seqs */}/* Function: WriteSimpleFASTA() * Date:     SRE, Tue Nov 16 18:06:00 1999 [St. Louis] * * Purpose:  Just write a FASTA format sequence to a file; *           minimal interface, mostly for quick and dirty programs. * * Args:     fp   - open file handle (stdout, possibly) *           seq  - sequence to output *           name - name for the sequence *           desc - optional description line, or NULL. * * Returns:  void */voidWriteSimpleFASTA(FILE *fp, char *seq, char *name, char *desc){  char buf[61];  int  len;  int  pos;    len = strlen(seq);  buf[60] = '\0';  fprintf(fp, ">%s %s\n", name, desc != NULL ? desc : "");  for (pos = 0; pos < len; pos += 60)    {      strncpy(buf, seq+pos, 60);      fprintf(fp, "%s\n", buf);    }}intWriteSeq(FILE *outf, int outform, char *seq, SQINFO *sqinfo){  int   numline = 0;  int   lines = 0, spacer = 0, width  = 50, tab = 0;  int   i, j, l, l1, ibase;  char  endstr[10];  char  s[100];			/* buffer for sequence  */  char  ss[100];		/* buffer for structure */  int   checksum = 0;  int   seqlen;     int   which_case;    /* 0 = do nothing. 1 = upper case. 2 = lower case */  int   dostruc;		/* TRUE to print structure lines*/  which_case = 0;  dostruc    = FALSE;		  seqlen     = (sqinfo->flags & SQINFO_LEN) ? sqinfo->len : strlen(seq);  if (IsAlignmentFormat(outform))     Die("Tried to write an aligned format with WriteSeq() -- bad, bad.");  strcpy( endstr,"");  l1 = 0;  checksum = GCGchecksum(seq, seqlen);  switch (outform) {  case SQFILE_UNKNOWN:    /* no header, just sequence */    strcpy(endstr,"\n"); /* end w/ extra blank line */    break;  case SQFILE_GENBANK:    fprintf(outf,"LOCUS       %s       %d bp\n", 	    (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name,	    seqlen);    fprintf(outf,"DEFINITION  %s\n", 	    (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");    fprintf(outf,"ACCESSION   %s\n", 	    (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");    fprintf(outf,"ORIGIN      \n");    spacer = 11;    numline = 1;    strcpy(endstr, "\n//");    break;  case SQFILE_GCGDATA:    fprintf(outf, ">>>>%s  9/95  ASCII  Len: %d\n", sqinfo->name, seqlen);    fprintf(outf, "%s\n", (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");    break;  case SQFILE_PIR:    fprintf(outf, "ENTRY          %s\n", 	    (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);    fprintf(outf, "TITLE          %s\n", 	    (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");    fprintf(outf, "ACCESSION      %s\n",	    (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");    fprintf(outf, "SUMMARY                                #Length %d  #Checksum  %d\n",	    sqinfo->len, checksum);    fprintf(outf, "SEQUENCE\n");    fprintf(outf, "                  5        10        15        20        25        30\n");    spacer  = 2;		/* spaces after every residue */    numline = 1;              /* number lines w/ coords     */    width   = 30;             /* 30 aa per line             */    strcpy(endstr, "\n///");    break;  case SQFILE_SQUID:    fprintf(outf, "NAM  %s\n", sqinfo->name);    if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN))      fprintf(outf, "SRC  %s %s %d..%d::%d\n",	      (sqinfo->flags & SQINFO_ID)    ? sqinfo->id     : "-",	      (sqinfo->flags & SQINFO_ACC)   ? sqinfo->acc    : "-",	      (sqinfo->flags & SQINFO_START) ? sqinfo->start  : 0,	      (sqinfo->flags & SQINFO_STOP)  ? sqinfo->stop   : 0,	      (sqinfo->flags & SQINFO_OLEN)  ? sqinfo->olen   : 0);    if (sqinfo->flags & SQINFO_DESC)      fprintf(outf, "DES  %s\n", sqinfo->desc);    if (sqinfo->flags & SQINFO_SS)      {	fprintf(outf, "SEQ  +SS\n");	dostruc = TRUE;	/* print structure lines too */      }    else      fprintf(outf, "SEQ\n");    numline = 1;                /* number seq lines w/ coords  */    strcpy(endstr, "\n++");    break;  case SQFILE_EMBL:    fprintf(outf,"ID   %s\n",	    (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);    fprintf(outf,"AC   %s\n",	    (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");    fprintf(outf,"DE   %s\n", 	    (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");    fprintf(outf,"SQ             %d BP\n", seqlen);    strcpy(endstr, "\n//"); /* 11Oct90: bug fix*/    tab = 5;     /** added 31jan91 */    spacer = 11; /** added 31jan91 */    break;  case SQFILE_GCG:    fprintf(outf,"%s\n", sqinfo->name);    if (sqinfo->flags & SQINFO_ACC)      fprintf(outf,"ACCESSION   %s\n", sqinfo->acc);     if (sqinfo->flags & SQINFO_DESC)      fprintf(outf,"DEFINITION  %s\n", sqinfo->desc);    fprintf(outf,"    %s  Length: %d  (today)  Check: %d  ..\n", 	    sqinfo->name, seqlen, checksum);    spacer = 11;    numline = 1;    strcpy(endstr, "\n");  /* this is insurance to help prevent misreads at eof */    break;  case SQFILE_STRIDER: /* ?? map ?*/    fprintf(outf,"; ### from DNA Strider ;-)\n");    fprintf(outf,"; DNA sequence  %s, %d bases, %d checksum.\n;\n", 	    sqinfo->name, seqlen, checksum);    strcpy(endstr, "\n//");    break;    /* SRE: Don had Zuker default to Pearson, which is not			   intuitive or helpful, since Zuker's MFOLD can't read			   Pearson format. More useful to use kIG */  case SQFILE_ZUKER:    which_case = 1;			/* MFOLD requires upper case. */    /*FALLTHRU*/  case SQFILE_IG:    fprintf(outf,";%s %s\n", 	    sqinfo->name,	    (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "");    fprintf(outf,"%s\n", sqinfo->name);    strcpy(endstr,"1"); /* == linear dna */    break;  case SQFILE_RAW:			/* Raw: no header at all. */    break;  default :  case SQFILE_FASTA:    fprintf(outf,">%s %s\n", sqinfo->name,	    (sqinfo->flags & SQINFO_DESC)  ? sqinfo->desc   : "");    break;  }  if (which_case == 1) s2upper(seq);  if (which_case == 2) s2lower(seq);  width = MIN(width,100);  for (i=0, l=0, ibase = 1, lines = 0; i < seqlen; ) {    if (l1 < 0) l1 = 0;    else if (l1 == 0) {      if (numline) fprintf(outf,"%8d ",ibase);      for (j=0; j<tab; j++) fputc(' ',outf);    }    if ((spacer != 0) && ((l+1) % spacer == 1))       { s[l] = ' '; ss[l] = ' '; l++; }    s[l]  = seq[i];    ss[l] = (sqinfo->flags & SQINFO_SS) ? sqinfo->ss[i] : '.';    l++; i++;    l1++;                 /* don't count spaces for width*/    if (l1 == width || i == seqlen) {      s[l] = ss[l] = '\0';      l = 0; l1 = 0;      if (dostruc)	{	  fprintf(outf, "%s\n", s);	  if (numline) fprintf(outf,"         ");	  for (j=0; j<tab; j++) fputc(' ',outf);	  if (i == seqlen) fprintf(outf,"%s%s\n",ss,endstr);	  else fprintf(outf,"%s\n",ss);	}      else	{	  if (i == seqlen) fprintf(outf,"%s%s\n",s,endstr);	  else fprintf(outf,"%s\n",s);	}      lines++;      ibase = i+1;    }  }  return lines;} /* Function: ReadMultipleRseqs() *  * Purpose:  Open a data file and *           parse it into an array of rseqs (raw, unaligned *           sequences). *  *           Caller is responsible for free'ing memory allocated *           to ret_rseqs, ret_weights, and ret_names. *            *           Weights are currently only supported for MSF format. *           Sequences read from all other formats will be assigned *           weights of 1.0. If the caller isn't interested in *           weights, it passes NULL as ret_weights. *  * Returns 1 on success. Returns 0 on failure and sets * squid_errno to indicate the cause. */intReadMultipleRseqs(char              *seqfile,		  int                fformat,		  char            ***ret_rseqs,		  SQINFO **ret_sqinfo,		  int               *ret_num){  SQINFO *sqinfo;               /* array of sequence optional info         */  SQFILE *dbfp;                 /* open ptr for sequential access of file  */  char  **rseqs;                /* sequence array                          */  int     numalloced;           /* num of seqs currently alloced for       */  int     num;  num        = 0;  numalloced = 16;  rseqs  = (char **) MallocOrDie (numalloced * sizeof(char *));  sqinfo = (SQINFO *) MallocOrDie (numalloced * sizeof(SQINFO));  if ((dbfp = SeqfileOpen(seqfile, fformat, NULL)) == NULL) return 0;        while (ReadSeq(dbfp, dbfp->format, &rseqs[num], &(sqinfo[num])))    {      num++;      if (num == numalloced) /* more seqs coming, alloc more room */	{	  numalloced += 16;	  rseqs  = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *));	  sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO));	}    }  SeqfileClose(dbfp);  *ret_rseqs  = rseqs;  *ret_sqinfo = sqinfo;  *ret_num    = num;  return 1;}/* Function: String2SeqfileFormat() * Date:     SRE, Sun Jun 27 15:25:54 1999 [TW 723 over Canadian Shield] * * Purpose:  Convert a string (e.g. from command line option arg) *           to a format code. Case insensitive. Return *           MSAFILE_UNKNOWN/SQFILE_UNKNOWN if string is bad.   *           Uses codes defined in squid.h (unaligned formats) and *           msa.h (aligned formats). * * Args:     s   - string to convert; e.g. "stockholm" * * Returns:  format code; e.g. MSAFILE_STOCKHOLM */intString2SeqfileFormat(char *s){  char *s2;  int   code = SQFILE_UNKNOWN;  if (s == NULL) return SQFILE_UNKNOWN;  s2 = sre_strdup(s, -1);  s2upper(s2);    if      (strcmp(s2, "FASTA")     == 0) code = SQFILE_FASTA;  else if (strcmp(s2, "GENBANK")   == 0) code = SQFILE_GENBANK;  else if (strcmp(s2, "EMBL")      == 0) code = SQFILE_EMBL;  else if (strcmp(s2, "GCG")       == 0) code = SQFILE_GCG;  else if (strcmp(s2, "GCGDATA")   == 0) code = SQFILE_GCGDATA;  else if (strcmp(s2, "RAW")       == 0) code = SQFILE_RAW;  else if (strcmp(s2, "IG")        == 0) code = SQFILE_IG;  else if (strcmp(s2, "STRIDER")   == 0) code = SQFILE_STRIDER;  else if (strcmp(s2, "IDRAW")     == 0) code = SQFILE_IDRAW;  else if (strcmp(s2, "ZUKER")     == 0) code = SQFILE_ZUKER;  else if (strcmp(s2, "PIR")       == 0) code = SQFILE_PIR;  else if (strcmp(s2, "SQUID")     == 0) code = SQFILE_SQUID;  else if (strcmp(s2, "STOCKHOLM") == 0) code = MSAFILE_STOCKHOLM;  else if (strcmp(s2, "SELEX")     == 0) code = MSAFILE_SELEX;   else if (strcmp(s2, "MSF")       == 0) code = MSAFILE_MSF;   else if (strcmp(s2, "CLUSTAL")   == 0) code = MSAFILE_CLUSTAL;   else if (strcmp(s2, "A2M")       == 0) code = MSAFILE_A2M;   else if (strcmp(s2, "PHYLIP")    == 0) code = MSAFILE_PHYLIP;   else if (strcmp(s2, "EPS")       == 0) code = MSAFILE_EPS;   free(s2);  return code;}char *SeqfileFormat2String(int code){  switch (code) {  case SQFILE_UNKNOWN:   return "unknown";  case SQFILE_FASTA:     return "FASTA";  case SQFILE_GENBANK:   return "Genbank";  case SQFILE_EMBL:      return "EMBL";   case SQFILE_GCG:       return "GCG";  case SQFILE_GCGDATA:   return "GCG data library";  case SQFILE_RAW:       return "raw";   case SQFILE_IG:        return "Intelligenetics";  case SQFILE_STRIDER:   return "MacStrider";  case SQFILE_IDRAW:     return "Idraw Postscript";  case SQFILE_ZUKER:     return "Zuker";   case SQFILE_PIR:       return "PIR";  case SQFILE_SQUID:     return "SQUID";  case MSAFILE_STOCKHOLM: return "Stockholm";  case MSAFILE_SELEX:     return "SELEX";  case MSAFILE_MSF:       return "MSF";  case MSAFILE_CLUSTAL:   return "Clustal";  case MSAFILE_A2M:       return "a2m";  case MSAFILE_PHYLIP:    return "Phylip";  case MSAFILE_EPS:       return "EPS";  default:                   Die("Bad code passed to MSAFormat2String()");  }  /*NOTREACHED*/  return NULL;}/* Function: MSAToSqinfo() * Date:     SRE, Tue Jul 20 14:36:56 1999 [St. Louis] * * Purpose:  Take an MSA and generate a SQINFO array suitable *           for use in annotating the unaligned sequences. *           Return the array. *            *           Permanent temporary code. sqinfo was poorly designed. *           it must eventually be replaced, but the odds *           of this happening soon are nil, so I have to deal. * * Args:     msa   - the alignment * * Returns:  ptr to allocated sqinfo array. *           Freeing is ghastly: free in each individual sqinfo[i]  *           with FreeSequence(NULL, &(sqinfo[i])), then *           free(sqinfo). */SQINFO *MSAToSqinfo(MSA *msa){  int     idx;  SQINFO *sqinfo;  sqinfo = MallocOrDie(sizeof(SQINFO) * msa->nseq);  for (idx = 0; idx < msa->nseq; idx++)    {      sqinfo[idx].flags = 0;      SetSeqinfoString(&(sqinfo[idx]), 		       msa->sqname[idx],                 SQINFO_NAME);      SetSeqinfoString(&(sqinfo[idx]), 		       MSAGetSeqAccession(msa, idx),     SQINFO_ACC);      SetSeqinfoString(&(sqinfo[idx]), 		       MSAGetSeqDescription(msa, idx),   SQINFO_DESC);      if (msa->ss != NULL && msa->ss[idx] != NULL) {	MakeDealignedString(msa->aseq[idx], msa->alen, 			    msa->ss[idx], &(sqinfo[idx].ss));	sqinfo[idx].flags |= SQINFO_SS;      }      if (msa->sa != NULL && msa->sa[idx] != NULL) {	MakeDealignedString(msa->aseq[idx], msa->alen, 			    msa->sa[idx], &(sqinfo[idx].sa));	sqinfo[idx].flags |= SQINFO_SA;      }      sqinfo[idx].len    = DealignedLength(msa->aseq[idx]);      sqinfo[idx].flags |= SQINFO_LEN;    }  return sqinfo;}/* cc -o sqio_test -DA_QUIET_DAY -L. sqio.c -lsquid */#ifdef A_QUIET_DAY#include "ssi.h"intmain(int argc, char **argv){  FILE *fp;  char *filename;  char *buf;  int   len;  int   mode = 3;  SSIOFFSET off;  filename = argv[1];  if (mode == 1) {    buf = malloc(sizeof(char) * 256);    if ((fp = fopen(filename, "r")) == NULL)      Die("open of %s failed", filename);     while (fgets(buf, 255, fp) != NULL)      ;    fclose(fp);    free(buf);  } else if (mode == 2) {    if ((fp = fopen(filename, "r")) == NULL)      Die("open of %s failed", filename);     buf = NULL; len = 0;    while (sre_fgets(&buf, &len, fp) != NULL)      SSIGetFilePosition(fp, SSI_OFFSET_I32, &off);     fclose(fp);    free(buf);  } else if (mode == 3) {    SQFILE *dbfp;    SQINFO  info;    if ((dbfp = SeqfileOpen(filename, SQFILE_FASTA, NULL)) == NULL)      Die("open of %s failed", filename);     while (ReadSeq(dbfp, dbfp->format, &buf, &info)) {       SSIGetFilePosition(dbfp->f, SSI_OFFSET_I32, &off);       FreeSequence(buf, &info);    }    SeqfileClose(dbfp);  }      } #endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -