ureadseq.c

来自「EM算法的改进」· C语言 代码 · 共 1,911 行 · 第 1/4 页

C
1,911
字号
  /* this is a somewhat shaky end,    1st char of line is non-blank for seq. title  */  *addend = false;  *ungetend= true;  return( *V->s != ' ' );}Local void readFitch(struct ReadSeqVars *V){  boolean first;  first = true;  while (!V->allDone) {    if (!first) strcpy(V->seqid, V->s);    readLoop(0, first, endFitch, V);    if (feof(V->f)) V->allDone = true;    first = false;    }}Local void readPlain(struct ReadSeqVars *V){  V->nseq++;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  addseq(V->seqid, V);   /*from above..*/  if (V->fname!=NULL) sprintf(V->seqid, "%s  [Unknown form]", V->fname);  else sprintf(V->seqid, "  [Unknown form]");  do {    addseq(V->s, V);    V->done = feof(V->f);    getline1(V);  } while (!V->done);  if (V->choice == kListSequences) addinfo(V->seqid, V);  V->allDone = true;}Local void readUWGCG(struct ReadSeqVars *V){/*10nov91: Reading GCG files casued duplication of last line when         EOF followed that line !!!    fix: getline1 now sets *V->s = 0*/  char  *si;  V->nseq++;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  strcpy(V->seqid, V->s);  /*writeseq: "    %s  Length: %d  (today)  Check: %d  ..\n" */  /*drop above or ".." from id*/  if ( (si = strstr(V->seqid,"  Length: ")) ) *si = 0;  else if ( (si = strstr(V->seqid,"..")) ) *si = 0;  do {    V->done = feof(V->f);    getline1(V);    if (!V->done) addseq((V->s), V);  } while (!V->done);  if (V->choice == kListSequences) addinfo(V->seqid, V);  V->allDone = true;}Local void readOlsen(struct ReadSeqVars *V){ /* G. Olsen /print output from multiple sequence editor */  char    *si, *sj, *sk, *sm=NULL, sid[40], snum[20];  boolean indata = false;  int snumlen = 0;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  rewind(V->f); V->nseq= 0;  do {    getline1(V);    V->done = feof(V->f);    if (V->done && !(*V->s)) break;    else if (indata) {      if ( (si= strstr(V->s, sid))        /* && (strstr(V->s, snum) == si - snumlen - 1) ) { */        && (sm= strstr(V->s, snum)) && (sm < si - snumlen) ) {        /* Spaces are valid alignment data !! *//* 17Oct91: Error, the left margin is 21 not 22! *//* dropped some nucs up to now -- my example file was right shifted ! *//* variable right id margin, drop id-2 spaces at end *//*  VMS CC COMPILER (VAXC031) mess up:  -- Index of 21 is chopping 1st nuc on VMS systems Only!  Byte-for-byte same ame rnasep.olsen sequence file !*/        /* si = (V->s)+21; < was this before VMS CC wasted my time */        si += 10;  /* use strstr index plus offset to outfox VMS CC bug */        if ( (sk = strstr(si, sid)) ) *(sk-2) = 0;        for (sk = si; *sk != 0; sk++) {           if (*sk == ' ') *sk = '.';           /* 18aug92: !! some olsen masks are NUMBERS !! which addseq eats */           else if (isdigit((int)*sk)) *sk= nonummask[*sk - '0'];           }        addseq(si, V);        }      }    else if ( (sk = strstr(V->s, "): ")) ) {  /* seq info header line */  /* 18aug92: correct for diff seqs w/ same name -- use number, e.g. */  /*   3 (Agr.tume):  agrobacterium.prna  18-JUN-1987 16:12 */  /* 328 (Agr.tume):  agrobacterium.prna XYZ  19-DEC-1992   */      (V->nseq)++;      si = 1 + strchr(V->s,'(');      *sk = ' ';      if (V->choice == kListSequences) addinfo( si, V);      else if (V->nseq == V->choice) {        strcpy(V->seqid, si);        sj = strchr(V->seqid, ':');        while (*(--sj) == ' ') ;        while (--sj != V->seqid) { if (*sj == ' ') *sj = '_'; }        *sk = 0;        while (*(--sk) == ' ') *sk = 0;        strcpy(sid, si);        si= V->s;        while ((*si <= ' ') && (*si != 0)) si++;        snumlen=0;        while (si[snumlen] > ' ' && snumlen<20)         { snum[snumlen]= si[snumlen]; snumlen++; }        snum[snumlen]= 0;        }      }    else if (strstr(V->s,"identity:   Data:")) {      indata = true;      if (V->choice == kListSequences) V->done = true;      }  } while (!V->done);  V->allDone = true;} /*readOlsen*/Local void readMSF(struct ReadSeqVars *V){ /* gcg's MSF, mult. sequence format, interleaved ! */  char    *si, *sj, sid[128];  boolean indata = false;  int     iline= 0;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  rewind(V->f); V->nseq= 0;  do {    getline1(V);    V->done = feof(V->f);    if (V->done && !(*V->s)) break;    else if (indata) {      /*somename  ...gpvedai .......t.. aaigr..vad tvgtgptnse aipaltaaet */      /*       E  gvenae.kgv tentna.tad fvaqpvylpe .nqt...... kv.affynrs */      si= V->s;      skipwhitespace(si);      /* for (sj= si; isalnum((int)*sj); sj++) ; bug -- cdelwiche uses "-", "_" and others in names*/      for (sj= si; *sj > ' '; sj++) ;      *sj= 0;      if ( *si ) {        if ( (0==strcmp(si, sid)) ) {          addseq(sj+1, V);          }        iline++;        }      }    else if (NULL != (si = strstr(V->s, "Name: "))) {  /* seq info header line */      /* Name: somename      Len:   100  Check: 7009  Weight:  1.00 */      (V->nseq)++;      si += 6;      if (V->choice == kListSequences) addinfo( si, V);      else if (V->nseq == V->choice) {        strcpy(V->seqid, si);        si = V->seqid;        skipwhitespace(si);        /* for (sj= si; isalnum((int)*sj); sj++) ; -- bug */        for (sj= si; *sj > ' '; sj++) ;        *sj= 0;        strcpy(sid, si);        }      }    else if ( strstr(V->s,"//") /*== V->s*/ )  {      indata = true;      iline= 0;      if (V->choice == kListSequences) V->done = true;      }  } while (!V->done);  V->allDone = true;} /*readMSF*/Local void readPAUPinterleaved(struct ReadSeqVars *V){ /* PAUP mult. sequence format, interleaved or sequential! */  char    *si, *sj, *send, sid[40], sid1[40], saveseq[255];  boolean first = true, indata = false, domatch;  int     iline= 0, ifmc, saveseqlen=0;#define fixmatchchar(s) { \  for (ifmc=0; ifmc<saveseqlen; ifmc++) \    if (s[ifmc] == V->matchchar) s[ifmc]= saveseq[ifmc]; }  V->addit = (V->choice > 0);  V->seqlencount = 0;  if (V->addit) V->seqlen = 0;  /* rewind(V->f); V->nseq= 0;  << do in caller !*/  indata= true; /* call here after we find "matrix" */  domatch= (V->matchchar > 0);  do {    getline1(V);    V->done = feof(V->f);    if (V->done && !(*V->s)) break;    else if (indata) {      /* [         1                    1                    1         ]*/      /* human     aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/      /* chimp     ................a.t. .c.................a ..........*/      /* !! need to correct for V->matchchar */      si= V->s;      skipwhitespace(si);      if (strchr(si,';')) indata= false;      if (isalnum((int)*si))  {        /* valid data line starts w/ a left-justified seq name in columns [0..8] */        if (first) {          (V->nseq)++;          if (V->nseq >= V->topnseq) first= false;          for (sj = si; isalnum((int)*sj); sj++) ;          send= sj;          skipwhitespace(sj);          if (V->choice == kListSequences) {            *send= 0;            addinfo( si, V);            }          else if (V->nseq == V->choice) {            if (domatch) {              if (V->nseq == 1) { strcpy( saveseq, sj); saveseqlen= strlen(saveseq); }              else fixmatchchar( sj);              }            addseq(sj, V);            *send= 0;            strcpy(V->seqid, si);            strcpy(sid, si);            if (V->nseq == 1) strcpy(sid1, sid);            }          }        else if ( (strstr(si, sid) == si) ){          while (isalnum((int)*si)) si++;          skipwhitespace(si);          if (domatch) {            if (V->nseq == 1) { strcpy( saveseq, si); saveseqlen= strlen(saveseq); }            else fixmatchchar( si);            }          addseq(si, V);          }        else if (domatch && (strstr(si, sid1) == si)) {          strcpy( saveseq, si);          saveseqlen= strlen(saveseq);          }        iline++;        }      }    else if ( strstr(V->s,"matrix") )  {      indata = true;      iline= 0;      if (V->choice == kListSequences) V->done = true;      }  } while (!V->done);  V->allDone = true;} /*readPAUPinterleaved*/Local void readPAUPsequential(struct ReadSeqVars *V){ /* PAUP mult. sequence format, interleaved or sequential! */  char    *si, *sj;  boolean atname = true, indata = false;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  V->seqlencount = 0;  /* rewind(V->f); V->nseq= 0;  << do in caller !*/  indata= true; /* call here after we find "matrix" */  do {    getline1(V);    V->done = feof(V->f);    if (V->done && !(*V->s)) break;    else if (indata) {      /* [         1                    1                    1         ]*/      /* human     aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/      /*           aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/      /* chimp     ................a.t. .c.................a ..........*/      /*           ................a.t. .c.................a ..........*/      si= V->s;      skipwhitespace(si);      if (strchr(si,';')) indata= false;      if (isalnum((int)*si))  {        /* valid data line starts w/ a left-justified seq name in columns [0..8] */        if (atname) {          (V->nseq)++;          V->seqlencount = 0;          atname= false;          sj= si+1;          while (isalnum((int)*sj)) sj++;          if (V->choice == kListSequences) {            /* !! we must count bases to know when topseqlen is reached ! */            countseq(sj, V);            if (V->seqlencount >= V->topseqlen) atname= true;            *sj= 0;            addinfo( si, V);            }          else if (V->nseq == V->choice) {            addseq(sj, V);            V->seqlencount= V->seqlen;            if (V->seqlencount >= V->topseqlen) atname= true;            *sj= 0;            strcpy(V->seqid, si);            }          else {            countseq(sj, V);            if (V->seqlencount >= V->topseqlen) atname= true;            }          }        else if (V->nseq == V->choice) {          addseq(V->s, V);          V->seqlencount= V->seqlen;          if (V->seqlencount >= V->topseqlen) atname= true;          }        else {          countseq(V->s, V);          if (V->seqlencount >= V->topseqlen) atname= true;          }        }      }    else if ( strstr(V->s,"matrix") )  {      indata = true;      atname= true;      if (V->choice == kListSequences) V->done = true;      }  } while (!V->done);  V->allDone = true;} /*readPAUPsequential*/Local void readPhylipInterleaved(struct ReadSeqVars *V){  char    *si, *sj;  boolean first = true;  int     iline= 0;  V->addit = (V->choice > 0);  if (V->addit) V->seqlen = 0;  V->seqlencount = 0;  /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); << topnseq == 0 !!! bad scan !! */  si= V->s;  skipwhitespace(si);  V->topnseq= atoi(si);  while (isdigit((int)*si)) si++;  skipwhitespace(si);  V->topseqlen= atol(si);  /* fprintf(stderr,"Phylip-ileaf: topnseq=%d  topseqlen=%d\n",V->topnseq, V->topseqlen); */  do {    getline1(V);    V->done = feof(V->f);    if (V->done && !(*V->s)) break;    si= V->s;    skipwhitespace(si);    if (*si != 0) {      if (first) {  /* collect seq names + seq, as fprintf(outf,"%-10s  ",seqname); */        (V->nseq)++;        if (V->nseq >= V->topnseq) first= false;        sj= V->s+10;  /* past name, start of data */        if (V->choice == kListSequences) {          *sj= 0;          addinfo( si, V);          }        else if (V->nseq == V->choice) {          addseq(sj, V);          *sj= 0;          strcpy(V->seqid, si);          }        }      else if ( iline % V->nseq == V->choice -1 ) {        addseq(si, V);        }      iline++;    }  } while (!V->done);  V->allDone = true;} /*readPhylipInterleaved*/Local boolean endPhylipSequential( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){  *addend = false;  *ungetend= false;  countseq( V->s, V);  return V->seqlencount >= V->topseqlen;}Local void readPhylipSequential(struct ReadSeqVars *V){  short  i;  char  *si;  /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); < ? bad sscan ? */  si= V->s;  skipwhitespace(si);  V->topnseq= atoi(si);  while (isdigit((int)*si)) si++;  skipwhitespace(si);  V->topseqlen= atol(si);  getline1(V);  while (!V->allDone) {    V->seqlencount= 0;    strncpy(V->seqid, (V->s), 10);    V->seqid[10]= 0;    for (i=0; i<10 && V->s[i]; i++) V->s[i]= ' ';    readLoop(0, true, endPhylipSequential, V);    if (feof(V->f)) V->allDone = true;    }}Local void readSeqMain(      struct ReadSeqVars *V,      const long  skiplines_,      const short format_){#define tolowerstr(s) { long Itlwr, Ntlwr= strlen(s); \  for (Itlwr=0; Itlwr<Ntlwr; Itlwr++) s[Itlwr]= to_lower(s[Itlwr]); }  boolean gotuw;  long l;  V->linestart= 0;  V->matchchar= 0;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?