ureadseq.c
来自「EM算法的改进」· C语言 代码 · 共 1,911 行 · 第 1/4 页
C
1,911 行
/* this is a somewhat shaky end, 1st char of line is non-blank for seq. title */ *addend = false; *ungetend= true; return( *V->s != ' ' );}Local void readFitch(struct ReadSeqVars *V){ boolean first; first = true; while (!V->allDone) { if (!first) strcpy(V->seqid, V->s); readLoop(0, first, endFitch, V); if (feof(V->f)) V->allDone = true; first = false; }}Local void readPlain(struct ReadSeqVars *V){ V->nseq++; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; addseq(V->seqid, V); /*from above..*/ if (V->fname!=NULL) sprintf(V->seqid, "%s [Unknown form]", V->fname); else sprintf(V->seqid, " [Unknown form]"); do { addseq(V->s, V); V->done = feof(V->f); getline1(V); } while (!V->done); if (V->choice == kListSequences) addinfo(V->seqid, V); V->allDone = true;}Local void readUWGCG(struct ReadSeqVars *V){/*10nov91: Reading GCG files casued duplication of last line when EOF followed that line !!! fix: getline1 now sets *V->s = 0*/ char *si; V->nseq++; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; strcpy(V->seqid, V->s); /*writeseq: " %s Length: %d (today) Check: %d ..\n" */ /*drop above or ".." from id*/ if ( (si = strstr(V->seqid," Length: ")) ) *si = 0; else if ( (si = strstr(V->seqid,"..")) ) *si = 0; do { V->done = feof(V->f); getline1(V); if (!V->done) addseq((V->s), V); } while (!V->done); if (V->choice == kListSequences) addinfo(V->seqid, V); V->allDone = true;}Local void readOlsen(struct ReadSeqVars *V){ /* G. Olsen /print output from multiple sequence editor */ char *si, *sj, *sk, *sm=NULL, sid[40], snum[20]; boolean indata = false; int snumlen = 0; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; rewind(V->f); V->nseq= 0; do { getline1(V); V->done = feof(V->f); if (V->done && !(*V->s)) break; else if (indata) { if ( (si= strstr(V->s, sid)) /* && (strstr(V->s, snum) == si - snumlen - 1) ) { */ && (sm= strstr(V->s, snum)) && (sm < si - snumlen) ) { /* Spaces are valid alignment data !! *//* 17Oct91: Error, the left margin is 21 not 22! *//* dropped some nucs up to now -- my example file was right shifted ! *//* variable right id margin, drop id-2 spaces at end *//* VMS CC COMPILER (VAXC031) mess up: -- Index of 21 is chopping 1st nuc on VMS systems Only! Byte-for-byte same ame rnasep.olsen sequence file !*/ /* si = (V->s)+21; < was this before VMS CC wasted my time */ si += 10; /* use strstr index plus offset to outfox VMS CC bug */ if ( (sk = strstr(si, sid)) ) *(sk-2) = 0; for (sk = si; *sk != 0; sk++) { if (*sk == ' ') *sk = '.'; /* 18aug92: !! some olsen masks are NUMBERS !! which addseq eats */ else if (isdigit((int)*sk)) *sk= nonummask[*sk - '0']; } addseq(si, V); } } else if ( (sk = strstr(V->s, "): ")) ) { /* seq info header line */ /* 18aug92: correct for diff seqs w/ same name -- use number, e.g. */ /* 3 (Agr.tume): agrobacterium.prna 18-JUN-1987 16:12 */ /* 328 (Agr.tume): agrobacterium.prna XYZ 19-DEC-1992 */ (V->nseq)++; si = 1 + strchr(V->s,'('); *sk = ' '; if (V->choice == kListSequences) addinfo( si, V); else if (V->nseq == V->choice) { strcpy(V->seqid, si); sj = strchr(V->seqid, ':'); while (*(--sj) == ' ') ; while (--sj != V->seqid) { if (*sj == ' ') *sj = '_'; } *sk = 0; while (*(--sk) == ' ') *sk = 0; strcpy(sid, si); si= V->s; while ((*si <= ' ') && (*si != 0)) si++; snumlen=0; while (si[snumlen] > ' ' && snumlen<20) { snum[snumlen]= si[snumlen]; snumlen++; } snum[snumlen]= 0; } } else if (strstr(V->s,"identity: Data:")) { indata = true; if (V->choice == kListSequences) V->done = true; } } while (!V->done); V->allDone = true;} /*readOlsen*/Local void readMSF(struct ReadSeqVars *V){ /* gcg's MSF, mult. sequence format, interleaved ! */ char *si, *sj, sid[128]; boolean indata = false; int iline= 0; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; rewind(V->f); V->nseq= 0; do { getline1(V); V->done = feof(V->f); if (V->done && !(*V->s)) break; else if (indata) { /*somename ...gpvedai .......t.. aaigr..vad tvgtgptnse aipaltaaet */ /* E gvenae.kgv tentna.tad fvaqpvylpe .nqt...... kv.affynrs */ si= V->s; skipwhitespace(si); /* for (sj= si; isalnum((int)*sj); sj++) ; bug -- cdelwiche uses "-", "_" and others in names*/ for (sj= si; *sj > ' '; sj++) ; *sj= 0; if ( *si ) { if ( (0==strcmp(si, sid)) ) { addseq(sj+1, V); } iline++; } } else if (NULL != (si = strstr(V->s, "Name: "))) { /* seq info header line */ /* Name: somename Len: 100 Check: 7009 Weight: 1.00 */ (V->nseq)++; si += 6; if (V->choice == kListSequences) addinfo( si, V); else if (V->nseq == V->choice) { strcpy(V->seqid, si); si = V->seqid; skipwhitespace(si); /* for (sj= si; isalnum((int)*sj); sj++) ; -- bug */ for (sj= si; *sj > ' '; sj++) ; *sj= 0; strcpy(sid, si); } } else if ( strstr(V->s,"//") /*== V->s*/ ) { indata = true; iline= 0; if (V->choice == kListSequences) V->done = true; } } while (!V->done); V->allDone = true;} /*readMSF*/Local void readPAUPinterleaved(struct ReadSeqVars *V){ /* PAUP mult. sequence format, interleaved or sequential! */ char *si, *sj, *send, sid[40], sid1[40], saveseq[255]; boolean first = true, indata = false, domatch; int iline= 0, ifmc, saveseqlen=0;#define fixmatchchar(s) { \ for (ifmc=0; ifmc<saveseqlen; ifmc++) \ if (s[ifmc] == V->matchchar) s[ifmc]= saveseq[ifmc]; } V->addit = (V->choice > 0); V->seqlencount = 0; if (V->addit) V->seqlen = 0; /* rewind(V->f); V->nseq= 0; << do in caller !*/ indata= true; /* call here after we find "matrix" */ domatch= (V->matchchar > 0); do { getline1(V); V->done = feof(V->f); if (V->done && !(*V->s)) break; else if (indata) { /* [ 1 1 1 ]*/ /* human aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/ /* chimp ................a.t. .c.................a ..........*/ /* !! need to correct for V->matchchar */ si= V->s; skipwhitespace(si); if (strchr(si,';')) indata= false; if (isalnum((int)*si)) { /* valid data line starts w/ a left-justified seq name in columns [0..8] */ if (first) { (V->nseq)++; if (V->nseq >= V->topnseq) first= false; for (sj = si; isalnum((int)*sj); sj++) ; send= sj; skipwhitespace(sj); if (V->choice == kListSequences) { *send= 0; addinfo( si, V); } else if (V->nseq == V->choice) { if (domatch) { if (V->nseq == 1) { strcpy( saveseq, sj); saveseqlen= strlen(saveseq); } else fixmatchchar( sj); } addseq(sj, V); *send= 0; strcpy(V->seqid, si); strcpy(sid, si); if (V->nseq == 1) strcpy(sid1, sid); } } else if ( (strstr(si, sid) == si) ){ while (isalnum((int)*si)) si++; skipwhitespace(si); if (domatch) { if (V->nseq == 1) { strcpy( saveseq, si); saveseqlen= strlen(saveseq); } else fixmatchchar( si); } addseq(si, V); } else if (domatch && (strstr(si, sid1) == si)) { strcpy( saveseq, si); saveseqlen= strlen(saveseq); } iline++; } } else if ( strstr(V->s,"matrix") ) { indata = true; iline= 0; if (V->choice == kListSequences) V->done = true; } } while (!V->done); V->allDone = true;} /*readPAUPinterleaved*/Local void readPAUPsequential(struct ReadSeqVars *V){ /* PAUP mult. sequence format, interleaved or sequential! */ char *si, *sj; boolean atname = true, indata = false; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; V->seqlencount = 0; /* rewind(V->f); V->nseq= 0; << do in caller !*/ indata= true; /* call here after we find "matrix" */ do { getline1(V); V->done = feof(V->f); if (V->done && !(*V->s)) break; else if (indata) { /* [ 1 1 1 ]*/ /* human aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/ /* aagcttcaccggcgcagtca ttctcataatcgcccacggR cttacatcct*/ /* chimp ................a.t. .c.................a ..........*/ /* ................a.t. .c.................a ..........*/ si= V->s; skipwhitespace(si); if (strchr(si,';')) indata= false; if (isalnum((int)*si)) { /* valid data line starts w/ a left-justified seq name in columns [0..8] */ if (atname) { (V->nseq)++; V->seqlencount = 0; atname= false; sj= si+1; while (isalnum((int)*sj)) sj++; if (V->choice == kListSequences) { /* !! we must count bases to know when topseqlen is reached ! */ countseq(sj, V); if (V->seqlencount >= V->topseqlen) atname= true; *sj= 0; addinfo( si, V); } else if (V->nseq == V->choice) { addseq(sj, V); V->seqlencount= V->seqlen; if (V->seqlencount >= V->topseqlen) atname= true; *sj= 0; strcpy(V->seqid, si); } else { countseq(sj, V); if (V->seqlencount >= V->topseqlen) atname= true; } } else if (V->nseq == V->choice) { addseq(V->s, V); V->seqlencount= V->seqlen; if (V->seqlencount >= V->topseqlen) atname= true; } else { countseq(V->s, V); if (V->seqlencount >= V->topseqlen) atname= true; } } } else if ( strstr(V->s,"matrix") ) { indata = true; atname= true; if (V->choice == kListSequences) V->done = true; } } while (!V->done); V->allDone = true;} /*readPAUPsequential*/Local void readPhylipInterleaved(struct ReadSeqVars *V){ char *si, *sj; boolean first = true; int iline= 0; V->addit = (V->choice > 0); if (V->addit) V->seqlen = 0; V->seqlencount = 0; /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); << topnseq == 0 !!! bad scan !! */ si= V->s; skipwhitespace(si); V->topnseq= atoi(si); while (isdigit((int)*si)) si++; skipwhitespace(si); V->topseqlen= atol(si); /* fprintf(stderr,"Phylip-ileaf: topnseq=%d topseqlen=%d\n",V->topnseq, V->topseqlen); */ do { getline1(V); V->done = feof(V->f); if (V->done && !(*V->s)) break; si= V->s; skipwhitespace(si); if (*si != 0) { if (first) { /* collect seq names + seq, as fprintf(outf,"%-10s ",seqname); */ (V->nseq)++; if (V->nseq >= V->topnseq) first= false; sj= V->s+10; /* past name, start of data */ if (V->choice == kListSequences) { *sj= 0; addinfo( si, V); } else if (V->nseq == V->choice) { addseq(sj, V); *sj= 0; strcpy(V->seqid, si); } } else if ( iline % V->nseq == V->choice -1 ) { addseq(si, V); } iline++; } } while (!V->done); V->allDone = true;} /*readPhylipInterleaved*/Local boolean endPhylipSequential( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= false; countseq( V->s, V); return V->seqlencount >= V->topseqlen;}Local void readPhylipSequential(struct ReadSeqVars *V){ short i; char *si; /* sscanf( V->s, "%d%d", &V->topnseq, &V->topseqlen); < ? bad sscan ? */ si= V->s; skipwhitespace(si); V->topnseq= atoi(si); while (isdigit((int)*si)) si++; skipwhitespace(si); V->topseqlen= atol(si); getline1(V); while (!V->allDone) { V->seqlencount= 0; strncpy(V->seqid, (V->s), 10); V->seqid[10]= 0; for (i=0; i<10 && V->s[i]; i++) V->s[i]= ' '; readLoop(0, true, endPhylipSequential, V); if (feof(V->f)) V->allDone = true; }}Local void readSeqMain( struct ReadSeqVars *V, const long skiplines_, const short format_){#define tolowerstr(s) { long Itlwr, Ntlwr= strlen(s); \ for (Itlwr=0; Itlwr<Ntlwr; Itlwr++) s[Itlwr]= to_lower(s[Itlwr]); } boolean gotuw; long l; V->linestart= 0; V->matchchar= 0;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?