ureadseq.c
来自「EM算法的改进」· C语言 代码 · 共 1,911 行 · 第 1/4 页
C
1,911 行
/* * $Id: ureadseq.c 1339 2006-09-21 19:46:28Z tbailey $ * * $Log$ * Revision 1.2 2006/03/08 20:50:11 nadya * merge chamges from v3_5_2 branch * * Revision 1.1.1.1.4.1 2006/01/26 08:34:26 tbailey * Renamed local function getline() to getline1() to avoid conflict * with system function defined in stdio.h * * Revision 1.1.1.1 2005/07/29 17:19:22 nadya * Importing from meme-3.0.14, and adding configure/make * *//* File: ureadseq.c * * Reads and writes nucleic/protein sequence in various * formats. Data files may have multiple sequences. * * Copyright 1990 by d.g.gilbert * biology dept., indiana university, bloomington, in 47405 * e-mail: gilbertd@bio.indiana.edu * * This program may be freely copied and used by anyone. * Developers are encourged to incorporate parts in their * programs, rather than devise their own private sequence * format. * * This should compile and run with any ANSI C compiler. * */#define UREADSEQ_G#include "ureadseq.h"/* strlcpy is missing from some LINUX */#if defined(Linux)static size_t strlcpy(char *dst, const char *src, size_t dstsize){ int i; for (i=0; src[i] != '\0'; i++) { if (i<dstsize) dst[i] = src[i]; } if (i<dstsize) dst[i] = '\0'; else dst[dstsize-1] = '\0'; return(i); }#endifint Strcasecmp(const char *a, const char *b) /* from Nlm_StrICmp */{ int diff, done; if (a == b) return 0; done = 0; while (! done) { diff = to_upper(*a) - to_upper(*b); if (diff) return diff; if (*a == '\0') done = 1; else { a++; b++; } } return 0;}int Strncasecmp(const char *a, const char *b, long maxn) /* from Nlm_StrNICmp */{ int diff, done; if (a == b) return 0; done = 0; while (! done) { diff = to_upper(*a) - to_upper(*b); if (diff) return diff; if (*a == '\0') done = 1; else { a++; b++; maxn--; if (! maxn) done = 1; } } return 0;}#ifndef Local# define Local static /* local functions */#endif#define kStartLength 500const char *aminos = "ABCDEFGHIKLMNPQRSTVWXYZ*";const char *primenuc = "ACGTU";const char *protonly = "EFIPQZ";const char kNocountsymbols[5] = "_.-?";const char stdsymbols[6] = "_.-*?";const char allsymbols[32] = "_.-*?<>{}[]()!@#$%^&=+;:'/|`~\"\\";static const char *seqsymbols = allsymbols;const char nummask[11] = "0123456789";const char nonummask[11] = "~!@#$%^&*(";/* use general form of isseqchar -- all chars + symbols. no formats except nbrf (?) use symbols in data area as anything other than sequence chars.*/ /* Local variables for readSeq: */struct ReadSeqVars { short choice, err, nseq; long seqlen, maxseq, seqlencount; short topnseq; long topseqlen; const char *fname; char *seq, *seqid, matchchar; boolean allDone, done, filestart, addit; FILE *f; long linestart; char s[MAXLINE], *sp; int (*isseqchar)(int c); /* tlb 3/1/96 */ /*int (*isseqchar)();*/ /* int (*isseqchar)(int c); << sgi cc hates (int c) */};int isSeqChar(int c){ return (isalpha((int)c) || strchr(seqsymbols,c));}int isSeqNumChar(int c){ return (isalnum((int)c) || strchr(seqsymbols,c));}int isAnyChar(int c){ return isascii(c); /* wrap in case isascii is macro */}Local void readline(FILE *f, char *s, long *linestart){ char *cp; *linestart= ftell(f); if (NULL == fgets(s, MAXLINE, f)) *s = 0; else { cp = strchr(s, '\n'); if (cp != NULL) *cp = 0; }}Local void getline1(struct ReadSeqVars *V){ readline(V->f, V->s, &V->linestart);}Local void ungetline(struct ReadSeqVars *V){ fseek(V->f, V->linestart, 0);}Local void addseq(char *s, struct ReadSeqVars *V){ char *ptr; if (V->addit) while (*s != 0) { if ((V->isseqchar)(*s)) { if (V->seqlen >= V->maxseq) { V->maxseq += kStartLength; ptr = (char*) realloc(V->seq, V->maxseq+1); if (ptr==NULL) { V->err = eMemFull; return; } else V->seq = ptr; } V->seq[(V->seqlen)++] = *s; } s++; }}Local void countseq(char *s, struct ReadSeqVars *V) /* this must count all valid seq chars, for some formats (paup-sequential) even if we are skipping seq... */{ while (*s != 0) { if ((V->isseqchar)(*s)) { (V->seqlencount)++; } s++; }}Local void addinfo(char *s, struct ReadSeqVars *V){ char s2[256], *si; boolean saveadd; si = s2; while (*s == ' ') s++; sprintf(si, " %d) %s\n", V->nseq, s); saveadd = V->addit; V->addit = true; V->isseqchar = isAnyChar; addseq( si, V); V->addit = saveadd; V->isseqchar = isSeqChar;}Local void readLoop(short margin, boolean addfirst, boolean (*endTest)(boolean *addend, boolean *ungetend, struct ReadSeqVars *V), struct ReadSeqVars *V){ boolean addend = false; boolean ungetend = false; V->nseq++; if (V->choice == kListSequences) V->addit = false; else V->addit = (V->nseq == V->choice); if (V->addit) V->seqlen = 0; if (addfirst) addseq(V->s, V); do { getline1(V); V->done = feof(V->f); V->done |= (*endTest)( &addend, &ungetend, V); if (V->addit && (addend || !V->done) && ((int) strlen(V->s) > margin)) { addseq( (V->s)+margin, V); } } while (!V->done); if (V->choice == kListSequences) addinfo(V->seqid, V); else { V->allDone = (V->nseq >= V->choice); if (V->allDone && ungetend) ungetline(V); }}Local boolean endIG( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = true; /* 1 or 2 occur in line w/ bases */ *ungetend= false; return((strchr(V->s,'1')!=NULL) || (strchr(V->s,'2')!=NULL));}Local void readIG(struct ReadSeqVars *V){/* 18Aug92: new IG format -- ^L between sequences in place of ";" */ char *si; while (!V->allDone) { do { getline1(V); for (si= V->s; *si != 0 && *si < ' '; si++) *si= ' '; /* drop controls */ if (*si == 0) *V->s= 0; /* chop line to empty */ } while (! (feof(V->f) || ((*V->s != 0) && (*V->s != ';') ) )); if (feof(V->f)) V->allDone = true; else { strcpy(V->seqid, V->s); readLoop(0, false, endIG, V); } }}Local boolean endStrider( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= false; return (strstr( V->s, "//") != NULL);}Local void readStrider(struct ReadSeqVars *V){ /* ? only 1 seq/file ? */ while (!V->allDone) { getline1(V); if (strstr(V->s,"; DNA sequence ") == V->s) strcpy(V->seqid, (V->s)+16); else strcpy(V->seqid, (V->s)+1); while ((!feof(V->f)) && (*V->s == ';')) { getline1(V); } if (feof(V->f)) V->allDone = true; else readLoop(0, true, endStrider, V); }}Local boolean endPIR( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= (strstr(V->s,"ENTRY") == V->s); return ((strstr(V->s,"///") != NULL) || *ungetend);}Local void readPIR(struct ReadSeqVars *V){ /*PIR -- many seqs/file */ while (!V->allDone) { while (! (feof(V->f) || strstr(V->s,"ENTRY") || strstr(V->s,"SEQUENCE")) ) getline1(V); strcpy(V->seqid, (V->s)+16); while (! (feof(V->f) || strstr(V->s,"SEQUENCE") == V->s)) getline1(V); readLoop(0, false, endPIR, V); if (!V->allDone) { while (! (feof(V->f) || ((*V->s != 0) && (strstr( V->s,"ENTRY") == V->s)))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endGB( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= (strstr(V->s,"LOCUS") == V->s); return ((strstr(V->s,"//") != NULL) || *ungetend);}Local void readGenBank(struct ReadSeqVars *V){ /*GenBank -- many seqs/file */ while (!V->allDone) { strcpy(V->seqid, (V->s)+12); while (! (feof(V->f) || strstr(V->s,"ORIGIN") == V->s)) getline1(V); readLoop(0, false, endGB, V); if (!V->allDone) { while (! (feof(V->f) || ((*V->s != 0) && (strstr( V->s,"LOCUS") == V->s)))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endNBRF( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ char *a; if ((a = strchr(V->s, '*')) != NULL) { /* end of 1st seq */ /* "*" can be valid base symbol, drop it here */ *a = 0; *addend = true; *ungetend= false; return(true); } else if (*V->s == '>') { /* start of next seq */ *addend = false; *ungetend= true; return(true); } else return(false);}Local void readNBRF(struct ReadSeqVars *V){ while (!V->allDone) { strcpy(V->seqid, (V->s)+4); getline1(V); /*skip title-junk line*/ readLoop(0, false, endNBRF, V); if (!V->allDone) { while (!(feof(V->f) || (*V->s != 0 && *V->s == '>'))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endPearson( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= true; return(*V->s == '>');}Local void readPearson(struct ReadSeqVars *V){ while (!V->allDone) { strlcpy(V->seqid, (V->s)+1, MAXLINE); readLoop(0, false, endPearson, V); if (!V->allDone) { while (!(feof(V->f) || ((*V->s != 0) && (*V->s == '>')))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endEMBL( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= (strstr(V->s,"ID ") == V->s); return ((strstr(V->s,"//") != NULL) || *ungetend);}Local void readEMBL(struct ReadSeqVars *V){ while (!V->allDone) { strcpy(V->seqid, (V->s)+5); do { getline1(V); } while (!(feof(V->f) | (strstr(V->s,"SQ ") == V->s))); readLoop(0, false, endEMBL, V); if (!V->allDone) { while (!(feof(V->f) | ((*V->s != '\0') & (strstr(V->s,"ID ") == V->s)))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endZuker( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){ *addend = false; *ungetend= true; return( *V->s == '(' );}Local void readZuker(struct ReadSeqVars *V){ /*! 1st string is Zuker's Fortran format */ while (!V->allDone) { getline1(V); /*s == "seqLen seqid string..."*/ strcpy(V->seqid, (V->s)+6); readLoop(0, false, endZuker, V); if (!V->allDone) { while (!(feof(V->f) | ((*V->s != '\0') & (*V->s == '(')))) getline1(V); } if (feof(V->f)) V->allDone = true; }}Local boolean endFitch( boolean *addend, boolean *ungetend, struct ReadSeqVars *V){
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?