⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sequence.c

📁 生物序列比对程序clustw的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
/********* Sequence input routines for CLUSTAL W *******************//* DES was here.  FEB. 1994 *//* Now reads PILEUP/MSF and CLUSTAL alignment files */#include <stdio.h>#include <string.h>#include <ctype.h>#include <stdlib.h>#include "clustalw.h"	#define MIN(a,b) ((a)<(b)?(a):(b))/**	Prototypes*/static char * get_seq(char *,sint *,char *);static char * get_clustal_seq(char *,sint *,char *,sint);static char * get_msf_seq(char *,sint *,char *,sint);static void check_infile(sint *);static void p_encode(char *, char *, sint);static void n_encode(char *, char *, sint);static sint res_index(char *,char);static Boolean check_dnaflag(char *, sint);static sint count_clustal_seqs(void);static sint count_pir_seqs(void);static sint count_msf_seqs(void);static sint count_rsf_seqs(void);static void get_swiss_feature(char *line,sint len);static void get_rsf_feature(char *line,sint len);static void get_swiss_mask(char *line,sint len);static void get_clustal_ss(sint length);static void get_embl_ss(sint length);static void get_rsf_ss(sint length);static void get_gde_ss(sint length);static Boolean cl_blankline(char *line);/* *	Global variables */extern sint max_names;FILE *fin;extern Boolean usemenu, dnaflag, explicit_dnaflag;extern Boolean interactive;extern char seqname[];extern sint nseqs;extern sint *seqlen_array;extern sint *output_index;extern char **names,**titles;extern char **seq_array;extern Boolean profile1_empty, profile2_empty;extern sint gap_pos2;extern sint max_aln_length;extern char *gap_penalty_mask, *sec_struct_mask;extern sint struct_penalties;extern char *ss_name;extern sint profile_no;extern sint debug;char *amino_acid_codes   =    "ABCDEFGHIKLMNPQRSTUVWXYZ-";  /* DES */static sint seqFormat;static char chartab[128];static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR",			      "Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"};void fill_chartab(void)	/* Create translation and check table */{	register sint i;	register char c;		for(i=0;i<128;chartab[i++]=0);	for(i=0;(c=amino_acid_codes[i]);i++)		chartab[(int)c]=chartab[tolower(c)]=c;}static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno)/* read the seqno_th. sequence from a PILEUP multiple alignment file */{	static char line[MAXLINE+1];	char *seq = NULL;	sint i,j,k;	unsigned char c;	fseek(fin,0,0); 		/* start at the beginning */	*len=0;				/* initialise length to zero */        for(i=0;;i++) {		if(fgets(line,MAXLINE+1,fin)==NULL) return NULL; /* read the title*/		if(linetype(line,"//") ) break;		    /* lines...ignore*/	}	while (fgets(line,MAXLINE+1,fin) != NULL) {		if(!blankline(line)) {			for(i=1;i<seqno;i++) fgets(line,MAXLINE+1,fin);                        for(j=0;j<=strlen(line);j++) if(line[j] != ' ') break;			for(k=j;k<=strlen(line);k++) if(line[k] == ' ') break;			strncpy(sname,line+j,MIN(MAXNAMES,k-j)); 			sname[MIN(MAXNAMES,k-j)]=EOS;			rtrim(sname);                       	blank_to_(sname);			if(seq==NULL)				seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));			else				seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));			for(i=k;i<=MAXLINE;i++) {				c=line[i];				if(c == '.' || c == '~' ) c = '-';				if(c == '*') c = 'X';				if(c == '\n' || c == EOS) break; /* EOL */				c=chartab[c];				if(c) seq[++(*len)]=c;			}			for(i=0;;i++) {				if(fgets(line,MAXLINE+1,fin)==NULL) return seq;				if(blankline(line)) break;			}		}	}	return seq;}static Boolean cl_blankline(char *line){	int i;	if (line[0] == '!') return TRUE;		for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {		if( isdigit(line[i]) ||		    isspace(line[i]) ||		    (line[i] == '*') ||		    (line[i] == ':') ||                    (line[i] == '.')) 			;		else			return FALSE;	}	return TRUE;}static char * get_clustal_seq(char *sname,sint *len,char *tit,sint seqno)/* read the seqno_th. sequence from a clustal multiple alignment file */{	static char line[MAXLINE+1];	static char tseq[MAXLINE+1];	char *seq = NULL;	sint i,j;	unsigned char c;	fseek(fin,0,0); 		/* start at the beginning */	*len=0;				/* initialise length to zero */	fgets(line,MAXLINE+1,fin);	/* read the title line...ignore it */	while (fgets(line,MAXLINE+1,fin) != NULL) {		if(!cl_blankline(line)) {			for(i=1;i<seqno;i++) fgets(line,MAXLINE+1,fin);			for(j=0;j<=strlen(line);j++) if(line[j] != ' ') break;			sscanf(line,"%s%s",sname,tseq);			for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;			sname[j]=EOS;			rtrim(sname);                       	blank_to_(sname);			if(seq==NULL)				seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));			else				seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));			for(i=0;i<=MAXLINE;i++) {				c=tseq[i];				/*if(c == '\n' || c == EOS) break;*/ /* EOL */				if(isspace(c) || c == EOS) break; /* EOL */				c=chartab[c];				if(c) seq[++(*len)]=c;			}			for(i=0;;i++) {				if(fgets(line,MAXLINE+1,fin)==NULL) return seq;				if(cl_blankline(line)) break;			}		}	}	return seq;}static void get_clustal_ss(sint length)/* read the structure data from a clustal multiple alignment file */{	static char title[MAXLINE+1];	static char line[MAXLINE+1];	static char lin2[MAXLINE+1];	static char tseq[MAXLINE+1];	static char sname[MAXNAMES+1];	sint i,j,len,ix,struct_index=0;	char c;		fseek(fin,0,0); 		/* start at the beginning */	len=0;				/* initialise length to zero */	if (fgets(line,MAXLINE+1,fin) == NULL) return;	/* read the title line...ignore it */	if (fgets(line,MAXLINE+1,fin) == NULL) return;  /* read the next line... *//* skip any blank lines */	for (;;) {		if(fgets(line,MAXLINE+1,fin)==NULL) return;		if(!blankline(line)) break;	}/* look for structure table lines */	ix = -1;	for(;;) {		if(line[0] != '!') break;		if(strncmp(line,"!SS",3) == 0) {			ix++;			sscanf(line+4,"%s%s",sname,tseq);			for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;			sname[j]=EOS;			rtrim(sname);    		blank_to_(sname);    		if (interactive) {				strcpy(title,"Found secondary structure in alignment file: ");				strcat(title,sname);				(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");			}			else (*lin2) = 'y';			if ((*lin2 != 'n') && (*lin2 != 'N'))  {               					struct_penalties = SECST;				struct_index = ix;				for (i=0;i<length;i++)				{					sec_struct_mask[i] = '.';					gap_penalty_mask[i] = '.';				}				strcpy(ss_name,sname);				for(i=0;len < length;i++) {					c = tseq[i];					if(c == '\n' || c == EOS) break; /* EOL */					if (!isspace(c)) sec_struct_mask[len++] = c;				}			}		}		else if(strncmp(line,"!GM",3) == 0) {			ix++;			sscanf(line+4,"%s%s",sname,tseq);			for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;			sname[j]=EOS;			rtrim(sname);    		blank_to_(sname);    		if (interactive) {				strcpy(title,"Found gap penalty mask in alignment file: ");				strcat(title,sname);				(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");			}			else (*lin2) = 'y';			if ((*lin2 != 'n') && (*lin2 != 'N'))  {               					struct_penalties = GMASK;				struct_index = ix;				for (i=0;i<length;i++)					gap_penalty_mask[i] = '1';					strcpy(ss_name,sname);				for(i=0;len < length;i++) {					c = tseq[i];					if(c == '\n' || c == EOS) break; /* EOL */					if (!isspace(c)) gap_penalty_mask[len++] = c;				}			}		}		if (struct_penalties != NONE) break;		if(fgets(line,MAXLINE+1,fin)==NULL) return;	}				if (struct_penalties == NONE) return;	/* skip any more comment lines */	while (line[0] == '!') {		if(fgets(line,MAXLINE+1,fin)==NULL) return;	}/* skip the sequence lines and any comments after the alignment */	for (;;) {		if(isspace(line[0])) break;		if(fgets(line,MAXLINE+1,fin)==NULL) return;	}			/* read the rest of the alignment */		for (;;) {/* skip any blank lines */			for (;;) {				if(!blankline(line)) break;				if(fgets(line,MAXLINE+1,fin)==NULL) return;			}/* get structure table line */			for(ix=0;ix<struct_index;ix++) {				if (line[0] != '!') {					if(struct_penalties == SECST)						error("bad secondary structure format");					else						error("bad gap penalty mask format");				   	struct_penalties = NONE;					return;				}				if(fgets(line,MAXLINE+1,fin)==NULL) return;			}			if(struct_penalties == SECST) {				if (strncmp(line,"!SS",3) != 0) {					error("bad secondary structure format");					struct_penalties = NONE;					return;				}				sscanf(line+4,"%s%s",sname,tseq);				for(i=0;len < length;i++) {					c = tseq[i];					if(c == '\n' || c == EOS) break; /* EOL */					if (!isspace(c)) sec_struct_mask[len++] = c;				}						}			else if (struct_penalties == GMASK) {				if (strncmp(line,"!GM",3) != 0) {					error("bad gap penalty mask format");					struct_penalties = NONE;					return;				}				sscanf(line+4,"%s%s",sname,tseq);				for(i=0;len < length;i++) {					c = tseq[i];					if(c == '\n' || c == EOS) break; /* EOL */					if (!isspace(c)) gap_penalty_mask[len++] = c;				}						}/* skip any more comment lines */		while (line[0] == '!') {			if(fgets(line,MAXLINE+1,fin)==NULL) return;		}/* skip the sequence lines */		for (;;) {			if(isspace(line[0])) break;			if(fgets(line,MAXLINE+1,fin)==NULL) return;		}	}}static void get_embl_ss(sint length){	static char title[MAXLINE+1];	static char line[MAXLINE+1];	static char lin2[MAXLINE+1];	static char sname[MAXNAMES+1];	char feature[MAXLINE+1];	sint i;/* find the start of the sequence entry */	for (;;) {		while( !linetype(line,"ID") )			if (fgets(line,MAXLINE+1,fin) == NULL) return;			    	for(i=5;i<=strlen(line);i++)  /* DES */			if(line[i] != ' ') break;		strncpy(sname,line+i,MAXNAMES); /* remember entryname */    		for(i=0;i<=strlen(sname);i++)			if(sname[i] == ' ') {				sname[i]=EOS;				break;			}		sname[MAXNAMES]=EOS;		rtrim(sname);    	blank_to_(sname);		/* look for secondary structure feature table / gap penalty mask */		while(fgets(line,MAXLINE+1,fin) != NULL) {			if (linetype(line,"FT")) {				sscanf(line+2,"%s",feature);				if (strcmp(feature,"HELIX") == 0 ||				    strcmp(feature,"STRAND") == 0)				{				if (interactive) {					strcpy(title,"Found secondary structure in alignment file: ");					strcat(title,sname);					(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");				}				else (*lin2) = 'y';				if ((*lin2 != 'n') && (*lin2 != 'N'))  {               						struct_penalties = SECST;					for (i=0;i<length;i++)						sec_struct_mask[i] = '.';					do {						get_swiss_feature(&line[2],length);						fgets(line,MAXLINE+1,fin);					} while( linetype(line,"FT") );				}				else {					do {						fgets(line,MAXLINE+1,fin);					} while( linetype(line,"FT") );				}				strcpy(ss_name,sname);				}			}			else if (linetype(line,"GM")) {				if (interactive) {					strcpy(title,"Found gap penalty mask in alignment file: ");					strcat(title,sname);					(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");				}				else (*lin2) = 'y';				if ((*lin2 != 'n') && (*lin2 != 'N'))  {               						struct_penalties = GMASK;					for (i=0;i<length;i++)						gap_penalty_mask[i] = '1';					do {						get_swiss_mask(&line[2],length);						fgets(line,MAXLINE+1,fin);					} while( linetype(line,"GM") );				}				else {					do {						fgets(line,MAXLINE+1,fin);					} while( linetype(line,"GM") );				}				strcpy(ss_name,sname);			}			if (linetype(line,"SQ"))				break;				if (struct_penalties != NONE) break;					}							}						}static void get_rsf_ss(sint length){	static char title[MAXLINE+1];	static char line[MAXLINE+1];	static char lin2[MAXLINE+1];	static char sname[MAXNAMES+1];	sint i;/* skip the comments */	while (fgets(line,MAXLINE+1,fin) != NULL) { 		if(line[strlen(line)-2]=='.' &&                                 line[strlen(line)-3]=='.')			break;	}/* find the start of the sequence entry */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -