⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sequence.c

📁 在任务级并行平台P2HP上
💻 C
📖 第 1 页 / 共 3 页
字号:
/********* Sequence input routines for CLUSTAL W *******************/
/* DES was here.  FEB. 1994 */
/* Now reads PILEUP/MSF and CLUSTAL alignment files */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "clustalw.h"	

/****************
Added by jf.
2005.1.20
******************/
#include <windows.h>
#define EOS '\0'
//end.



/* Smaller of a and b.  The selected argument is evaluated twice, so do not
   pass expressions with side effects (e.g. MIN(i++,j)). */
#define MIN(a,b) ((a)<(b)?(a):(b))



/*
*	Prototypes
*
*	Forward declarations of the file-local (static) helpers.  Only some of
*	the definitions are visible in this part of the file.
*/

/* format-specific sequence readers: (name out, length out, title, [sequence number]) */
static char * get_seq(char *,sint *,char *);
static char * get_clustal_seq(char *,sint *,char *,sint);
static char * get_msf_seq(char *,sint *,char *,sint);
static void check_infile(sint *);
static void p_encode(char *, char *, sint);
static void n_encode(char *, char *, sint);
static sint res_index(char *,char);
static Boolean check_dnaflag(char *, sint);
/* per-format sequence counters */
static sint count_clustal_seqs(void);
static sint count_pir_seqs(void);
static sint count_msf_seqs(void);
static sint count_rsf_seqs(void);
/* feature / mask line parsers called by the get_*_ss routines */
static void get_swiss_feature(char *line,sint len);
static void get_rsf_feature(char *line,sint len);
static void get_swiss_mask(char *line,sint len);
/* per-format readers for secondary structure / gap penalty masks */
static void get_clustal_ss(sint length);
static void get_embl_ss(sint length);
static void get_rsf_ss(sint length);
static void get_gde_ss(sint length);
static Boolean cl_blankline(char *line);

/*
 *	Global variables
 *
 *	Most of these are defined in other translation units and only
 *	referenced here.
 */
extern sint max_names;
FILE *fin;	/* input stream read by the get_*_seq / get_*_ss routines below */
extern Boolean usemenu, dnaflag, explicit_dnaflag;
extern Boolean interactive;	/* when set, the *_ss readers prompt before using a mask */
extern char seqname[];
extern sint nseqs;
extern sint *seqlen_array;
extern sint *output_index;
extern char **names,**titles;
extern char **seq_array;
extern Boolean profile1_empty, profile2_empty;
extern sint gap_pos2;
extern sint max_aln_length;
/* per-column masks written by the get_*_ss routines */
extern char *gap_penalty_mask, *sec_struct_mask;
extern sint struct_penalties;	/* set to SECST or GMASK once a mask has been accepted */
extern char *ss_name;		/* receives the name of the entry the mask came from */
extern sint profile_no;
extern sint debug;

/* residue codes accepted in input; fill_chartab() builds chartab[] from this string */
char *amino_acid_codes   =    "ABCDEFGHIKLMNPQRSTUVWXYZ-";  /* DES */
static sint seqFormat;
/* character code -> upper-case residue code, or 0 if the character is not a residue */
static char chartab[128];
/* printable format names; presumably indexed by seqFormat - TODO confirm against users */
static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR",
			      "Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"};
//begin
/* NOTE(review): not referenced in the visible part of the file; looks like a
   file-name length limit (without path) - confirm against its users */
const int MAC_FILENAMELENOPATH=200;
//end.

void fill_chartab(void)	/* Create translation and check table */
{
	register sint i;
	register char c;
	
	for(i=0;i<128;chartab[i++]=0);
	for(i=0;(c=amino_acid_codes[i]);i++)
		chartab[(int)c]=chartab[tolower(c)]=c;
}

/*
 * Read the seqno-th sequence from a PILEUP/MSF multiple alignment file.
 *
 * The file (global fin) is rewound and header lines are skipped until
 * linetype(line,"//") reports the divider.  After that, for every block of
 * non-blank lines, the seqno-th line of the block is parsed: its first
 * space-delimited token is stored in sname (at most MAXNAMES characters) and
 * the residues that follow are appended to the result.
 *
 *   sname  - out: sequence name taken from the most recent block
 *   len    - out: number of residues stored
 *   tit    - unused by this reader
 *   seqno  - 1-based position of the wanted sequence inside each block
 *
 * Returns a buffer obtained from ckalloc/ckrealloc whose residues occupy
 * indices 1..*len (index 0 is unused and no terminator is written), or NULL
 * if the divider line was never found or no block was read.
 */
static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno)
{
	static char line[MAXLINE+1];
	char *seq = NULL;
	sint i,j,k,linelen,namelen;
	unsigned char c;

	(void)tit;

	fseek(fin,0,0); 		/* start at the beginning */

	*len=0;				/* initialise length to zero */

	/* skip the header: everything up to and including the "//" line */
	for(;;) {
		if(fgets(line,MAXLINE+1,fin)==NULL) return NULL;
		if(linetype(line,"//") ) break;
	}

	while (fgets(line,MAXLINE+1,fin) != NULL) {
		if(blankline(line)) continue;

		/* advance to the line of this block that holds sequence seqno;
		   stop at EOF instead of re-parsing a stale line */
		for(i=1;i<seqno;i++)
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;

		/* locate the name token; both scans stay inside the string so
		   that k never points beyond the terminator */
		linelen=(sint)strlen(line);
		for(j=0;j<linelen;j++) if(line[j] != ' ') break;
		for(k=j;k<linelen;k++) if(line[k] == ' ') break;
		namelen=MIN(MAXNAMES,k-j);
		strncpy(sname,line+j,namelen);
		sname[namelen]=EOS;
		rtrim(sname);
		blank_to_(sname);

		/* make room for at most one more line worth of residues */
		if(seq==NULL)
			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
		else
			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));

		for(i=k;i<=MAXLINE;i++) {
			c=line[i];
			if(c == '.' || c == '~' ) c = '-';	/* MSF gap symbols */
			if(c == '*') c = 'X';
			if(c == '\n' || c == EOS) break; /* EOL */
			/* chartab only covers 7-bit codes; anything else is not a residue */
			c=(c < sizeof chartab) ? chartab[c] : 0;
			if(c) seq[++(*len)]=c;
		}

		/* skip the remaining lines of this block */
		for(;;) {
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
			if(blankline(line)) break;
		}
	}
	return seq;
}

/*
 * Decide whether a line of a CLUSTAL alignment file carries no sequence data.
 *
 * Returns TRUE when the line starts with '!' or when every character before
 * the first newline/terminator is a digit, white space, '*', ':' or '.';
 * returns FALSE as soon as any other character is seen.
 */
static Boolean cl_blankline(char *line)
{
	int i;
	unsigned char ch;

	if (line[0] == '!') return TRUE;

	for(i=0;line[i]!='\n' && line[i]!=EOS;i++) {
		/* <ctype.h> functions require an unsigned char value; a plain
		   (possibly negative) char is undefined behaviour */
		ch = (unsigned char)line[i];
		if( isdigit(ch) ||
		    isspace(ch) ||
		    (ch == '*') ||
		    (ch == ':') ||
		    (ch == '.'))
			continue;
		return FALSE;
	}
	return TRUE;
}

/*
 * Read the seqno-th sequence from a CLUSTAL multiple alignment file.
 *
 * The file (global fin) is rewound and its first line is discarded.  For every
 * block of lines for which cl_blankline() is false, the seqno-th line of the
 * block is split into a name token and a residue token; the name is stored in
 * sname (truncated to MAXNAMES characters) and the residues are appended to
 * the result.
 *
 *   sname  - out: sequence name; must have room for MAXNAMES+1 characters
 *   len    - out: number of residues stored
 *   tit    - unused by this reader
 *   seqno  - 1-based position of the wanted sequence inside each block
 *
 * Returns a buffer obtained from ckalloc/ckrealloc whose residues occupy
 * indices 1..*len (index 0 is unused and no terminator is written), or NULL
 * if no block was read.
 */
static char * get_clustal_seq(char *sname,sint *len,char *tit,sint seqno)
{
	static char line[MAXLINE+1];
	static char tname[MAXLINE+1];
	static char tseq[MAXLINE+1];
	char *seq = NULL;
	sint i;
	unsigned char c;

	(void)tit;

	fseek(fin,0,0); 		/* start at the beginning */

	*len=0;				/* initialise length to zero */
	if(fgets(line,MAXLINE+1,fin)==NULL) return NULL;	/* title line...ignore it */

	while (fgets(line,MAXLINE+1,fin) != NULL) {
		if(cl_blankline(line)) continue;

		/* advance to the line of this block that holds sequence seqno;
		   stop at EOF instead of re-parsing a stale line */
		for(i=1;i<seqno;i++)
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;

		/* Scan into line-sized scratch buffers: a token can never be longer
		   than the line itself, so neither %s can overflow.  Both buffers
		   are cleared first so a missing token reads as empty rather than
		   as left-over data from an earlier line. */
		tname[0]=EOS;
		tseq[0]=EOS;
		sscanf(line,"%s%s",tname,tseq);
		strncpy(sname,tname,MAXNAMES);
		sname[MAXNAMES]=EOS;
		rtrim(sname);
		blank_to_(sname);

		/* make room for at most one more line worth of residues */
		if(seq==NULL)
			seq=(char *)ckalloc((MAXLINE+2)*sizeof(char));
		else
			seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char));

		for(i=0;i<=MAXLINE;i++) {
			c=tseq[i];
			if(isspace(c) || c == EOS) break; /* EOL */
			/* chartab only covers 7-bit codes; anything else is not a residue */
			c=(c < sizeof chartab) ? chartab[c] : 0;
			if(c) seq[++(*len)]=c;
		}

		/* skip the remaining lines of this block */
		for(;;) {
			if(fgets(line,MAXLINE+1,fin)==NULL) return seq;
			if(cl_blankline(line)) break;
		}
	}

	return seq;
}

static void get_clustal_ss(sint length)
/* read the structure data from a clustal multiple alignment file */
{
	static char title[MAXLINE+1];
	static char line[MAXLINE+1];
	static char lin2[MAXLINE+1];
	static char tseq[MAXLINE+1];
	static char sname[MAXNAMES+1];
	sint i,j,len,ix,struct_index=0;
	char c;

	
	fseek(fin,0,0); 		/* start at the beginning */

	len=0;				/* initialise length to zero */
	if (fgets(line,MAXLINE+1,fin) == NULL) return;	/* read the title line...ignore it */

	if (fgets(line,MAXLINE+1,fin) == NULL) return;  /* read the next line... */
/* skip any blank lines */
	for (;;) {
		if(fgets(line,MAXLINE+1,fin)==NULL) return;
		if(!blankline(line)) break;
	}

/* look for structure table lines */
	ix = -1;
	for(;;) {
		if(line[0] != '!') break;
		if(strncmp(line,"!SS",3) == 0) {
			ix++;
			sscanf(line+4,"%s%s",sname,tseq);
			for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;
			sname[j]=EOS;
			rtrim(sname);
    		blank_to_(sname);
    		if (interactive) {
				strcpy(title,"Found secondary structure in alignment file: ");
				strcat(title,sname);
				(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
			}
			else (*lin2) = 'y';
			if ((*lin2 != 'n') && (*lin2 != 'N'))  {               	
				struct_penalties = SECST;
				struct_index = ix;
				for (i=0;i<length;i++)
				{
					sec_struct_mask[i] = '.';
					gap_penalty_mask[i] = '.';
				}
				strcpy(ss_name,sname);
				for(i=0;len < length;i++) {
					c = tseq[i];
					if(c == '\n' || c == EOS) break; /* EOL */
					if (!isspace(c)) sec_struct_mask[len++] = c;
				}
			}
		}
		else if(strncmp(line,"!GM",3) == 0) {
			ix++;
			sscanf(line+4,"%s%s",sname,tseq);
			for(j=0;j<MAXNAMES;j++) if(sname[j] == ' ') break;
			sname[j]=EOS;
			rtrim(sname);
    		blank_to_(sname);
    		if (interactive) {
				strcpy(title,"Found gap penalty mask in alignment file: ");
				strcat(title,sname);
				(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
			}
			else (*lin2) = 'y';
			if ((*lin2 != 'n') && (*lin2 != 'N'))  {               	
				struct_penalties = GMASK;
				struct_index = ix;
				for (i=0;i<length;i++)
					gap_penalty_mask[i] = '1';
					strcpy(ss_name,sname);
				for(i=0;len < length;i++) {
					c = tseq[i];
					if(c == '\n' || c == EOS) break; /* EOL */
					if (!isspace(c)) gap_penalty_mask[len++] = c;
				}
			}
		}
		if (struct_penalties != NONE) break;
		if(fgets(line,MAXLINE+1,fin)==NULL) return;
	}
			
	if (struct_penalties == NONE) return;
	
/* skip any more comment lines */
	while (line[0] == '!') {
		if(fgets(line,MAXLINE+1,fin)==NULL) return;
	}

/* skip the sequence lines and any comments after the alignment */
	for (;;) {
		if(isspace(line[0])) break;
		if(fgets(line,MAXLINE+1,fin)==NULL) return;
	}
			

/* read the rest of the alignment */
	
	for (;;) {
/* skip any blank lines */
			for (;;) {
				if(!blankline(line)) break;
				if(fgets(line,MAXLINE+1,fin)==NULL) return;
			}
/* get structure table line */
			for(ix=0;ix<struct_index;ix++) {
				if (line[0] != '!') {
					if(struct_penalties == SECST)
						error("bad secondary structure format");
					else
						error("bad gap penalty mask format");
				   	struct_penalties = NONE;
					return;
				}
				if(fgets(line,MAXLINE+1,fin)==NULL) return;
			}
			if(struct_penalties == SECST) {
				if (strncmp(line,"!SS",3) != 0) {
					error("bad secondary structure format");
					struct_penalties = NONE;
					return;
				}
				sscanf(line+4,"%s%s",sname,tseq);
				for(i=0;len < length;i++) {
					c = tseq[i];
					if(c == '\n' || c == EOS) break; /* EOL */
					if (!isspace(c)) sec_struct_mask[len++] = c;
				}			
			}
			else if (struct_penalties == GMASK) {
				if (strncmp(line,"!GM",3) != 0) {
					error("bad gap penalty mask format");
					struct_penalties = NONE;
					return;
				}
				sscanf(line+4,"%s%s",sname,tseq);
				for(i=0;len < length;i++) {
					c = tseq[i];
					if(c == '\n' || c == EOS) break; /* EOL */
					if (!isspace(c)) gap_penalty_mask[len++] = c;
				}			
			}

/* skip any more comment lines */
		while (line[0] == '!') {
			if(fgets(line,MAXLINE+1,fin)==NULL) return;
		}

/* skip the sequence lines */
		for (;;) {
			if(isspace(line[0])) break;
			if(fgets(line,MAXLINE+1,fin)==NULL) return;
		}
	}
}

/*
 * Read secondary structure / gap penalty mask data from an EMBL/Swiss-Prot
 * style file (global fin), starting at the current file position.
 *
 * For each entry (a line for which linetype(line,"ID") is true) the entry
 * name is remembered and the following lines are examined:
 *   - a run of "FT" lines starting with a HELIX or STRAND feature is passed
 *     to get_swiss_feature() and sets struct_penalties to SECST;
 *   - a run of "GM" lines is passed to get_swiss_mask() and sets
 *     struct_penalties to GMASK.
 * When interactive, the user is asked first and the run is skipped on 'n'/'N'.
 * In every one of these cases ss_name receives the entry name.
 * The function returns when the end of the file is reached.
 *
 *   length - number of mask positions to initialise; also forwarded to the
 *            feature/mask parsers
 *
 * NOTE(review): `line` is static and is tested before the first read, so its
 * content from a previous call is still inspected on entry - confirm that
 * callers rely on this before changing it.
 */
static void get_embl_ss(sint length)
{
	static char title[MAXLINE+1];
	static char line[MAXLINE+1];
	static char sname[MAXNAMES+1];
	char feature[MAXLINE+1];
	char answer;
	sint i,linelen;

/* find the start of the sequence entry */
	for (;;) {
		while( !linetype(line,"ID") )
			if (fgets(line,MAXLINE+1,fin) == NULL) return;

		/* the entry name starts at the first non-blank at or after
		   column 5; never index beyond the end of a short line */
		linelen=(sint)strlen(line);
		for(i=5;i<linelen;i++)  /* DES */
			if(line[i] != ' ') break;
		if(i > linelen) i=linelen;
		strncpy(sname,line+i,MAXNAMES); /* remember entryname */
		sname[MAXNAMES]=EOS;
		for(i=0;sname[i]!=EOS;i++)
			if(sname[i] == ' ') {
				sname[i]=EOS;
				break;
			}
		rtrim(sname);
		blank_to_(sname);

/* look for secondary structure feature table / gap penalty mask */
		for(;;) {
			/* EOF here ends the search; looping back would test the
			   same stale line again and could spin forever */
			if(fgets(line,MAXLINE+1,fin) == NULL) return;

			if (linetype(line,"FT")) {
				feature[0]=EOS;	/* stays empty if the line has no key */
				sscanf(line+2,"%s",feature);
				if (strcmp(feature,"HELIX") == 0 ||
				    strcmp(feature,"STRAND") == 0)
				{
					if (interactive) {
						strcpy(title,"Found secondary structure in alignment file: ");
						strcat(title,sname);
						answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
					}
					else answer = 'y';
					if ((answer != 'n') && (answer != 'N'))  {
						struct_penalties = SECST;
						for (i=0;i<length;i++)
							sec_struct_mask[i] = '.';
						do {
							get_swiss_feature(&line[2],length);
							/* at EOF `line` is unchanged, so the loop
							   condition would stay true forever */
							if(fgets(line,MAXLINE+1,fin) == NULL) {
								strcpy(ss_name,sname);
								return;
							}
						} while( linetype(line,"FT") );
					}
					else {
						do {
							if(fgets(line,MAXLINE+1,fin) == NULL) {
								strcpy(ss_name,sname);
								return;
							}
						} while( linetype(line,"FT") );
					}
					strcpy(ss_name,sname);
				}
			}
			else if (linetype(line,"GM")) {
				if (interactive) {
					strcpy(title,"Found gap penalty mask in alignment file: ");
					strcat(title,sname);
					answer=prompt_for_yes_no(title,"Use it to set local gap penalties ");
				}
				else answer = 'y';
				if ((answer != 'n') && (answer != 'N'))  {
					struct_penalties = GMASK;
					for (i=0;i<length;i++)
						gap_penalty_mask[i] = '1';
					do {
						get_swiss_mask(&line[2],length);
						if(fgets(line,MAXLINE+1,fin) == NULL) {
							strcpy(ss_name,sname);
							return;
						}
					} while( linetype(line,"GM") );
				}
				else {
					do {
						if(fgets(line,MAXLINE+1,fin) == NULL) {
							strcpy(ss_name,sname);
							return;
						}
					} while( linetype(line,"GM") );
				}
				strcpy(ss_name,sname);
			}
			if (linetype(line,"SQ"))
				break;

			if (struct_penalties != NONE) break;
		}

	}

}

static void get_rsf_ss(sint length)
{
	static char title[MAXLINE+1];
	static char line[MAXLINE+1];
	static char lin2[MAXLINE+1];
	static char sname[MAXNAMES+1];
	sint i;

/* skip the comments */
	while (fgets(line,MAXLINE+1,fin) != NULL) {
 		if(line[strlen(line)-2]=='.' &&
                                 line[strlen(line)-3]=='.')
			break;
	}

/* find the start of the sequence entry */
	for (;;) {
		while (fgets(line,MAXLINE+1,fin) != NULL)
                	if( *line == '{' ) break;

		while( !keyword(line,"name") )
			if (fgets(line,MAXLINE+1,fin) == NULL) return;
			
    	for(i=5;i<=strlen(line);i++)  /* DES */
			if(line[i] != ' ') break;
		strncpy(sname,line+i,MAXNAMES); /* remember entryname */
    		for(i=0;i<=strlen(sname);i++)
			if(sname[i] == ' ') {
				sname[i]=EOS;
				break;
			}
		sname[MAXNAMES]=EOS;
		rtrim(sname);
    	blank_to_(sname);
		
/* look for secondary structure feature table / gap penalty mask */
		while(fgets(line,MAXLINE+1,fin) != NULL) {
			if (keyword(line,"feature")) {
				if (interactive) {
					strcpy(title,"Found secondary structure in alignment file: ");
					strcat(title,sname);
					(*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties ");
				}
				else (*lin2) = 'y';
				if ((*lin2 != 'n') && (*lin2 != 'N'))  {               	
					struct_penalties = SECST;
					for (i=0;i<length;i++)
						sec_struct_mask[i] = '.';
					do {
						if(keyword(line,"feature"))
							get_rsf_feature(&line[7],length);
						fgets(line,MAXLINE+1,fin);
					} while( !keyword(line,"sequence") );
				}
				else {
					do {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -